From 8942e2807c351bbe149ca2bc4e676beee9754034 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Thu, 3 Oct 2024 01:33:54 -0700 Subject: [PATCH 1/8] [LV][VPlan] Use VF VPValue in VPVectorPointerRecipe Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe. --- .../Transforms/Vectorize/LoopVectorize.cpp | 13 +- llvm/lib/Transforms/Vectorize/VPlan.h | 63 +++++++-- .../Transforms/Vectorize/VPlanAnalysis.cpp | 7 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 65 ++++++--- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 4 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../AArch64/sve-vector-reverse.ll | 130 ++++++++---------- .../RISCV/riscv-vector-reverse.ll | 20 +-- ...-force-tail-with-evl-reverse-load-store.ll | 120 ++++++++-------- ...orize-force-tail-with-evl-uniform-store.ll | 6 +- .../LoopVectorize/reverse_induction.ll | 24 ++-- .../vplan-sink-scalars-and-merge.ll | 5 +- 12 files changed, 258 insertions(+), 200 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6e082b1c134de..220625908dff9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPInstructionSC: case VPDef::VPCanonicalIVPHISC: case VPDef::VPVectorPointerSC: + case VPDef::VPReverseVectorPointerSC: case VPDef::VPExpandSCEVSC: case VPDef::VPEVLBasedIVPHISC: case VPDef::VPPredInstPHISC: @@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, if (Consecutive) { auto *GEP = dyn_cast( Ptr->getUnderlyingValue()->stripPointerCasts()); - auto *VectorPtr = new VPVectorPointerRecipe( - Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false, - I->getDebugLoc()); + VPSingleDefRecipe *VectorPtr; + if (Reverse) + VectorPtr = new VPReverseVectorPointerRecipe( + Ptr, &Plan.getVF(), getLoadStoreType(I), + GEP ? GEP->isInBounds() : false, I->getDebugLoc()); + else + VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + GEP ? 
GEP->isInBounds() : false, + I->getDebugLoc()); Builder.getInsertBlock()->appendRecipe(VectorPtr); Ptr = VectorPtr; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 594492344d43c..24e066753f9da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPReplicateSC: case VPRecipeBase::VPScalarIVStepsSC: case VPRecipeBase::VPVectorPointerSC: + case VPRecipeBase::VPReverseVectorPointerSC: case VPRecipeBase::VPWidenCallSC: case VPRecipeBase::VPWidenCanonicalIVSC: case VPRecipeBase::VPWidenCastSC: @@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || + R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC || R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; } @@ -1785,20 +1787,65 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags { #endif }; +/// A recipe to compute the pointers for widened memory accesses of IndexTy +/// in reverse order per part. +class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags, + public VPUnrollPartAccessor<2> { + Type *IndexedTy; + +public: + VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, + bool IsInBounds, DebugLoc DL) + : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC, + ArrayRef({Ptr, VF}), + GEPFlagsTy(IsInBounds), DL), + IndexedTy(IndexedTy) {} + + VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC) + + VPValue *getVFValue() { return getOperand(1); } + const VPValue *getVFValue() const { return getOperand(1); } + + void execute(VPTransformState &State) override; + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + + /// Returns true if the recipe only uses the first part of operand \p Op. + bool onlyFirstPartUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + assert(getNumOperands() <= 2 && "must have at most two operands"); + return true; + } + + VPReverseVectorPointerRecipe *clone() override { + return new VPReverseVectorPointerRecipe( + getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc()); + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe to compute the pointers for widened memory accesses of IndexTy for -/// all parts. If IsReverse is true, compute pointers for accessing the input in -/// reverse order per part. +/// all parts. 
class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<1> { Type *IndexedTy; - bool IsReverse; public: - VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, - bool IsInBounds, DebugLoc DL) + VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds, + DebugLoc DL) : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef(Ptr), GEPFlagsTy(IsInBounds), DL), - IndexedTy(IndexedTy), IsReverse(IsReverse) {} + IndexedTy(IndexedTy) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) @@ -1819,8 +1866,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, } VPVectorPointerRecipe *clone() override { - return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse, - isInBounds(), getDebugLoc()); + return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(), + getDebugLoc()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 277df0637372d..7bfbcc3dff0e1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { [](const auto *R) { return R->getScalarType(); }) .Case([this](const VPRecipeBase *R) { - return inferScalarType(R->getOperand(0)); - }) + VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>( + [this](const VPRecipeBase *R) { + return inferScalarType(R->getOperand(0)); + }) .Case( diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0d092b9c10acc..bcf7b6d9b64c2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1813,6 +1813,46 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + unsigned CurrentPart = getUnrollPart(*this); + // Use i32 for the gep index type when the value is constant, + // or query DataLayout for a more suitable index type otherwise. + const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); + Type *IndexTy = State.VF.isScalable() + ? DL.getIndexType(IndexedTy->getPointerTo()) + : Builder.getInt32Ty(); + Value *Ptr = State.get(getOperand(0), VPLane(0)); + bool InBounds = isInBounds(); + + // the wide store needs to start at the last vector element. 
+ // RunTimeVF = VScale * VF.getKnownMinValue() + // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() + Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); + if (IndexTy != RunTimeVF->getType()) + RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); + // NumElt = -CurrentPart * RunTimeVF + Value *NumElt = Builder.CreateMul( + ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); + Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); + ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds); + + State.set(this, ResultPtr, /*IsScalar*/ true); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = reverse-vector-pointer "; + printOperands(O, SlotTracker); +} +#endif + void VPVectorPointerRecipe ::execute(VPTransformState &State) { auto &Builder = State.Builder; State.setDebugLocFrom(getDebugLoc()); @@ -1820,31 +1860,14 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { // Use i32 for the gep index type when the value is constant, // or query DataLayout for a more suitable index type otherwise. const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0) + Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0) ? DL.getIndexType(Builder.getPtrTy(0)) : Builder.getInt32Ty(); Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); - Value *ResultPtr = nullptr; - if (IsReverse) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); - // NumElt = -CurrentPart * RunTimeVF - Value *NumElt = Builder.CreateMul( - ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = - Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); - ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); - ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds); - } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); - ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); - } + Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); + Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); State.set(this, ResultPtr, /*IsScalar*/ true); } @@ -1855,8 +1878,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent; printAsOperand(O, SlotTracker); O << " = vector-pointer "; - if (IsReverse) - O << "(reverse) "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index ca78f32506ef7..1e32865e8ee57 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { // Add operand indicating the part to generate code for, to recipes still // requiring it. 
if (isa(Copy) || + VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) || match(Copy, m_VPInstruction( m_VPValue()))) Copy->addOperand(getConstantVPV(Part)); - if (isa(R)) + if (isa(R)) Copy->setOperand(0, R.getOperand(0)); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 4c383244f96f1..d7626e437db33 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -346,6 +346,7 @@ class VPDef { VPScalarCastSC, VPScalarIVStepsSC, VPVectorPointerSC, + VPReverseVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 81121019efe76..76562e80fbc4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 -; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3 -; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], 
i64 [[TMP29]] -; CHECK-NEXT: store [[TMP19]], ptr [[TMP25]], align 8 -; CHECK-NEXT: store [[TMP20]], ptr [[TMP31]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]] +; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 8 +; CHECK-NEXT: store [[TMP17]], ptr [[TMP24]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00 +; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 @@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: 
[[TMP8:%.*]] = shl i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[N]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 3 -; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 3 -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 0, [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP19]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 8 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP21]], align 8 -; CHECK-NEXT: [[TMP22:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[TMP25]], 3 -; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP30:%.*]] = shl i64 [[TMP29]], 3 -; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = sub i64 1, [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP33]], i64 [[TMP32]] -; CHECK-NEXT: store [[TMP22]], ptr [[TMP28]], align 8 -; CHECK-NEXT: store [[TMP23]], ptr [[TMP34]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[N]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD3]], shufflevector ( 
insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]] +; CHECK-NEXT: store [[TMP19]], ptr [[TMP23]], align 8 +; CHECK-NEXT: store [[TMP20]], ptr [[TMP27]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -177,8 +163,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP36]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP29]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 1d5e6c117a2ea..4f49c30de9299 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -53,6 +53,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -73,11 +74,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> ; 
CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -137,6 +138,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -157,11 +159,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -257,6 +259,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -277,11 +280,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -341,6 +344,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; 
CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -361,11 +365,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index c1cf8b0fc541e..9a001f36da7d4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -28,34 +28,30 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 -; IF-EVL-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], -1 -; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] -; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; IF-EVL-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]] -; IF-EVL-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP11]] -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP13]] -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 
[[TMP17]], 4 -; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]] -; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]] -; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP19]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP20]] -; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP22]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1 +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]] +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]] +; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -131,49 +127,45 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 -; IF-EVL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX3]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = add zeroinitializer, [[TMP9]] -; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP10]] -; IF-EVL-NEXT: [[TMP11:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[TMP7]], -1 -; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]] -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP16:%.*]] = select [[TMP11]], [[TMP15]], zeroinitializer -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]] -; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]] -; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], [[VP_REVERSE_MASK]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 -; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]] -; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]] -; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]] -; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]] -; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 
true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP30]], [[VP_REVERSE_MASK6]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], zeroinitializer +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; 
IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index 870925950ae49..c492b296903e6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -38,10 +38,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]] +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]] ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index c1322792071e4..d983c5138164f 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,11 +20,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> @@ -93,11 +93,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i128 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i128 [[TMP0]], -1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 
0 +; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[TMP0]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> @@ -176,11 +176,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]] ; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[TMP4]], -1 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 -4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 0f3cd9d4ca4d6..a0488b7478050 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1101,6 +1101,7 @@ exit: define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -1115,11 +1116,11 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%ptr.iv.next>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0> ; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not 
ir<%c.1> From 9f1c301e9d520f411d237a739c3952080ae9bb52 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Thu, 3 Oct 2024 19:58:32 -0700 Subject: [PATCH 2/8] Address comments --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bcf7b6d9b64c2..44f42361cf9a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1827,8 +1827,6 @@ void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) { bool InBounds = isInBounds(); // the wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); if (IndexTy != RunTimeVF->getType()) RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); From fd731974d7a9177bfd9fe7d1d50cd091b1427ffd Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Tue, 15 Oct 2024 17:10:09 +0800 Subject: [PATCH 3/8] Update llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp Co-authored-by: Florian Hahn --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 44f42361cf9a7..c8d912b176fe6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1858,7 +1858,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { // Use i32 for the gep index type when the value is constant, // or query DataLayout for a more suitable index type otherwise. const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0) + Type *IndexTy = State.VF.isScalable() && CurrentPart > 0 ? DL.getIndexType(Builder.getPtrTy(0)) : Builder.getInt32Ty(); Value *Ptr = State.get(getOperand(0), VPLane(0)); From 7a4daf546a5781cc7902ccb3e6384ec13a97e962 Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Tue, 15 Oct 2024 17:10:32 +0800 Subject: [PATCH 4/8] Update llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp Co-authored-by: Florian Hahn --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index c8d912b176fe6..b9c37f8bf991b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1826,7 +1826,7 @@ void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) { Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); - // the wide store needs to start at the last vector element. + // The wide store needs to start at the last vector element. 
Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); if (IndexTy != RunTimeVF->getType()) RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); From 142ea90cd51192ecffdff4f6c7f8338748b2a8f6 Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Tue, 15 Oct 2024 17:23:11 +0800 Subject: [PATCH 5/8] Update llvm/lib/Transforms/Vectorize/VPlan.h Co-authored-by: Florian Hahn --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 24e066753f9da..f5e9e00907273 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1788,7 +1788,7 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags { }; /// A recipe to compute the pointers for widened memory accesses of IndexTy -/// in reverse order per part. +/// in reverse order. class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<2> { Type *IndexedTy; From 6a575e2c37354b0ba8273842922bf87f57987103 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 15 Oct 2024 02:51:50 -0700 Subject: [PATCH 6/8] Address comments - refine stale comments - sink to use - add inbounds in print() --- llvm/lib/Transforms/Vectorize/VPlan.h | 3 +-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 ++++++---- .../LoopVectorize/RISCV/riscv-vector-reverse.ll | 16 ++++++++-------- .../vplan-sink-scalars-and-merge.ll | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f5e9e00907273..8df63b5796c73 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1834,8 +1834,7 @@ class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags, #endif }; -/// A recipe to compute the pointers for widened memory accesses of IndexTy for -/// all parts. +/// A recipe to compute the pointers for widened memory accesses of IndexTy. class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<1> { Type *IndexedTy; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b9c37f8bf991b..13bf067b35776 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1813,7 +1813,7 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) { +void VPReverseVectorPointerRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; State.setDebugLocFrom(getDebugLoc()); unsigned CurrentPart = getUnrollPart(*this); @@ -1823,8 +1823,6 @@ void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) { Type *IndexTy = State.VF.isScalable() ? DL.getIndexType(IndexedTy->getPointerTo()) : Builder.getInt32Ty(); - Value *Ptr = State.get(getOperand(0), VPLane(0)); - bool InBounds = isInBounds(); // The wide store needs to start at the last vector element. 
   Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -1835,6 +1833,8 @@ void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) {
       ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
   // LastLane = 1 - RunTimeVF
   Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+  Value *Ptr = State.get(getOperand(0), VPLane(0));
+  bool InBounds = isInBounds();
   Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
   ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
 
@@ -1847,11 +1847,13 @@ void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
   O << Indent;
   printAsOperand(O, SlotTracker);
   O << " = reverse-vector-pointer ";
+  if (isInBounds())
+    O << "inbounds ";
   printOperands(O, SlotTracker);
 }
 #endif
 
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
   unsigned CurrentPart = getUnrollPart(*this);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 4f49c30de9299..7a4252dc9e05d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -74,11 +74,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -159,11 +159,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -280,11 +280,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -365,11 +365,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00>
 ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%arrayidx3>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index a0488b7478050..446b720ad1ba4 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1120,7 +1120,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1>
-; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer ir<%ptr.iv.next>, vp<[[VF]]>
+; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0>
 ; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not ir<%c.1>

From 0a500c78f3bf45952163c3cb975a97aa840f8321 Mon Sep 17 00:00:00 2001
From: ShihPo Hung
Date: Tue, 15 Oct 2024 09:04:42 -0700
Subject: [PATCH 7/8] Add getGEPIndexTy utility function to avoid duplication

---
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 13bf067b35776..19bc298b717c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1813,16 +1813,22 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+                           unsigned CurrentPart, IRBuilderBase &Builder) {
+  // Use i32 for the gep index type when the value is constant,
+  // or query DataLayout for a more suitable index type otherwise.
+  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+  return IsScalable && (IsReverse || CurrentPart > 0)
+             ? DL.getIndexType(Builder.getPtrTy(0))
+             : Builder.getInt32Ty();
+}
+
 void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
   unsigned CurrentPart = getUnrollPart(*this);
-  // Use i32 for the gep index type when the value is constant,
-  // or query DataLayout for a more suitable index type otherwise.
-  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = State.VF.isScalable()
-                      ? DL.getIndexType(IndexedTy->getPointerTo())
-                      : Builder.getInt32Ty();
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+                                CurrentPart, Builder);
 
   // The wide store needs to start at the last vector element.
   Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -1857,12 +1863,8 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
   unsigned CurrentPart = getUnrollPart(*this);
-  // Use i32 for the gep index type when the value is constant,
-  // or query DataLayout for a more suitable index type otherwise.
-  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = State.VF.isScalable() && CurrentPart > 0
-                      ? DL.getIndexType(Builder.getPtrTy(0))
-                      : Builder.getInt32Ty();
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+                                CurrentPart, Builder);
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   bool InBounds = isInBounds();
 

From d46752d547addf1da16a37333da06a1faed0733f Mon Sep 17 00:00:00 2001
From: ShihPo Hung
Date: Wed, 16 Oct 2024 08:26:09 -0700
Subject: [PATCH 8/8] Make VPReverseVectorPointer return false in mayHaveSideEffects

---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 19bc298b717c4..627ed50e11a82 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -154,6 +154,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPDerivedIVSC:
   case VPPredInstPHISC:
   case VPScalarCastSC:
+  case VPReverseVectorPointerSC:
     return false;
   case VPInstructionSC:
     return mayWriteToMemory();