diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 811fc1a389d8d..9208fc45a0188 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7783,6 +7783,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
          "Trying to execute plan with unsupported VF");
   assert(BestVPlan.hasUF(BestUF) &&
          "Trying to execute plan with unsupported UF");
+  VPlanTransforms::materializeStepVectors(BestVPlan);
   // TODO: Move to VPlan transform stage once the transition to the VPlan-based
   // cost model is complete for better cost estimates.
   VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c77c944c01a36..cb35557064d7e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -902,6 +902,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
     /// Scale the first operand (vector step) by the second operand
     /// (scalar-step). Casts both operands to the result type if needed.
     WideIVStep,
+    /// Creates an integer step vector <0, 1, ..., VF-1> with a step of 1.
+    StepVector,

   };

@@ -1072,7 +1074,15 @@ class VPInstructionWithType : public VPInstruction {
     if (R->isScalarCast())
       return true;
     auto *VPI = dyn_cast<VPInstruction>(R);
-    return VPI && VPI->getOpcode() == VPInstruction::WideIVStep;
+    if (!VPI)
+      return false;
+    switch (VPI->getOpcode()) {
+    case VPInstruction::WideIVStep:
+    case VPInstruction::StepVector:
+      return true;
+    default:
+      return false;
+    }
   }

   static inline bool classof(const VPUser *R) {
@@ -1869,7 +1879,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
   TruncInst *Trunc;

   // If this recipe is unrolled it will have 2 additional operands.
-  bool isUnrolled() const { return getNumOperands() == 5; }
+  bool isUnrolled() const { return getNumOperands() == 6; }

 public:
   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
@@ -1918,6 +1928,16 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
   VPValue *getVFValue() { return getOperand(2); }
   const VPValue *getVFValue() const { return getOperand(2); }

+  // TODO: Remove once VPWidenIntOrFpInduction is fully expanded in
+  // convertToConcreteRecipes.
+  VPInstructionWithType *getStepVector() {
+    auto *StepVector =
+        cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe());
+    assert(StepVector->getOpcode() == VPInstruction::StepVector &&
+           "step vector operand must be a VPInstruction::StepVector");
+    return StepVector;
+  }
+
   VPValue *getSplatVFValue() {
     // If the recipe has been unrolled return the VPValue for the induction
     // increment.
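Note on the new opcode (editorial addition, not part of the patch): StepVector is a VPlan-level stand-in for the IR that IRBuilder::CreateStepVector emits at codegen time, as the VPlanRecipes.cpp hunk below shows. The following self-contained sketch demonstrates that lowering in isolation; the module name, the function name make_step, and the element count of 4 are illustrative assumptions.

// Minimal sketch, assuming an LLVM development setup (link against libLLVM).
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("stepvector-demo", Ctx);
  IRBuilder<> Builder(Ctx);

  // A function returning a scalable vector of i64 (4 lanes per vscale chunk).
  auto *VecTy =
      VectorType::get(Builder.getInt64Ty(), ElementCount::getScalable(4));
  auto *F = Function::Create(FunctionType::get(VecTy, /*isVarArg=*/false),
                             Function::ExternalLinkage, "make_step", M);
  Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // For a scalable type this emits:
  //   %step = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
  Value *Step = Builder.CreateStepVector(VecTy, "step");
  Builder.CreateRet(Step);

  verifyModule(M, &errs());
  M.print(outs(), nullptr);
}

For a fixed-width vector type, CreateStepVector instead folds directly to the constant sequence <0, 1, 2, 3>, which is why the intrinsic only shows up in the scalable-VF tests touched below.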
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c45c62e13d76c..e164f36adb98c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -934,6 +934,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::Not:
   case VPInstruction::PtrAdd:
   case VPInstruction::WideIVStep:
+  case VPInstruction::StepVector:
     return false;
   default:
     return true;
@@ -1085,8 +1086,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

 void VPInstructionWithType::execute(VPTransformState &State) {
   State.setDebugLocFrom(getDebugLoc());
-  assert(vputils::onlyFirstLaneUsed(this) &&
-         "Codegen only implemented for first lane.");
   switch (getOpcode()) {
   case Instruction::ZExt:
   case Instruction::Trunc: {
@@ -1096,6 +1095,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
     State.set(this, Cast, VPLane(0));
     break;
   }
+  case VPInstruction::StepVector: {
+    Value *StepVector =
+        State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
+    State.set(this, StepVector);
+    break;
+  }
   default:
     llvm_unreachable("opcode not implemented yet");
   }
@@ -1113,6 +1118,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
     O << "wide-iv-step ";
     printOperands(O, SlotTracker);
     break;
+  case VPInstruction::StepVector:
+    O << "step-vector " << *ResultTy;
+    break;
   default:
     assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
     O << Instruction::getOpcodeName(getOpcode()) << " ";
@@ -1880,7 +1888,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
 /// (0 * Step, 1 * Step, 2 * Step, ...)
 /// to each vector element of Val.
 /// \p Opcode is relevant for FP induction variable.
-static Value *getStepVector(Value *Val, Value *Step,
+/// \p InitVec is an integer step vector from 0 with a step of 1.
+static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
                             Instruction::BinaryOps BinOp, ElementCount VF,
                             IRBuilderBase &Builder) {
   assert(VF.isVector() && "only vector VFs are supported");
@@ -1894,15 +1903,6 @@ static Value *getStepVector(Value *Val, Value *Step,
          "Induction Step must be an integer or FP");
   assert(Step->getType() == STy && "Step has wrong type");

-  // Create a vector of consecutive numbers from zero to VF.
-  VectorType *InitVecValVTy = ValVTy;
-  if (STy->isFloatingPointTy()) {
-    Type *InitVecValSTy =
-        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
-    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
-  }
-  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
-
   if (STy->isIntegerTy()) {
     Step = Builder.CreateVectorSplat(VLen, Step);
     assert(Step->getType() == Val->getType() && "Invalid step vec");
@@ -1969,8 +1969,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   }

   Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
-  Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
-                                      State.VF, State.Builder);
+  Value *SteppedStart =
+      ::getStepVector(SplatStart, Step, State.get(getStepVector()),
+                      ID.getInductionOpcode(), State.VF, State.Builder);

   // We create vector phi nodes for both integer and floating-point induction
   // variables. Here, we determine the kind of arithmetic we will perform.
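Editorial note on the getStepVector change above: the helper no longer creates the <0, 1, ..., VF-1> sequence itself; the caller now passes it in as InitVec (materialized once in the preheader), and the call site is qualified as ::getStepVector so the file-local helper is chosen over the recipe's new member accessor of the same name. A plain-C++ sketch of the integer path's per-lane arithmetic, with VF fixed at 4 for illustration only (not taken from the patch):

// Illustrative only: models Val + InitVec * splat(Step) lane by lane.
#include <array>
#include <cstdint>
#include <iostream>

constexpr std::size_t VF = 4;

std::array<int64_t, VF> getStepVector(std::array<int64_t, VF> Val, int64_t Step,
                                      const std::array<int64_t, VF> &InitVec) {
  for (std::size_t Lane = 0; Lane < VF; ++Lane)
    Val[Lane] += InitVec[Lane] * Step; // splat(Step) applied per lane
  return Val;
}

int main() {
  std::array<int64_t, VF> SplatStart{7, 7, 7, 7}; // broadcast start value
  std::array<int64_t, VF> InitVec{0, 1, 2, 3};    // llvm.stepvector analogue
  for (int64_t V : getStepVector(SplatStart, /*Step=*/3, InitVec))
    std::cout << V << ' '; // prints: 7 10 13 16
  std::cout << '\n';
}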
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index eba8b16bf288d..7943f58f0739a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1223,6 +1223,16 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
     WideIV->setStartValue(NewStart);
     auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
     WideIV->setStepValue(NewStep);
+    // TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
+    VPInstructionWithType *OldStepVector = WideIV->getStepVector();
+    assert(OldStepVector->getNumUsers() == 1 &&
+           "step vector should only be used by single "
+           "VPWidenIntOrFpInductionRecipe");
+    auto *NewStepVector = new VPInstructionWithType(
+        VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc());
+    NewStepVector->insertAfter(OldStepVector->getDefiningRecipe());
+    OldStepVector->replaceAllUsesWith(NewStepVector);
+    OldStepVector->eraseFromParent();

     auto *NewBTC = new VPWidenCastRecipe(
         Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
@@ -2585,6 +2595,27 @@ void VPlanTransforms::handleUncountableEarlyExit(
   LatchExitingBranch->eraseFromParent();
 }

+void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
+  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+    auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+    if (!IVR)
+      continue;
+
+    Type *Ty = IVR->getPHINode()->getType();
+    if (TruncInst *Trunc = IVR->getTruncInst())
+      Ty = Trunc->getType();
+    if (Ty->isFloatingPointTy())
+      Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
+
+    VPBuilder Builder(Plan.getVectorPreheader());
+    VPInstruction *StepVector = Builder.createNaryOp(
+        VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
+    assert(IVR->getNumOperands() == 3 &&
+           "can only add step vector before unrolling");
+    IVR->addOperand(StepVector);
+  }
+}
+
 void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
   if (Plan.hasScalarVFOnly())
     return;
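Editorial sketch of the invariant materializeStepVectors establishes (a hypothetical helper, not in the patch; it would only compile alongside the VPlan internals in VPlanTransforms.cpp): after the pass runs and before unrolling, every widened int/FP induction carries exactly four operands, and operand 3 is a StepVector defined in the vector preheader.

// Assumed, illustrative verification helper; verifyStepVectors is not a
// function in this patch or in LLVM.
static void verifyStepVectors(VPlan &Plan) {
  VPBasicBlock *Preheader = Plan.getVectorPreheader();
  for (VPRecipeBase &R :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R);
    if (!IVR)
      continue;
    // Operands before unrolling: start, step, VF, step vector.
    assert(IVR->getNumOperands() == 4 && "step vector not materialized");
    // getStepVector() itself asserts the operand's opcode is StepVector.
    VPInstructionWithType *SV = IVR->getStepVector();
    assert(SV->getParent() == Preheader &&
           "step vector must be defined in the vector preheader");
    (void)Preheader;
    (void)SV;
  }
}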
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 9e8b518a0c7eb..7a05816f2e2da 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -199,6 +199,11 @@ struct VPlanTransforms {
   optimizeInductionExitUsers(VPlan &Plan,
                              DenseMap<VPValue *, VPValue *> &EndValues);

+  /// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
+  /// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
+  /// convertToConcreteRecipes.
+  static void materializeStepVectors(VPlan &Plan);
+
   /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's
   /// entry block if they are used as vectors.
   static void materializeBroadcasts(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
index 3a4c1c0cc7ada..ae7719757dc30 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll
@@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
+; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
@@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index d59607711b5bf..4775a6ec3f917 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
+; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]]
 ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
@@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
+; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64()
 ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]]
 ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
index 5c876b760e943..04b859337e663 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
@@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
 ; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
 ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
+; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0
 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0
 ; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0
 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1)
 ; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]]
 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
 ; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
 ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
+; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0
 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0
 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0
 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1)
 ; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]]
 ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -669,13 +669,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
 ; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
 ; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
 ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
+; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0
 ; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0
 ; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0
 ; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8()
 ; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1)
 ; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]]
 ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index acdc32a57750d..ba58c32bb3f52 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
+; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP8]], splat (i32 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 97b133605f441..49584bd47353d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -20,11 +20,11 @@ define void @induction_i7(ptr %dst) #0 {
 ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
+; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8()
+; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to
-; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8()
-; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to
 ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i7 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -85,11 +85,11 @@ define void @induction_i3_zext(ptr %dst) #0 {
 ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
+; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8()
+; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to
-; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8()
-; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to
 ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i3 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]]
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8bbda981895ac..fe68501ad94f4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -101,11 +101,11 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1)
 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0
@@ -185,11 +185,11 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1)
 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0
@@ -579,9 +579,9 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]]
 ; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]]
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0
@@ -809,9 +809,9 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 {
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -958,9 +958,9 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 {
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1113,13 +1113,13 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP10]], splat (i64 1)
 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0
@@ -1191,13 +1191,13 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
 ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1
 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3
+; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP21:%.*]] = shl [[TMP10]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3)
 ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
@@ -1283,11 +1283,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
 ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64()
 ; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1)
 ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index d8681bef80417..469faf67a71b3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -37,9 +37,9 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -83,9 +83,9 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -182,9 +182,9 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -221,9 +221,9 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -309,11 +309,11 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -352,11 +352,11 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -456,9 +456,9 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
 ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]]
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4
+; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32()
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0
 ; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -516,9 +516,9 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p,
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4
 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]])
 ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024)
+; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32()
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0
 ; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
 ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 00c8a1c5a72c9..4450678871ac3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -33,9 +33,9 @@ define void @dead_load(ptr %p, i16 %start) {
 ; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3
 ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]]
+; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[START_EXT]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 3)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP17]]
 ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
index 2582882baba00..5c15660e87132 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll
@@ -70,9 +70,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
 ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3
 ; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]]
+; CHECK-NEXT: [[TMP53:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[X_I64]], i64 0
 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP53:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP55:%.*]] = mul [[TMP53]], splat (i64 3)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP55]]
 ; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index 3e4d337c0706c..c64f6df075a04 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -22,9 +22,9 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
 ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0
 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
 ; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1)
 ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
 ; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index 9d6372e8ccca2..ebbc41f034bd1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -20,13 +20,13 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]]
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64()
 ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 79590f5060ad4..f89a863d1e5f5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -553,9 +553,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
+; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64()
 ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0
 ; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64()
 ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1)
 ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]]
 ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 827612cfe36d5..70c04ded5cf57 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -325,9 +325,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64()
 ; SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1)
 ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
@@ -432,9 +432,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64()
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64()
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1)
 ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
@@ -996,11 +996,11 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1)
 ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
@@ -1127,11 +1127,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64()
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64()
 ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1)
 ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
 ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
@@ -1233,11 +1233,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer
-; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64()
 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1)
 ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 05d7c1202baf0..01a7ea4ffcd05 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -581,8 +581,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32()
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
 ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1)
 ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]]
 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
@@ -771,8 +771,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
 ; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i32()
+; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement zeroinitializer, i32 [[START]], i32 0
 ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i32 1)
 ; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]]
 ; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
index c616fc1b11b93..427123cfca6d4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll
@@ -29,9 +29,9 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64()
 ; IF-EVL-NEXT: [[TMP12:%.*]] = mul [[TMP10]], splat (i64 1)
 ; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]]
 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index 788535d6a0c5d..2a48e0a5e5310 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -19,9 +19,9 @@ define i32 @iv_live_out_wide(ptr %dst) {
 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[STEP_2]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]]
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0