Skip to content

[VPlan] Add VPInstruction::StepVector and use it in VPWidenIntOrFpInductionRecipe #129508

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7783,6 +7783,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
"Trying to execute plan with unsupported UF");
VPlanTransforms::materializeStepVectors(BestVPlan);
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
Expand Down
24 changes: 22 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Scale the first operand (vector step) by the second operand
/// (scalar-step). Casts both operands to the result type if needed.
WideIVStep,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,

};

Expand Down Expand Up @@ -1072,7 +1074,15 @@ class VPInstructionWithType : public VPInstruction {
if (R->isScalarCast())
return true;
auto *VPI = dyn_cast<VPInstruction>(R);
return VPI && VPI->getOpcode() == VPInstruction::WideIVStep;
if (!VPI)
return false;
switch (VPI->getOpcode()) {
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
return true;
default:
return false;
}
}

static inline bool classof(const VPUser *R) {
Expand Down Expand Up @@ -1869,7 +1879,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
TruncInst *Trunc;

// If this recipe is unrolled it will have 2 additional operands.
bool isUnrolled() const { return getNumOperands() == 5; }
bool isUnrolled() const { return getNumOperands() == 6; }

public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
Expand Down Expand Up @@ -1918,6 +1928,16 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VPValue *getVFValue() { return getOperand(2); }
const VPValue *getVFValue() const { return getOperand(2); }

// TODO: Remove once VPWidenIntOrFpInduction is fully expanded in
// convertToConcreteRecipes.
VPInstructionWithType *getStepVector() {
auto *StepVector =
cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe());
assert(StepVector->getOpcode() == VPInstruction::StepVector &&
"step vector operand must be a VPInstruction::StepVector");
return StepVector;
}

VPValue *getSplatVFValue() {
// If the recipe has been unrolled return the VPValue for the induction
// increment.
Expand Down
29 changes: 15 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::Not:
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
return false;
default:
return true;
Expand Down Expand Up @@ -1085,8 +1086,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

void VPInstructionWithType::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (getOpcode()) {
case Instruction::ZExt:
case Instruction::Trunc: {
Expand All @@ -1096,6 +1095,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, Cast, VPLane(0));
break;
}
case VPInstruction::StepVector: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add this to VPInstruction::computeCost?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's currently costed as zero for now since we don't want to change the overall cost of VPWidenIntOrFpInductionRecipe.

Once we expand VPWidenIntOrFpInductionRecipe in #118638 I think it should be safe to add a cost to it then? Since the expansion will happen just before execution, after any costing.

Value *StepVector =
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
State.set(this, StepVector);
break;
}
default:
llvm_unreachable("opcode not implemented yet");
}
Expand All @@ -1113,6 +1118,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
O << "wide-iv-step ";
printOperands(O, SlotTracker);
break;
case VPInstruction::StepVector:
O << "step-vector " << *ResultTy;
break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
Expand Down Expand Up @@ -1880,7 +1888,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
/// (0 * Step, 1 * Step, 2 * Step, ...)
/// to each vector element of Val.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *Step,
/// \p InitVec is an integer step vector from 0 with a step of 1.
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
Instruction::BinaryOps BinOp, ElementCount VF,
IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
Expand All @@ -1894,15 +1903,6 @@ static Value *getStepVector(Value *Val, Value *Step,
"Induction Step must be an integer or FP");
assert(Step->getType() == STy && "Step has wrong type");

// Create a vector of consecutive numbers from zero to VF.
VectorType *InitVecValVTy = ValVTy;
if (STy->isFloatingPointTy()) {
Type *InitVecValSTy =
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should assert that InitVec is the type we expect, i.e. it must have the same type as Step

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They might not always have the same type as step might be a float or an integer, whereas InitVec is always an integer. Hopefully the calls to Builder.CreateMul(InitVec, Step) in the integer case and Builder.CreateUIToFP(InitVec, ValVTy) in the float case will catch this.

}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

if (STy->isIntegerTy()) {
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
Expand Down Expand Up @@ -1969,8 +1969,9 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
}

Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
State.VF, State.Builder);
Value *SteppedStart =
::getStepVector(SplatStart, Step, State.get(getStepVector()),
ID.getInductionOpcode(), State.VF, State.Builder);

// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,16 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
WideIV->setStartValue(NewStart);
auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
WideIV->setStepValue(NewStep);
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
VPInstructionWithType *OldStepVector = WideIV->getStepVector();
assert(OldStepVector->getNumUsers() == 1 &&
"step vector should only be used by single "
"VPWidenIntOrFpInductionRecipe");
auto *NewStepVector = new VPInstructionWithType(
VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc());
NewStepVector->insertAfter(OldStepVector->getDefiningRecipe());
OldStepVector->replaceAllUsesWith(NewStepVector);
OldStepVector->eraseFromParent();

auto *NewBTC = new VPWidenCastRecipe(
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
Expand Down Expand Up @@ -2585,6 +2595,27 @@ void VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
}

void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!IVR)
continue;

Type *Ty = IVR->getPHINode()->getType();
if (TruncInst *Trunc = IVR->getTruncInst())
Ty = Trunc->getType();
if (Ty->isFloatingPointTy())
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());

VPBuilder Builder(Plan.getVectorPreheader());
VPInstruction *StepVector = Builder.createNaryOp(
VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
assert(IVR->getNumOperands() == 3 &&
"can only add step vector before unrolling");
IVR->addOperand(StepVector);
}
}

void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ struct VPlanTransforms {
optimizeInductionExitUsers(VPlan &Plan,
DenseMap<VPValue *, VPValue *> &EndValues);

/// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
/// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
/// convertToConcreteRecipes.
static void materializeStepVectors(VPlan &Plan);

/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
Expand Down Expand Up @@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
Expand Down Expand Up @@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; DEFAULT-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; DEFAULT-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down Expand Up @@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; OPTSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down Expand Up @@ -669,13 +669,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; MINSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; MINSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
Expand Down
Loading
Loading