Skip to content

[VectorCombine] Use InstSimplifyFolder to simplify instrs on creation. #146350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/include/llvm/IR/IRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1575,10 +1575,14 @@ class IRBuilderBase {
return Accum;
}

Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "",
bool IsDisjoint = false) {
if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS))
return V;
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
return Insert(
IsDisjoint ? BinaryOperator::CreateDisjoint(Instruction::Or, LHS, RHS)
: BinaryOperator::CreateOr(LHS, RHS),
Name);
Comment on lines -1578 to +1585
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be better to introduce a CreateDisjoint?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added CreateBinOpDisjoint instead, similar to CreateBinOpFMF.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think your previous code was better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically because there is only a single operation that can be disjoint, which is or, so it doesn't make much sense to pretend otherwise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy to defer to @nikic's judgement: you can ignore my suggestion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, reverted the change, thanks

}

Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
Expand Down
26 changes: 15 additions & 11 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
Expand Down Expand Up @@ -72,14 +73,15 @@ class VectorCombine {
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
const DataLayout *DL, TTI::TargetCostKind CostKind,
bool TryEarlyFoldsOnly)
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
: F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

bool run();

private:
Function &F;
IRBuilder<> Builder;
IRBuilder<InstSimplifyFolder> Builder;
const TargetTransformInfo &TTI;
const DominatorTree &DT;
AAResults &AA;
Expand Down Expand Up @@ -529,7 +531,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
unsigned NewIndex, IRBuilder<> &Builder) {
unsigned NewIndex, IRBuilderBase &Builder) {
// The shuffle mask is poison except for 1 lane that is being translated
// to the new element index. Example for OldIndex == 2 and NewIndex == 0:
// ShufMask = { 2, poison, poison, poison }
Expand All @@ -545,7 +547,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
/// unnecessary instructions.
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
unsigned NewIndex,
IRBuilder<> &Builder) {
IRBuilderBase &Builder) {
// Shufflevectors can only be created for fixed-width vectors.
Value *X = ExtElt->getVectorOperand();
if (!isa<FixedVectorType>(X->getType()))
Expand Down Expand Up @@ -1459,10 +1461,12 @@ bool VectorCombine::foldBinopOfReductions(Instruction &I) {
LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
if (auto *PDInst = dyn_cast<PossiblyDisjointInst>(&I))
if (auto *PDVectorBO = dyn_cast<PossiblyDisjointInst>(VectorBO))
PDVectorBO->setIsDisjoint(PDInst->isDisjoint());
Value *VectorBO;
if (BinOpOpc == Instruction::Or)
VectorBO = Builder.CreateOr(V0, V1, "",
cast<PossiblyDisjointInst>(I).isDisjoint());
else
VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
Comment on lines +1465 to +1469
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (BinOpOpc == Instruction::Or)
VectorBO = Builder.CreateOr(V0, V1, "",
cast<PossiblyDisjointInst>(I).isDisjoint());
else
VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
VectorBO = isa<PossiblyDisjointInst>(I) ?
Builder.CreateDisjoint(BinOpOpc, V0, V1) :
Builder.CreateBinOp(BinOpOpc, V0, V1);

Better not to rely on just Or being PossiblyDisjoint.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I added CreateBinOpDisjoint (similar to CreateBinOpFMF). Although there I still retained the if, because we need to call isDisjoint as well.


Instruction *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO});
replaceValue(I, *Rdx);
Expand Down Expand Up @@ -1519,7 +1523,7 @@ class ScalarizationResult {
}

/// Freeze the ToFreeze and update the use in \p User to use it.
void freeze(IRBuilder<> &Builder, Instruction &UserI) {
void freeze(IRBuilderBase &Builder, Instruction &UserI) {
assert(isSafeWithFreeze() &&
"should only be used when freezing is required");
assert(is_contained(ToFreeze->users(), &UserI) &&
Expand Down Expand Up @@ -2617,7 +2621,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
const SmallPtrSet<Use *, 4> &IdentityLeafs,
const SmallPtrSet<Use *, 4> &SplatLeafs,
const SmallPtrSet<Use *, 4> &ConcatLeafs,
IRBuilder<> &Builder,
IRBuilderBase &Builder,
const TargetTransformInfo *TTI) {
auto [FrontU, FrontLane] = Item.front();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@ target triple = "arm64-apple-darwin"
define void @load_extract_insert_store_const_idx(ptr %A) {
; CHECK-LABEL: @load_extract_insert_store_const_idx(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0:%.*]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i32 0, i64 1
; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i64 0, i64 1
; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8
; CHECK-NEXT: ret void
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ define i32 @load_extract_idx_0(ptr %x) {

define i32 @vscale_load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_0(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1:%.*]], align 16
; CHECK-NEXT: ret i32 [[R]]
;
%lv = load <vscale x 4 x i32>, ptr %x
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -997,10 +997,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
Expand Down
18 changes: 6 additions & 12 deletions llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ define i32 @reducebase_v4i32(<4 x i32> %a, <4 x i32> %b) {

define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v4i32(
; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand All @@ -27,8 +26,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {

define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_const_v4i32(
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S:%.*]], splat (i32 -1)
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
; CHECK-NEXT: ret i32 [[R]]
;
Expand Down Expand Up @@ -222,8 +220,7 @@ define i32 @reducebase_v16i32(<16 x i32> %a, <16 x i32> %b) {

define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v16i32(
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%x = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
Expand All @@ -233,8 +230,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {

define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32(
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S:%.*]], splat (i32 -1)
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
; CHECK-NEXT: ret i32 [[R]]
;
Expand Down Expand Up @@ -353,8 +349,7 @@ define i16 @reducebase_v16i16(<16 x i16> %a, <16 x i16> %b) {

define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v16i16(
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X:%.*]])
; CHECK-NEXT: ret i16 [[R]]
;
%x = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
Expand All @@ -364,8 +359,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {

define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16(
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S:%.*]], splat (i16 -1)
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
; CHECK-NEXT: ret i16 [[R]]
;
Expand Down
23 changes: 5 additions & 18 deletions llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -486,10 +486,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe

define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[X:%.*]], [[X]]
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
;
; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
Expand Down Expand Up @@ -530,10 +527,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale

define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[X:%.*]], [[X]]
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
;
; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
Expand Down Expand Up @@ -574,10 +568,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale

define <vscale x 1 x i64> @srem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[X:%.*]], [[X]]
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
;
; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
Expand Down Expand Up @@ -618,10 +609,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale

define <vscale x 1 x i64> @urem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[X:%.*]], [[X]]
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
;
; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
Expand Down Expand Up @@ -1572,8 +1560,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
; VEC-COMBINE-64-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
; VEC-COMBINE-64-NEXT: [[TMP1:%.*]] = add i64 [[Y:%.*]], 42
; VEC-COMBINE-64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP1]], i64 0
; VEC-COMBINE-64-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[DOTSPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP2]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[DOTSPLATINSERT]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
; VEC-COMBINE-64-NEXT: ret <1 x i64> [[TMP3]]
;
; NO-VEC-COMBINE-LABEL: @add_v1i64_allonesmask(
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext2_v2f32v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%e = extractelement <2 x float> %x, i32 2
%e = extractelement <2 x float> %x, i32 1
%n = fneg float %e
%r = insertelement <4 x float> %y, float %n, i32 2
%r = insertelement <4 x float> %y, float %n, i32 1
ret <4 x float> %r
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
define void @multiple_extract(ptr %p) {
; CHECK-LABEL: @multiple_extract(
; CHECK-NEXT: [[VP:%.*]] = load ptr, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 0
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[TMP1]], align 16
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[VP]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 1
; CHECK-NEXT: [[E1:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: store i32 [[E0]], ptr [[P]], align 4
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -657,11 +657,10 @@ define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16)
; PR30986 - split vector loads for scalarized operations
define <2 x i64> @PR30986(ptr %0) {
; CHECK-LABEL: @PR30986(
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1
Expand Down
Loading
Loading