Skip to content

Commit 2c60d7d

Browse files
committed
[VectorCombine] Use InstSimplifyFolder to simplify instrs on creation.
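Update VectorCombine's IRBuilder to use InstSimplifyFolder, so that redundant instructions (e.g. zero-index GEPs and identity shuffles) are simplified when they are created instead of being emitted.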
1 parent d7e23be commit 2c60d7d

12 files changed

+41
-75
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/BasicAliasAnalysis.h"
2222
#include "llvm/Analysis/ConstantFolding.h"
2323
#include "llvm/Analysis/GlobalsModRef.h"
24+
#include "llvm/Analysis/InstSimplifyFolder.h"
2425
#include "llvm/Analysis/Loads.h"
2526
#include "llvm/Analysis/TargetTransformInfo.h"
2627
#include "llvm/Analysis/ValueTracking.h"
@@ -72,14 +73,15 @@ class VectorCombine {
7273
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
7374
const DataLayout *DL, TTI::TargetCostKind CostKind,
7475
bool TryEarlyFoldsOnly)
75-
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
76-
CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
76+
: F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
77+
DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
78+
TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
7779

7880
bool run();
7981

8082
private:
8183
Function &F;
82-
IRBuilder<> Builder;
84+
IRBuilder<InstSimplifyFolder> Builder;
8385
const TargetTransformInfo &TTI;
8486
const DominatorTree &DT;
8587
AAResults &AA;
@@ -529,7 +531,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
529531
/// Create a shuffle that translates (shifts) 1 element from the input vector
530532
/// to a new element location.
531533
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
532-
unsigned NewIndex, IRBuilder<> &Builder) {
534+
unsigned NewIndex,
535+
IRBuilder<InstSimplifyFolder> &Builder) {
533536
// The shuffle mask is poison except for 1 lane that is being translated
534537
// to the new element index. Example for OldIndex == 2 and NewIndex == 0:
535538
// ShufMask = { 2, poison, poison, poison }
@@ -543,9 +546,9 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
543546
/// the source vector (shift the scalar element) to a NewIndex for extraction.
544547
/// Return null if the input can be constant folded, so that we are not creating
545548
/// unnecessary instructions.
546-
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
547-
unsigned NewIndex,
548-
IRBuilder<> &Builder) {
549+
static ExtractElementInst *
550+
translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex,
551+
IRBuilder<InstSimplifyFolder> &Builder) {
549552
// Shufflevectors can only be created for fixed-width vectors.
550553
Value *X = ExtElt->getVectorOperand();
551554
if (!isa<FixedVectorType>(X->getType()))
@@ -1519,7 +1522,7 @@ class ScalarizationResult {
15191522
}
15201523

15211524
/// Freeze the ToFreeze and update the use in \p UserI to use it.
1522-
void freeze(IRBuilder<> &Builder, Instruction &UserI) {
1525+
void freeze(IRBuilder<InstSimplifyFolder> &Builder, Instruction &UserI) {
15231526
assert(isSafeWithFreeze() &&
15241527
"should only be used when freezing is required");
15251528
assert(is_contained(ToFreeze->users(), &UserI) &&
@@ -2617,7 +2620,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
26172620
const SmallPtrSet<Use *, 4> &IdentityLeafs,
26182621
const SmallPtrSet<Use *, 4> &SplatLeafs,
26192622
const SmallPtrSet<Use *, 4> &ConcatLeafs,
2620-
IRBuilder<> &Builder,
2623+
IRBuilder<InstSimplifyFolder> &Builder,
26212624
const TargetTransformInfo *TTI) {
26222625
auto [FrontU, FrontLane] = Item.front();
26232626

llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@ target triple = "arm64-apple-darwin"
66
define void @load_extract_insert_store_const_idx(ptr %A) {
77
; CHECK-LABEL: @load_extract_insert_store_const_idx(
88
; CHECK-NEXT: entry:
9-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0
10-
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8
9+
; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0:%.*]], align 8
1110
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
12-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1
11+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i32 0, i64 1
1312
; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8
1413
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
15-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1
14+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i64 0, i64 1
1615
; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8
1716
; CHECK-NEXT: ret void
1817
;

llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ define i32 @load_extract_idx_0(ptr %x) {
1515

1616
define i32 @vscale_load_extract_idx_0(ptr %x) {
1717
; CHECK-LABEL: @vscale_load_extract_idx_0(
18-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
19-
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16
18+
; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1:%.*]], align 16
2019
; CHECK-NEXT: ret i32 [[R]]
2120
;
2221
%lv = load <vscale x 4 x i32>, ptr %x

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -997,10 +997,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
997997
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
998998
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
999999
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
1000-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1001-
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
1002-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1003-
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
1000+
; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
1001+
; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
10041002
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
10051003
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
10061004
; CHECK-NEXT: ret <4 x i64> [[RES]]

llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ define i32 @reducebase_v4i32(<4 x i32> %a, <4 x i32> %b) {
1616

1717
define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
1818
; CHECK-LABEL: @reduceshuffle_onein_v4i32(
19-
; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
19+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
2120
; CHECK-NEXT: ret i32 [[R]]
2221
;
2322
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -27,8 +26,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
2726

2827
define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) {
2928
; CHECK-LABEL: @reduceshuffle_onein_const_v4i32(
30-
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
31-
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
29+
; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S:%.*]], splat (i32 -1)
3230
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
3331
; CHECK-NEXT: ret i32 [[R]]
3432
;
@@ -222,8 +220,7 @@ define i32 @reducebase_v16i32(<16 x i32> %a, <16 x i32> %b) {
222220

223221
define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
224222
; CHECK-LABEL: @reduceshuffle_onein_v16i32(
225-
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
226-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
223+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X:%.*]])
227224
; CHECK-NEXT: ret i32 [[R]]
228225
;
229226
%x = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -233,8 +230,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
233230

234231
define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
235232
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32(
236-
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
237-
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
233+
; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S:%.*]], splat (i32 -1)
238234
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
239235
; CHECK-NEXT: ret i32 [[R]]
240236
;
@@ -353,8 +349,7 @@ define i16 @reducebase_v16i16(<16 x i16> %a, <16 x i16> %b) {
353349

354350
define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
355351
; CHECK-LABEL: @reduceshuffle_onein_v16i16(
356-
; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
357-
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
352+
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X:%.*]])
358353
; CHECK-NEXT: ret i16 [[R]]
359354
;
360355
%x = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -364,8 +359,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
364359

365360
define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
366361
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16(
367-
; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
368-
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
362+
; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S:%.*]], splat (i16 -1)
369363
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
370364
; CHECK-NEXT: ret i16 [[R]]
371365
;

llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -486,10 +486,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
486486

487487
define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
488488
; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
489-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[X:%.*]], [[X]]
490-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
491-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
492-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
489+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
493490
;
494491
; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
495492
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -530,10 +527,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
530527

531528
define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
532529
; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
533-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[X:%.*]], [[X]]
534-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
535-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
536-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
530+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
537531
;
538532
; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
539533
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -574,10 +568,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
574568

575569
define <vscale x 1 x i64> @srem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
576570
; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
577-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[X:%.*]], [[X]]
578-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
579-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
580-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
571+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
581572
;
582573
; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
583574
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -618,10 +609,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
618609

619610
define <vscale x 1 x i64> @urem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
620611
; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
621-
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[X:%.*]], [[X]]
622-
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
623-
; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
624-
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
612+
; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
625613
;
626614
; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
627615
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -1572,8 +1560,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
15721560
; VEC-COMBINE-64-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
15731561
; VEC-COMBINE-64-NEXT: [[TMP1:%.*]] = add i64 [[Y:%.*]], 42
15741562
; VEC-COMBINE-64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP1]], i64 0
1575-
; VEC-COMBINE-64-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[DOTSPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
1576-
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP2]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
1563+
; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[DOTSPLATINSERT]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
15771564
; VEC-COMBINE-64-NEXT: ret <1 x i64> [[TMP3]]
15781565
;
15791566
; NO-VEC-COMBINE-LABEL: @add_v1i64_allonesmask(

llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
4848
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
4949
; CHECK-LABEL: @ext2_v2f32v4f32(
5050
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
51-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
52-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
51+
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> <float undef, float undef, float poison, float undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
5352
; CHECK-NEXT: ret <4 x float> [[R]]
5453
;
5554
%e = extractelement <2 x float> %x, i32 2

llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
define void @multiple_extract(ptr %p) {
77
; CHECK-LABEL: @multiple_extract(
88
; CHECK-NEXT: [[VP:%.*]] = load ptr, ptr [[P:%.*]], align 8
9-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 0
10-
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[TMP1]], align 16
9+
; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[VP]], align 16
1110
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 1
1211
; CHECK-NEXT: [[E1:%.*]] = load i32, ptr [[TMP2]], align 4
1312
; CHECK-NEXT: store i32 [[E0]], ptr [[P]], align 4

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -657,11 +657,10 @@ define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16)
657657
; PR30986 - split vector loads for scalarized operations
658658
define <2 x i64> @PR30986(ptr %0) {
659659
; CHECK-LABEL: @PR30986(
660-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
661-
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
660+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16
662661
; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
663662
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0
664-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
663+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1
665664
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
666665
; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
667666
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -553,8 +553,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
553553

554554
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync {
555555
; SSE2-LABEL: @load_v8f32_extract_insert_v4f32(
556-
; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0
557-
; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4
556+
; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1:%.*]], align 4
558557
; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
559558
; SSE2-NEXT: ret <4 x float> [[R]]
560559
;
@@ -590,8 +589,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
590589
define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync {
591590
; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
592591
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1
593-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0
594-
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8
592+
; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8
595593
; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
596594
; CHECK-NEXT: ret <8 x i16> [[R]]
597595
;
@@ -605,11 +603,10 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab
605603
; PR30986 - split vector loads for scalarized operations
606604
define <2 x i64> @PR30986(ptr %0) {
607605
; CHECK-LABEL: @PR30986(
608-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
609-
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
606+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16
610607
; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
611608
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
612-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
609+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1
613610
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
614611
; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
615612
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1

0 commit comments

Comments
 (0)