diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 8fe610835dca5..66ab2fa5610f5 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1575,10 +1575,14 @@ class IRBuilderBase { return Accum; } - Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") { + Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "", + bool IsDisjoint = false) { if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS)) return V; - return Insert(BinaryOperator::CreateOr(LHS, RHS), Name); + return Insert( + IsDisjoint ? BinaryOperator::CreateDisjoint(Instruction::Or, LHS, RHS) + : BinaryOperator::CreateOr(LHS, RHS), + Name); } Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") { diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 19e82099e87f0..9a42ed20d6973 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -72,14 +73,15 @@ class VectorCombine { const DominatorTree &DT, AAResults &AA, AssumptionCache &AC, const DataLayout *DL, TTI::TargetCostKind CostKind, bool TryEarlyFoldsOnly) - : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL), - CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {} + : F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI), + DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), + TryEarlyFoldsOnly(TryEarlyFoldsOnly) {} bool run(); private: Function &F; - IRBuilder<> Builder; + IRBuilder Builder; const TargetTransformInfo &TTI; const DominatorTree &DT; AAResults &AA; @@ -529,7 +531,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, /// Create a shuffle that translates (shifts) 1 element from the input vector /// to a new element location. static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, - unsigned NewIndex, IRBuilder<> &Builder) { + unsigned NewIndex, IRBuilderBase &Builder) { // The shuffle mask is poison except for 1 lane that is being translated // to the new element index. Example for OldIndex == 2 and NewIndex == 0: // ShufMask = { 2, poison, poison, poison } @@ -545,7 +547,7 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, /// unnecessary instructions. static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, - IRBuilder<> &Builder) { + IRBuilderBase &Builder) { // Shufflevectors can only be created for fixed-width vectors. Value *X = ExtElt->getVectorOperand(); if (!isa(X->getType())) @@ -1459,10 +1461,12 @@ bool VectorCombine::foldBinopOfReductions(Instruction &I) { LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost << "\n"); - Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); - if (auto *PDInst = dyn_cast(&I)) - if (auto *PDVectorBO = dyn_cast(VectorBO)) - PDVectorBO->setIsDisjoint(PDInst->isDisjoint()); + Value *VectorBO; + if (BinOpOpc == Instruction::Or) + VectorBO = Builder.CreateOr(V0, V1, "", + cast(I).isDisjoint()); + else + VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); Instruction *Rdx = Builder.CreateIntrinsic(ReductionIID, {VTy}, {VectorBO}); replaceValue(I, *Rdx); @@ -1519,7 +1523,7 @@ class ScalarizationResult { } /// Freeze the ToFreeze and update the use in \p User to use it. - void freeze(IRBuilder<> &Builder, Instruction &UserI) { + void freeze(IRBuilderBase &Builder, Instruction &UserI) { assert(isSafeWithFreeze() && "should only be used when freezing is required"); assert(is_contained(ToFreeze->users(), &UserI) && @@ -2617,7 +2621,7 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, const SmallPtrSet &IdentityLeafs, const SmallPtrSet &SplatLeafs, const SmallPtrSet &ConcatLeafs, - IRBuilder<> &Builder, + IRBuilderBase &Builder, const TargetTransformInfo *TTI) { auto [FrontU, FrontLane] = Item.front(); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll index 605c0772f6e38..391215b3e4159 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll @@ -6,13 +6,12 @@ target triple = "arm64-apple-darwin" define void @load_extract_insert_store_const_idx(ptr %A) { ; CHECK-LABEL: @load_extract_insert_store_const_idx( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0 -; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0:%.*]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i32 0, i64 1 ; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i64 0, i64 1 ; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll index 551d6d1cabd41..e6e5f5196d3da 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll @@ -15,8 +15,7 @@ define i32 @load_extract_idx_0(ptr %x) { define i32 @vscale_load_extract_idx_0(ptr %x) { ; CHECK-LABEL: @vscale_load_extract_idx_0( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds , ptr [[X:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1:%.*]], align 16 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load , ptr %x diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 1c128c8f56a03..a40d514a520ca 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -997,10 +997,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32> ; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32> ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> -; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> -; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32> +; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> ; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]] ; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[RES]] diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll index 46a622148c871..59ece31631d88 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll @@ -16,8 +16,7 @@ define i32 @reducebase_v4i32(<4 x i32> %a, <4 x i32> %b) { define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_v4i32( -; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[R]] ; %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> @@ -27,8 +26,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) { define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_const_v4i32( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1) +; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -222,8 +220,7 @@ define i32 @reducebase_v16i32(<16 x i32> %a, <16 x i32> %b) { define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_v16i32( -; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[R]] ; %x = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> @@ -233,8 +230,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) { define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32( -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1) +; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S:%.*]], splat (i32 -1) ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -353,8 +349,7 @@ define i16 @reducebase_v16i16(<16 x i16> %a, <16 x i16> %b) { define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) { ; CHECK-LABEL: @reduceshuffle_onein_v16i16( -; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> -; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) +; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X:%.*]]) ; CHECK-NEXT: ret i16 [[R]] ; %x = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> @@ -364,8 +359,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) { define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) { ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16( -; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> -; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1) +; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S:%.*]], splat (i16 -1) ; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]]) ; CHECK-NEXT: ret i16 [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index c123458669088..f4aee898ec838 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -486,10 +486,7 @@ define @urem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe define @sdiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) { ; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl( -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[X:%.*]], [[X]] -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VEC-COMBINE-NEXT: ret [[RES]] +; VEC-COMBINE-NEXT: ret splat (i64 1) ; ; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl( ; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement poison, i1 true, i32 0 @@ -530,10 +527,7 @@ define @sdiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, @udiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) { ; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl( -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[X:%.*]], [[X]] -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VEC-COMBINE-NEXT: ret [[RES]] +; VEC-COMBINE-NEXT: ret splat (i64 1) ; ; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl( ; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement poison, i1 true, i32 0 @@ -574,10 +568,7 @@ define @udiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, @srem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) { ; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl( -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[X:%.*]], [[X]] -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VEC-COMBINE-NEXT: ret [[RES]] +; VEC-COMBINE-NEXT: ret zeroinitializer ; ; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl( ; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement poison, i1 true, i32 0 @@ -618,10 +609,7 @@ define @srem_nxv1i64_anymask_knownvl(i64 %x, i64 %y, @urem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) { ; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl( -; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[X:%.*]], [[X]] -; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VEC-COMBINE-NEXT: ret [[RES]] +; VEC-COMBINE-NEXT: ret zeroinitializer ; ; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl( ; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement poison, i1 true, i32 0 @@ -1572,8 +1560,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl) ; VEC-COMBINE-64-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer ; VEC-COMBINE-64-NEXT: [[TMP1:%.*]] = add i64 [[Y:%.*]], 42 ; VEC-COMBINE-64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP1]], i64 0 -; VEC-COMBINE-64-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[DOTSPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer -; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP2]], <1 x i1> [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[DOTSPLATINSERT]], <1 x i1> [[MASK]], i32 [[EVL:%.*]]) ; VEC-COMBINE-64-NEXT: ret <1 x i64> [[TMP3]] ; ; NO-VEC-COMBINE-LABEL: @add_v1i64_allonesmask( diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll index cd2bc757eb9d2..5358e0419e7a7 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll @@ -48,13 +48,13 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) { define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @ext2_v2f32v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; - %e = extractelement <2 x float> %x, i32 2 + %e = extractelement <2 x float> %x, i32 1 %n = fneg float %e - %r = insertelement <4 x float> %y, float %n, i32 2 + %r = insertelement <4 x float> %y, float %n, i32 1 ret <4 x float> %r } diff --git a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll index d46c8c0de4037..b26e5ec2698a5 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll @@ -6,8 +6,7 @@ define void @multiple_extract(ptr %p) { ; CHECK-LABEL: @multiple_extract( ; CHECK-NEXT: [[VP:%.*]] = load ptr, ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 0 -; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[TMP1]], align 16 +; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[VP]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 1 ; CHECK-NEXT: [[E1:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: store i32 [[E0]], ptr [[P]], align 4 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index 40437ca345224..977da754ec5a7 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -657,11 +657,10 @@ define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16) ; PR30986 - split vector loads for scalarized operations define <2 x i64> @PR30986(ptr %0) { ; CHECK-LABEL: @PR30986( -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 84c223be88621..388b655641b7d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -553,8 +553,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; SSE2-LABEL: @load_v8f32_extract_insert_v4f32( -; SSE2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x float>, ptr [[P:%.*]], i32 0, i32 0 -; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1]], align 4 +; SSE2-NEXT: [[S:%.*]] = load float, ptr [[TMP1:%.*]], align 4 ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 ; SSE2-NEXT: ret <4 x float> [[R]] ; @@ -590,8 +589,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <2 x i16>, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i16>, ptr [[GEP]], i32 0, i32 0 -; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[S:%.*]] = load i16, ptr [[GEP]], align 8 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[R]] ; @@ -605,11 +603,10 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab ; PR30986 - split vector loads for scalarized operations define <2 x i64> @PR30986(ptr %0) { ; CHECK-LABEL: @PR30986( -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll index d6917e1007cf4..982944b8b7bd2 100644 --- a/llvm/test/Transforms/VectorCombine/X86/pr114901.ll +++ b/llvm/test/Transforms/VectorCombine/X86/pr114901.ll @@ -17,8 +17,7 @@ define i1 @PR114901(<4 x i32> %a) { ; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> -; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[SHIFT]], [[TMP1]] -; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 +; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[SHIFT]], i64 1 ; AVX-NEXT: ret i1 [[R]] ; %e1 = extractelement <4 x i32> %a, i32 1 @@ -43,8 +42,7 @@ define i1 @PR114901_flip(<4 x i32> %a) { ; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { ; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> -; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]] -; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 +; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 ; AVX-NEXT: ret i1 [[R]] ; %e1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll index 2588f9116f322..0c995bde9f25a 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll @@ -34,18 +34,12 @@ define <8 x i16> @src_v4tov8_i16(<4 x i1> %a, <4 x i1> %b, <4 x i16> %x, <4 x i1 define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { ; SSE-LABEL: define <8 x i16> @src_v8tov8_i16( ; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> -; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] ; SSE-NEXT: ret <8 x i16> [[RES]] ; ; AVX2-LABEL: define <8 x i16> @src_v8tov8_i16( ; AVX2-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] { -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <8 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <8 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <8 x i32> -; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]] +; AVX2-NEXT: [[RES:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]] ; AVX2-NEXT: ret <8 x i16> [[RES]] ; ; AVX512-LABEL: define <8 x i16> @src_v8tov8_i16(