-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[VectorCombine] Use InstSimplifyFolder to simplify instrs on creation. #146350
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) Changes: Update VectorCombine to use InstSimplifyFolder to simplify redundant instructions on creation. Patch is 25.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146350.diff 12 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 19e82099e87f0..2cdc619d9fcad 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -72,14 +73,15 @@ class VectorCombine {
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
const DataLayout *DL, TTI::TargetCostKind CostKind,
bool TryEarlyFoldsOnly)
- : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
- CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
+ : F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
+ DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
+ TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
bool run();
private:
Function &F;
- IRBuilder<> Builder;
+ IRBuilder<InstSimplifyFolder> Builder;
const TargetTransformInfo &TTI;
const DominatorTree &DT;
AAResults &AA;
@@ -529,7 +531,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
/// Create a shuffle that translates (shifts) 1 element from the input vector
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
- unsigned NewIndex, IRBuilder<> &Builder) {
+ unsigned NewIndex,
+ IRBuilder<InstSimplifyFolder> &Builder) {
// The shuffle mask is poison except for 1 lane that is being translated
// to the new element index. Example for OldIndex == 2 and NewIndex == 0:
// ShufMask = { 2, poison, poison, poison }
@@ -543,9 +546,9 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
/// the source vector (shift the scalar element) to a NewIndex for extraction.
/// Return null if the input can be constant folded, so that we are not creating
/// unnecessary instructions.
-static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
- unsigned NewIndex,
- IRBuilder<> &Builder) {
+static ExtractElementInst *
+translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex,
+ IRBuilder<InstSimplifyFolder> &Builder) {
// Shufflevectors can only be created for fixed-width vectors.
Value *X = ExtElt->getVectorOperand();
if (!isa<FixedVectorType>(X->getType()))
@@ -1519,7 +1522,7 @@ class ScalarizationResult {
}
/// Freeze the ToFreeze and update the use in \p User to use it.
- void freeze(IRBuilder<> &Builder, Instruction &UserI) {
+ void freeze(IRBuilder<InstSimplifyFolder> &Builder, Instruction &UserI) {
assert(isSafeWithFreeze() &&
"should only be used when freezing is required");
assert(is_contained(ToFreeze->users(), &UserI) &&
@@ -2617,7 +2620,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
const SmallPtrSet<Use *, 4> &IdentityLeafs,
const SmallPtrSet<Use *, 4> &SplatLeafs,
const SmallPtrSet<Use *, 4> &ConcatLeafs,
- IRBuilder<> &Builder,
+ IRBuilder<InstSimplifyFolder> &Builder,
const TargetTransformInfo *TTI) {
auto [FrontU, FrontLane] = Item.front();
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
index 605c0772f6e38..391215b3e4159 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extract-insert-store-scalarization.ll
@@ -6,13 +6,12 @@ target triple = "arm64-apple-darwin"
define void @load_extract_insert_store_const_idx(ptr %A) {
; CHECK-LABEL: @load_extract_insert_store_const_idx(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i32 0, i64 0
-; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[EXT_0:%.*]] = load double, ptr [[TMP0:%.*]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul double 2.000000e+01, [[EXT_0]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i32 0, i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i32 0, i64 1
; CHECK-NEXT: [[EXT_1:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT_1]], [[MUL]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <225 x double>, ptr [[TMP0]], i64 0, i64 1
; CHECK-NEXT: store double [[SUB]], ptr [[TMP2]], align 8
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
index 551d6d1cabd41..e6e5f5196d3da 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -15,8 +15,7 @@ define i32 @load_extract_idx_0(ptr %x) {
define i32 @vscale_load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_0(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 16
+; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1:%.*]], align 16
; CHECK-NEXT: ret i32 [[R]]
;
%lv = load <vscale x 4 x i32>, ptr %x
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 1c128c8f56a03..a40d514a520ca 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -997,10 +997,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: [[A_BC0:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[B_BC0:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A_BC0]], [[B_BC0]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[B]], <4 x i64> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[TMP2]] to <8 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[A]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[TMP4]] to <8 x i32>
+; CHECK-NEXT: [[B_BC1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
+; CHECK-NEXT: [[A_BC1:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[B_BC1]], <8 x i32> [[A_BC1]]
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[CONCAT]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
index 46a622148c871..59ece31631d88 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
@@ -16,8 +16,7 @@ define i32 @reducebase_v4i32(<4 x i32> %a, <4 x i32> %b) {
define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v4i32(
-; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -27,8 +26,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_const_v4i32(
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
+; CHECK-NEXT: [[X:%.*]] = xor <4 x i32> [[S:%.*]], splat (i32 -1)
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
; CHECK-NEXT: ret i32 [[R]]
;
@@ -222,8 +220,7 @@ define i32 @reducebase_v16i32(<16 x i32> %a, <16 x i32> %b) {
define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v16i32(
-; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%x = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -233,8 +230,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32(
-; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
+; CHECK-NEXT: [[X:%.*]] = xor <16 x i32> [[S:%.*]], splat (i32 -1)
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
; CHECK-NEXT: ret i32 [[R]]
;
@@ -353,8 +349,7 @@ define i16 @reducebase_v16i16(<16 x i16> %a, <16 x i16> %b) {
define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
; CHECK-LABEL: @reduceshuffle_onein_v16i16(
-; CHECK-NEXT: [[X:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X:%.*]])
; CHECK-NEXT: ret i16 [[R]]
;
%x = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -364,8 +359,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16(
-; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
+; CHECK-NEXT: [[X:%.*]] = xor <16 x i16> [[S:%.*]], splat (i16 -1)
; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
; CHECK-NEXT: ret i16 [[R]]
;
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
index c123458669088..f4aee898ec838 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
@@ -486,10 +486,7 @@ define <vscale x 1 x i64> @urem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
-; VEC-COMBINE-NEXT: [[TMP1:%.*]] = sdiv i64 [[X:%.*]], [[X]]
-; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
-; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
;
; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -530,10 +527,7 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
-; VEC-COMBINE-NEXT: [[TMP1:%.*]] = udiv i64 [[X:%.*]], [[X]]
-; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
-; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> splat (i64 1)
;
; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -574,10 +568,7 @@ define <vscale x 1 x i64> @udiv_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
define <vscale x 1 x i64> @srem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
-; VEC-COMBINE-NEXT: [[TMP1:%.*]] = srem i64 [[X:%.*]], [[X]]
-; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
-; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
;
; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -618,10 +609,7 @@ define <vscale x 1 x i64> @srem_nxv1i64_anymask_knownvl(i64 %x, i64 %y, <vscale
define <vscale x 1 x i64> @urem_nxv1i64_allonesmask_knownvl(i64 %x, i64 %y) {
; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
-; VEC-COMBINE-NEXT: [[TMP1:%.*]] = urem i64 [[X:%.*]], [[X]]
-; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
-; VEC-COMBINE-NEXT: [[RES:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> [[RES]]
+; VEC-COMBINE-NEXT: ret <vscale x 1 x i64> zeroinitializer
;
; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask_knownvl(
; NO-VEC-COMBINE-NEXT: [[MASK_HEAD:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
@@ -1572,8 +1560,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
; VEC-COMBINE-64-NEXT: [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
; VEC-COMBINE-64-NEXT: [[TMP1:%.*]] = add i64 [[Y:%.*]], 42
; VEC-COMBINE-64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP1]], i64 0
-; VEC-COMBINE-64-NEXT: [[TMP2:%.*]] = shufflevector <1 x i64> [[DOTSPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
-; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP2]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-64-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[DOTSPLATINSERT]], <1 x i1> [[MASK]], i32 [[EVL:%.*]])
; VEC-COMBINE-64-NEXT: ret <1 x i64> [[TMP3]]
;
; NO-VEC-COMBINE-LABEL: @add_v1i64_allonesmask(
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index cd2bc757eb9d2..dd52a6e892deb 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -48,8 +48,7 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext2_v2f32v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> <float undef, float undef, float poison, float undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%e = extractelement <2 x float> %x, i32 2
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
index d46c8c0de4037..b26e5ec2698a5 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
@@ -6,8 +6,7 @@
define void @multiple_extract(ptr %p) {
; CHECK-LABEL: @multiple_extract(
; CHECK-NEXT: [[VP:%.*]] = load ptr, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 0
-; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[TMP1]], align 16
+; CHECK-NEXT: [[E0:%.*]] = load i32, ptr [[VP]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i32>, ptr [[VP]], i32 0, i64 1
; CHECK-NEXT: [[E1:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: store i32 [[E0]], ptr [[P]], align 4
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 40437ca345224..977da754ec5a7 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -657,11 +657,10 @@ define <2 x float> @load_f32_insert_v2f32_msan(ptr align 16 dereferenceable(16)
; PR30986 - split vector loads for scalarized operations
define <2 x i64> @PR30986(ptr %0) {
; CHECK-LABEL: @PR30986(
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0:%.*]], i32 0, i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2:%.*]], align 16
; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP0]], i32 0, i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <2 x i64>, ptr [[TMP2]], i32 0, i32 1
; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP8]], i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 84c223be88621..388b655641b7d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -553,8 +553,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable
define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 derefer...
[truncated]
|
✅ With the latest revision this PR passed the undef deprecator. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
InstSimplifyFolder is incompatible with setting instruction flags after the fact, so this code should be adjusted:
llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Lines 1465 to 1468 in 2c60d7d
Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); | |
if (auto *PDInst = dyn_cast<PossiblyDisjointInst>(&I)) | |
if (auto *PDVectorBO = dyn_cast<PossiblyDisjointInst>(VectorBO)) | |
PDVectorBO->setIsDisjoint(PDInst->isDisjoint()); |
@@ -529,7 +531,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, | |||
/// Create a shuffle that translates (shifts) 1 element from the input vector | |||
/// to a new element location. | |||
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, | |||
unsigned NewIndex, IRBuilder<> &Builder) { | |||
unsigned NewIndex, | |||
IRBuilder<InstSimplifyFolder> &Builder) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please replace these uses with `IRBuilderBase &`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks.
2c60d7d
to
cdc57db
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
InstSimplifyFolder is incompatible with setting instruction flags after the fact, so this code should be adjusted:
llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Lines 1465 to 1468 in 2c60d7d
Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); if (auto *PDInst = dyn_cast<PossiblyDisjointInst>(&I)) if (auto *PDVectorBO = dyn_cast<PossiblyDisjointInst>(VectorBO)) PDVectorBO->setIsDisjoint(PDInst->isDisjoint());
Updated the code, but I had to create a new variant of `CreateOr`. Not sure if there's a nicer way to pass through possible flags at creation.
@@ -529,7 +531,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, | |||
/// Create a shuffle that translates (shifts) 1 element from the input vector | |||
/// to a new element location. | |||
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex, | |||
unsigned NewIndex, IRBuilder<> &Builder) { | |||
unsigned NewIndex, | |||
IRBuilder<InstSimplifyFolder> &Builder) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks.
@@ -48,8 +48,7 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) { | |||
define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) { | |||
; CHECK-LABEL: @ext2_v2f32v4f32( | |||
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] | |||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison> | |||
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3> | |||
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> <float undef, float undef, float poison, float undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 3> | |||
; CHECK-NEXT: ret <4 x float> [[R]] | |||
; | |||
%e = extractelement <2 x float> %x, i32 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This out-of-bounds index looks like a typo from #120461 - change to `i32 1`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, the undef-deprecator warned about this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") { | ||
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "", | ||
bool IsDisjoint = false) { | ||
if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS)) | ||
return V; | ||
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name); | ||
return Insert( | ||
IsDisjoint ? BinaryOperator::CreateDisjoint(Instruction::Or, LHS, RHS) | ||
: BinaryOperator::CreateOr(LHS, RHS), | ||
Name); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be better to introduce a CreateDisjoint?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added `CreateBinOpDisjoint` instead, similar to `CreateBinOpFMF`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think your previous code was better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Specifically because there is only a single operation that can be disjoint, which is `or`, so it doesn't make much sense to pretend otherwise.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm happy to defer to @nikic's judgement: you can ignore my suggestion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, reverted the change, thanks
if (BinOpOpc == Instruction::Or) | ||
VectorBO = Builder.CreateOr(V0, V1, "", | ||
cast<PossiblyDisjointInst>(I).isDisjoint()); | ||
else | ||
VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (BinOpOpc == Instruction::Or) | |
VectorBO = Builder.CreateOr(V0, V1, "", | |
cast<PossiblyDisjointInst>(I).isDisjoint()); | |
else | |
VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1); | |
VectorBO = isa<PossiblyDisjointInst>(I) ? | |
Builder.CreateDisjoint(BinOpOpc, V0, V1) : | |
Builder.CreateBinOp(BinOpOpc, V0, V1); |
Better not to rely on just Or being PossiblyDisjoint.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I added `CreateBinOpDisjoint` (similar to `CreateBinOpFMF`). Although there I still retained the `if`, because we need to call `isDisjoint` as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with the test nit resolved.
Update VectorCombine to use InstSimplifyFolder to simplify redundant instructions on creation.
cdc57db
to
4c30369
Compare
This reverts commit 4c30369.
…on creation. (#146350) Update VectorCombine to use InstSimplifyFolder to simplify redundant instructions on creation. PR: llvm/llvm-project#146350
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/7578 Here is the relevant piece of the build log for the reference
|
The expensive-check failure was due to a code path not indicating that the function was modified; it should be fixed in 829f2f2 |
I've just bisected #147218 to this commit. Please could you take a look? Thanks! |
…anslateExt. After llvm/llvm-project#146350, CreateExtractElement may return a folded value and not create an ExtractElement instruction. Replace cast with dyn_cast. Note that the function returns nullptr already earlier if the extract may be constant folded. Fixes llvm/llvm-project#147218
It looks like the addition of a defaulted parameter to `llvm::IRBuilderBase::CreateOr` in llvm/llvm-project#146350 caused it to conflict with our own version of the method. Sync up their signatures.
It looks like the addition of a defaulted parameter to `llvm::IRBuilderBase::CreateOr` in llvm/llvm-project#146350 caused it to conflict with our own version of the method. Sync up their signatures.
Update VectorCombine to use InstSimplifyFolder to simplify redundant instructions on creation.