From 27569450c58c82fd717007da9510944993660a04 Mon Sep 17 00:00:00 2001
From: Lauren
Date: Tue, 1 Jul 2025 02:55:20 -0400
Subject: [PATCH 1/4] [VectorCombine] Expand `vector_insert` into shufflevector for earlier cost optimizations (#145512)

Move folding logic from `InstCombineCalls` to `VectorCombine` to ensure
`vector_insert` intrinsics are expanded into shufflevector instructions
before cost-based shuffle optimizations run. Canonicalizes fixed-width
vectors only.
---
 .../InstCombine/InstCombineCalls.cpp          | 46 ------------
 .../Transforms/Vectorize/VectorCombine.cpp    | 71 +++++++++++++++++++
 .../VectorCombine/fold-vector-insert.ll       | 71 +++++++++++++++++++
 3 files changed, 142 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/Transforms/VectorCombine/fold-vector-insert.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5b398d3b75f59..df29024a86f67 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3462,52 +3462,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
-  case Intrinsic::vector_insert: {
-    Value *Vec = II->getArgOperand(0);
-    Value *SubVec = II->getArgOperand(1);
-    Value *Idx = II->getArgOperand(2);
-    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
-    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
-    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
-
-    // Only canonicalize if the destination vector, Vec, and SubVec are all
-    // fixed vectors.
-    if (DstTy && VecTy && SubVecTy) {
-      unsigned DstNumElts = DstTy->getNumElements();
-      unsigned VecNumElts = VecTy->getNumElements();
-      unsigned SubVecNumElts = SubVecTy->getNumElements();
-      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
-
-      // An insert that entirely overwrites Vec with SubVec is a nop.
-      if (VecNumElts == SubVecNumElts)
-        return replaceInstUsesWith(CI, SubVec);
-
-      // Widen SubVec into a vector of the same width as Vec, since
-      // shufflevector requires the two input vectors to be the same width.
-      // Elements beyond the bounds of SubVec within the widened vector are
-      // undefined.
-      SmallVector<int, 8> WidenMask;
-      unsigned i;
-      for (i = 0; i != SubVecNumElts; ++i)
-        WidenMask.push_back(i);
-      for (; i != VecNumElts; ++i)
-        WidenMask.push_back(PoisonMaskElem);
-
-      Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
-
-      SmallVector<int, 8> Mask;
-      for (unsigned i = 0; i != IdxN; ++i)
-        Mask.push_back(i);
-      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
-        Mask.push_back(i);
-      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
-        Mask.push_back(i);
-
-      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
-      return replaceInstUsesWith(CI, Shuffle);
-    }
-    break;
-  }
   case Intrinsic::vector_extract: {
     Value *Vec = II->getArgOperand(0);
     Value *Idx = II->getArgOperand(1);
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 19e82099e87f0..dbbc6c5a07ec8 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -112,6 +112,7 @@ class VectorCombine {
   bool foldExtractExtract(Instruction &I);
   bool foldInsExtFNeg(Instruction &I);
   bool foldInsExtBinop(Instruction &I);
+  bool foldVectorInsertToShuffle(Instruction &I);
   bool foldInsExtVectorToShuffle(Instruction &I);
   bool foldBitOpOfBitcasts(Instruction &I);
   bool foldBitcastShuffle(Instruction &I);
@@ -804,6 +805,73 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) {
   return true;
 }
 
+/// Try to fold vector_insert intrinsics into shufflevector instructions.
+bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
+  auto *II = dyn_cast<IntrinsicInst>(&I);
+  // This optimization only applies to vector_insert intrinsics.
+  if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+    return false;
+
+  Value *Vec = II->getArgOperand(0);
+  Value *SubVec = II->getArgOperand(1);
+  Value *Idx = II->getArgOperand(2);
+
+  // Caller guarantees DstTy is a fixed vector.
+  auto *DstTy = cast<FixedVectorType>(II->getType());
+  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+  auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
+
+  // Only canonicalize if Vec and SubVec are both fixed vectors.
+  if (!VecTy || !SubVecTy)
+    return false;
+
+  unsigned DstNumElts = DstTy->getNumElements();
+  unsigned VecNumElts = VecTy->getNumElements();
+  unsigned SubVecNumElts = SubVecTy->getNumElements();
+  auto *SubVecPtr = dyn_cast<ConstantInt>(Idx);
+  if (!SubVecPtr)
+    return false;
+
+  unsigned SubVecIdx = SubVecPtr->getZExtValue();
+
+  // Ensure insertion of SubVec doesn't exceed Dst bounds.
+  if (SubVecIdx % SubVecNumElts != 0 || SubVecIdx + SubVecNumElts > DstNumElts)
+    return false;
+
+  // An insert that entirely overwrites Vec with SubVec is a nop.
+  if (VecNumElts == SubVecNumElts) {
+    replaceValue(I, *SubVec);
+    return true;
+  }
+
+  // Widen SubVec into a vector of the same width as Vec, since
+  // shufflevector requires the two input vectors to be the same width.
+  // Elements beyond the bounds of SubVec within the widened vector are
+  // undefined.
+  SmallVector<int> WidenMask;
+  unsigned int i = 0;
+  for (i = 0; i != SubVecNumElts; ++i)
+    WidenMask.push_back(i);
+  for (; i != VecNumElts; ++i)
+    WidenMask.push_back(PoisonMaskElem);
+
+  auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
+  Worklist.pushValue(WidenShuffle);
+
+  SmallVector<int> Mask;
+  unsigned int j;
+  for (i = 0; i != SubVecIdx; ++i)
+    Mask.push_back(i);
+  for (j = 0; j != SubVecNumElts; ++j)
+    Mask.push_back(DstNumElts + j);
+  for (i = SubVecIdx + SubVecNumElts; i != DstNumElts; ++i)
+    Mask.push_back(i);
+
+  auto *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
+  replaceValue(I, *Shuffle);
+  return true;
+}
+
 bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) {
   // Match: bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y))
   Value *LHSSrc, *RHSSrc;
@@ -3639,6 +3707,9 @@ bool VectorCombine::run() {
   // dispatching to folding functions if there's no chance of matching.
   if (IsFixedVectorType) {
     switch (Opcode) {
+    case Instruction::Call:
+      MadeChange |= foldVectorInsertToShuffle(I);
+      break;
     case Instruction::InsertElement:
       MadeChange |= vectorizeLoadInsert(I);
       break;
diff --git a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
new file mode 100644
index 0000000000000..976fdb322005b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
+
+declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32>, <8 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32>, <2 x i32>, i64)
+declare <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32>, <1 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32>, <2 x i32>, i64)
+
+define <8 x i32> @vector_insert_begin(<8 x i32> %vec, <4 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_begin(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_middle(<8 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_middle(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_end(<8 x i32> %vec, <4 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_end(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_overwrite(<8 x i32> %vec, <8 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_overwrite(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <8 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    ret <8 x i32> [[SUBVEC]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
+  ret <8 x i32> %result
+}
+
+define <8 x i32> @vector_insert_single_element_at_end(<8 x i32> %vec, <1 x i32> %subvec) {
+; CHECK-LABEL: define <8 x i32> @vector_insert_single_element_at_end(
+; CHECK-SAME: <8 x i32> [[VEC:%.*]], <1 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[SUBVEC]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
+; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32> %vec, <1 x i32> %subvec, i64 7)
+  ret <8 x i32> %result
+}
+
+define <vscale x 4 x i32> @vector_insert_no_fold_scalable(<vscale x 4 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: define <vscale x 4 x i32> @vector_insert_no_fold_scalable(
+; CHECK-SAME: <vscale x 4 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> [[VEC]], <2 x i32> [[SUBVEC]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[RESULT]]
+;
+  %result = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> %vec, <2 x i32> %subvec, i64 0)
+  ret <vscale x 4 x i32> %result
+}

From fc894c8d658554dbb04e5a99c3c8f1aeef986220 Mon Sep 17 00:00:00 2001
From: Lauren
Date: Tue, 1 Jul 2025 04:52:10 -0400
Subject: [PATCH 2/4] [VectorCombine] Move canonicalize-vector-insert tests from InstCombine to VectorCombine

---
 .../canonicalize-vector-insert.ll             | 19 +++--
 .../VectorCombine/fold-vector-insert.ll       | 71 -------------------
 2 files changed, 15 insertions(+), 75 deletions(-)
 rename llvm/test/Transforms/{InstCombine => VectorCombine}/canonicalize-vector-insert.ll (84%)
 delete mode 100644 llvm/test/Transforms/VectorCombine/fold-vector-insert.ll

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
similarity index 84%
rename from llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
rename to llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
index ab7a50e55db0f..af6fe52c07920 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
 
 ; llvm.vector.insert canonicalizes to shufflevector in the fixed case. In the
 ; scalable case, we lower to the INSERT_SUBVECTOR ISD node.
@@ -31,7 +31,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
 define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_a(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
   %1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
@@ -71,7 +71,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
 define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_e(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
   %1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
@@ -91,7 +91,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
 define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_g(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
   %1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
@@ -108,6 +108,17 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
   ret <8 x i32> %1
 }
 
+; Tests insertion at middle index
+define <8 x i32> @valid_insertion_i(<8 x i32> %vec, <2 x i32> %subvec) {
+; CHECK-LABEL: @valid_insertion_i(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
+;
+  %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
+  ret <8 x i32> %result
+}
+
 ; ============================================================================ ;
 ; Scalable cases
 ; ============================================================================ ;
diff --git a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll b/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
deleted file mode 100644
index 976fdb322005b..0000000000000
--- a/llvm/test/Transforms/VectorCombine/fold-vector-insert.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
-
-declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32>, <8 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32>, <2 x i32>, i64)
-declare <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32>, <1 x i32>, i64)
-declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32>, <2 x i32>, i64)
-
-define <8 x i32> @vector_insert_begin(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_begin(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
-;
-  %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
-  ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_middle(<8 x i32> %vec, <2 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_middle(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
-;
-  %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
-  ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_end(<8 x i32> %vec, <4 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_end(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
-;
-  %result = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
-  ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_overwrite(<8 x i32> %vec, <8 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_overwrite(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <8 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    ret <8 x i32> [[SUBVEC]]
-;
-  %result = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
-  ret <8 x i32> %result
-}
-
-define <8 x i32> @vector_insert_single_element_at_end(<8 x i32> %vec, <1 x i32> %subvec) {
-; CHECK-LABEL: define <8 x i32> @vector_insert_single_element_at_end(
-; CHECK-SAME: <8 x i32> [[VEC:%.*]], <1 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[SUBVEC]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 8>
-; CHECK-NEXT:    ret <8 x i32> [[RESULT]]
-;
-  %result = call <8 x i32> @llvm.vector.insert.v8i32.v1i32(<8 x i32> %vec, <1 x i32> %subvec, i64 7)
-  ret <8 x i32> %result
-}
-
-define <vscale x 4 x i32> @vector_insert_no_fold_scalable(<vscale x 4 x i32> %vec, <2 x i32> %subvec) {
-; CHECK-LABEL: define <vscale x 4 x i32> @vector_insert_no_fold_scalable(
-; CHECK-SAME: <vscale x 4 x i32> [[VEC:%.*]], <2 x i32> [[SUBVEC:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> [[VEC]], <2 x i32> [[SUBVEC]], i64 0)
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[RESULT]]
-;
-  %result = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v2i32(<vscale x 4 x i32> %vec, <2 x i32> %subvec, i64 0)
-  ret <vscale x 4 x i32> %result
-}

From 78a18d03e9fca922403550762369a3c2202ba7fb Mon Sep 17 00:00:00 2001
From: Lauren
Date: Tue, 1 Jul 2025 11:44:43 -0400
Subject: [PATCH 3/4] [VectorCombine] Use std::iota for shuffle mask construction

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 21 +++++++------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index dbbc6c5a07ec8..55c320103afb2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -848,24 +848,17 @@ bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
   // shufflevector requires the two input vectors to be the same width.
   // Elements beyond the bounds of SubVec within the widened vector are
   // undefined.
-  SmallVector<int> WidenMask;
-  unsigned int i = 0;
-  for (i = 0; i != SubVecNumElts; ++i)
-    WidenMask.push_back(i);
-  for (; i != VecNumElts; ++i)
-    WidenMask.push_back(PoisonMaskElem);
+  SmallVector<int> WidenMask(VecNumElts, PoisonMaskElem);
+  std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0);
+  std::fill(WidenMask.begin() + SubVecNumElts, WidenMask.end(), PoisonMaskElem);
 
   auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
   Worklist.pushValue(WidenShuffle);
 
-  SmallVector<int> Mask;
-  unsigned int j;
-  for (i = 0; i != SubVecIdx; ++i)
-    Mask.push_back(i);
-  for (j = 0; j != SubVecNumElts; ++j)
-    Mask.push_back(DstNumElts + j);
-  for (i = SubVecIdx + SubVecNumElts; i != DstNumElts; ++i)
-    Mask.push_back(i);
+  SmallVector<int> Mask(DstNumElts);
+  std::iota(Mask.begin(), Mask.begin() + SubVecIdx, 0);
+  std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts, DstNumElts);
+  std::iota(Mask.begin() + SubVecIdx + SubVecNumElts, Mask.end(), SubVecIdx + SubVecNumElts);
 
   auto *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
   replaceValue(I, *Shuffle);

From 2abc2e342342cd1ad3c6367dfb2b78b2fa283839 Mon Sep 17 00:00:00 2001
From: Lauren
Date: Tue, 1 Jul 2025 13:22:04 -0400
Subject: [PATCH 4/4] [VectorCombine] Remove redundant `fill` and reduce three loops to two `iota` calls

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 55c320103afb2..609bbbdea2c6b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -835,7 +835,7 @@ bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
   unsigned SubVecIdx = SubVecPtr->getZExtValue();
 
   // Ensure insertion of SubVec doesn't exceed Dst bounds.
-  if (SubVecIdx % SubVecNumElts != 0 || SubVecIdx + SubVecNumElts > DstNumElts)
+  if ((SubVecIdx % SubVecNumElts != 0) || (SubVecIdx + SubVecNumElts > DstNumElts))
     return false;
 
   // An insert that entirely overwrites Vec with SubVec is a nop.
@@ -850,18 +850,17 @@ bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
   // undefined.
   SmallVector<int> WidenMask(VecNumElts, PoisonMaskElem);
   std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0);
-  std::fill(WidenMask.begin() + SubVecNumElts, WidenMask.end(), PoisonMaskElem);
 
   auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
   Worklist.pushValue(WidenShuffle);
 
   SmallVector<int> Mask(DstNumElts);
-  std::iota(Mask.begin(), Mask.begin() + SubVecIdx, 0);
-  std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts, DstNumElts);
-  std::iota(Mask.begin() + SubVecIdx + SubVecNumElts, Mask.end(), SubVecIdx + SubVecNumElts);
+  std::iota(Mask.begin(), Mask.end(), 0);
+  std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts,
+            DstNumElts);
 
-  auto *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
-  replaceValue(I, *Shuffle);
+  auto *InsertShuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
+  replaceValue(I, *InsertShuffle);
   return true;
 }
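
Appendix (not part of the patch series): a minimal standalone C++ sketch of the mask construction as it stands after PATCH 4, using the values from the @vector_insert_middle test (destination of 8 elements, subvector of 2 elements, insert index 2). The main/std::vector/printf scaffolding is illustrative only, and PoisonMaskElem is modelled as a plain -1 sentinel rather than LLVM's constant.

// Illustrative sketch of foldVectorInsertToShuffle's mask construction.
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  const unsigned DstNumElts = 8, VecNumElts = 8;
  const unsigned SubVecNumElts = 2, SubVecIdx = 2;
  const int PoisonMaskElem = -1; // stand-in for LLVM's sentinel

  // Widen mask: identity over the subvector lanes, poison for the tail.
  std::vector<int> WidenMask(VecNumElts, PoisonMaskElem);
  std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0);

  // Insert mask: identity over Vec, then overwrite the inserted window with
  // indices into the widened subvector (the second shuffle operand).
  std::vector<int> Mask(DstNumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts,
            DstNumElts);

  for (int M : WidenMask)
    std::printf("%d ", M); // prints: 0 1 -1 -1 -1 -1 -1 -1
  std::printf("\n");
  for (int M : Mask)
    std::printf("%d ", M); // prints: 0 1 8 9 4 5 6 7
  std::printf("\n");
  return 0;
}

The WidenMask constructor already initializes every lane to PoisonMaskElem, which is why the std::fill dropped in PATCH 4 is redundant, and seeding Mask with a full identity iota lets a single iota over the inserted window replace the three range-wise calls from PATCH 3. The second printed mask matches the CHECK line for @vector_insert_middle above.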