diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5b398d3b75f59..df29024a86f67 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3462,52 +3462,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } break; } - case Intrinsic::vector_insert: { - Value *Vec = II->getArgOperand(0); - Value *SubVec = II->getArgOperand(1); - Value *Idx = II->getArgOperand(2); - auto *DstTy = dyn_cast(II->getType()); - auto *VecTy = dyn_cast(Vec->getType()); - auto *SubVecTy = dyn_cast(SubVec->getType()); - - // Only canonicalize if the destination vector, Vec, and SubVec are all - // fixed vectors. - if (DstTy && VecTy && SubVecTy) { - unsigned DstNumElts = DstTy->getNumElements(); - unsigned VecNumElts = VecTy->getNumElements(); - unsigned SubVecNumElts = SubVecTy->getNumElements(); - unsigned IdxN = cast(Idx)->getZExtValue(); - - // An insert that entirely overwrites Vec with SubVec is a nop. - if (VecNumElts == SubVecNumElts) - return replaceInstUsesWith(CI, SubVec); - - // Widen SubVec into a vector of the same width as Vec, since - // shufflevector requires the two input vectors to be the same width. - // Elements beyond the bounds of SubVec within the widened vector are - // undefined. 
- SmallVector WidenMask; - unsigned i; - for (i = 0; i != SubVecNumElts; ++i) - WidenMask.push_back(i); - for (; i != VecNumElts; ++i) - WidenMask.push_back(PoisonMaskElem); - - Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); - - SmallVector Mask; - for (unsigned i = 0; i != IdxN; ++i) - Mask.push_back(i); - for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) - Mask.push_back(i); - for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) - Mask.push_back(i); - - Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); - return replaceInstUsesWith(CI, Shuffle); - } - break; - } case Intrinsic::vector_extract: { Value *Vec = II->getArgOperand(0); Value *Idx = II->getArgOperand(1); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 19e82099e87f0..609bbbdea2c6b 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -112,6 +112,7 @@ class VectorCombine { bool foldExtractExtract(Instruction &I); bool foldInsExtFNeg(Instruction &I); bool foldInsExtBinop(Instruction &I); + bool foldVectorInsertToShuffle(Instruction &I); bool foldInsExtVectorToShuffle(Instruction &I); bool foldBitOpOfBitcasts(Instruction &I); bool foldBitcastShuffle(Instruction &I); @@ -804,6 +805,65 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) { return true; } +/// Try to fold vector_insert intrinsics into shufflevector instructions. +bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) { + auto *II = dyn_cast(&I); + // This optimization only applies to vector_insert intrinsics. + if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return false; + + Value *Vec = II->getArgOperand(0); + Value *SubVec = II->getArgOperand(1); + Value *Idx = II->getArgOperand(2); + + // Caller guarantees DstTy is a fixed vector. 
+ auto *DstTy = cast(II->getType()); + auto *VecTy = dyn_cast(Vec->getType()); + auto *SubVecTy = dyn_cast(SubVec->getType()); + + // Only canonicalize if Vec and SubVec are both fixed vectors. + if (!VecTy || !SubVecTy) + return false; + + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned SubVecNumElts = SubVecTy->getNumElements(); + auto *SubVecPtr = dyn_cast(Idx); + if (!SubVecPtr) + return false; + + unsigned SubVecIdx = SubVecPtr->getZExtValue(); + + // Ensure Idx is a multiple of SubVec's length and SubVec fits within Dst. + if ((SubVecIdx % SubVecNumElts != 0) || (SubVecIdx + SubVecNumElts > DstNumElts)) + return false; + + // An insert that entirely overwrites Vec with SubVec is a nop. + if (VecNumElts == SubVecNumElts) { + replaceValue(I, *SubVec); + return true; + } + + // Widen SubVec into a vector of the same width as Vec, since + // shufflevector requires the two input vectors to be the same width. + // Elements beyond the bounds of SubVec within the widened vector are + // poison. + SmallVector WidenMask(VecNumElts, PoisonMaskElem); + std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0); + + auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); + Worklist.pushValue(WidenShuffle); + + SmallVector Mask(DstNumElts); + std::iota(Mask.begin(), Mask.end(), 0); + std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts, + DstNumElts); + + auto *InsertShuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); + replaceValue(I, *InsertShuffle); + return true; +} + bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) { // Match: bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y)) Value *LHSSrc, *RHSSrc; @@ -3639,6 +3699,9 @@ bool VectorCombine::run() { // dispatching to folding functions if there's no chance of matching. 
if (IsFixedVectorType) { switch (Opcode) { + case Instruction::Call: + MadeChange |= foldVectorInsertToShuffle(I); + break; case Instruction::InsertElement: MadeChange |= vectorizeLoadInsert(I); break; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll similarity index 84% rename from llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll rename to llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll index ab7a50e55db0f..af6fe52c07920 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll +++ b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes=vector-combine -S | FileCheck %s ; llvm.vector.insert canonicalizes to shufflevector in the fixed case. In the ; scalable case, we lower to the INSERT_SUBVECTOR ISD node. 
@@ -31,7 +31,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) { define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_a( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0) @@ -71,7 +71,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) { define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_e( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0) @@ -91,7 +91,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) { define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_g( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0) @@ -108,6 +108,17 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) { ret <8 x i32> %1 } +; Tests insertion at 
middle index +define <8 x i32> @valid_insertion_i(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_i( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[RESULT]] +; + %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2) + ret <8 x i32> %result +} + ; ============================================================================ ; ; Scalable cases ; ============================================================================ ;