diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 95e1f96c71b48..3e459f5ea4ce5 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2282,6 +2282,17 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) { if (!Match0 && !Match1) return false; + // If the outer shuffle is a permute, then create a fake inner all-poison + // shuffle. This is easier than accounting for length-changing shuffles below. + SmallVector PoisonMask1; + if (!Match1 && isa(OuterV1)) { + X1 = X0; + Y1 = Y0; + PoisonMask1.append(InnerMask0.size(), PoisonMaskElem); + InnerMask1 = PoisonMask1; + Match1 = true; // fake match + } + X0 = Match0 ? X0 : OuterV0; Y0 = Match0 ? Y0 : OuterV0; X1 = Match1 ? X1 : OuterV1; @@ -2356,11 +2367,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) { // Try to merge the shuffles if the new shuffle is not costly. InstructionCost InnerCost0 = 0; if (Match0) - InnerCost0 = TTI.getInstructionCost(cast(OuterV0), CostKind); + InnerCost0 = TTI.getInstructionCost(cast(OuterV0), CostKind); InstructionCost InnerCost1 = 0; if (Match1) - InnerCost1 = TTI.getInstructionCost(cast(OuterV1), CostKind); + InnerCost1 = TTI.getInstructionCost(cast(OuterV1), CostKind); InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 1c9bc77ac3bef..1c128c8f56a03 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -262,10 +262,8 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splattwice( -; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[AS]], <4 x half> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[BS]], <4 x half> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x half> [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll index 642d07a8f3253..e85c092b1b213 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -32,8 +32,7 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) ; SSE-NEXT: ret <4 x double> [[INS]] ; ; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> ; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 0 @@ -48,8 +47,7 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) ; SSE-NEXT: ret <4 x double> [[INS]] ; ; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> ; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 0 @@ -86,8 +84,7 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) ; SSE-NEXT: ret <4 x double> [[INS]] ; ; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> ; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -96,10 +93,14 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64( +; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> poison, double %ext, i32 3 @@ -119,8 +120,7 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -152,8 +152,7 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 0 @@ -164,8 +163,7 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: ret <2 x double> [[INS]] +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %ext = extractelement <4 x double> %b, i32 1 %ins = insertelement <2 x double> poison, double %ext, i32 1 @@ -174,8 +172,7 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { ; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> ; SSE-NEXT: ret <2 x double> [[INS]] ; ; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64( @@ -190,8 +187,7 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index 73476308916fb..40437ca345224 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -578,8 +578,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer ; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[R]] +; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %l = load <1 x i32>, ptr %p, align 4 store <1 x i32> %l, ptr %store_ptr