Skip to content

Commit e6525d9

Browse files
committed
keep ExtIdx if it is within the dest vector bounds
1 parent 824c68e commit e6525d9

File tree

3 files changed

+44
-45
lines changed

3 files changed

+44
-45
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3084,21 +3084,22 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30843084
SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
30853085

30863086
bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
3087+
bool IsExtIdxInBounds = ExtIdx < NumDstElts;
30873088
bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
30883089
if (NeedDstSrcSwap) {
30893090
SK = TargetTransformInfo::SK_PermuteSingleSrc;
3090-
if (!NeedExpOrNarrow)
3091-
Mask[InsIdx] = ExtIdx;
3092-
else
3091+
if (!IsExtIdxInBounds && NeedExpOrNarrow)
30933092
Mask[InsIdx] = 0;
3093+
else
3094+
Mask[InsIdx] = ExtIdx;
30943095
std::swap(DstVec, SrcVec);
30953096
} else {
30963097
SK = TargetTransformInfo::SK_PermuteTwoSrc;
30973098
std::iota(Mask.begin(), Mask.end(), 0);
3098-
if (!NeedExpOrNarrow)
3099-
Mask[InsIdx] = ExtIdx + NumDstElts;
3100-
else
3099+
if (!IsExtIdxInBounds && NeedExpOrNarrow)
31013100
Mask[InsIdx] = NumDstElts;
3101+
else
3102+
Mask[InsIdx] = ExtIdx + NumDstElts;
31023103
}
31033104

31043105
// Cost
@@ -3123,7 +3124,10 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
31233124
// first element has an ExtIdx, so that the first element of the vector
31243125
// being created is always the target to be extracted.
31253126
ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
3126-
ExtToVecMask[0] = ExtIdx;
3127+
if (IsExtIdxInBounds)
3128+
ExtToVecMask[ExtIdx] = ExtIdx;
3129+
else
3130+
ExtToVecMask[0] = ExtIdx;
31273131
// Add cost for expanding or narrowing
31283132
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
31293133
DstVecTy, ExtToVecMask, CostKind);

llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
5858
}
5959

6060
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
61-
; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64(
62-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
63-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
64-
; CHECK-NEXT: ret <4 x double> [[INS]]
61+
; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
62+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
63+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
64+
; SSE-NEXT: ret <4 x double> [[INS]]
65+
;
66+
; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
67+
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
68+
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0
69+
; AVX-NEXT: ret <4 x double> [[INS]]
6570
;
6671
%ext = extractelement <2 x double> %b, i32 1
6772
%ins = insertelement <4 x double> poison, double %ext, i32 0
@@ -70,8 +75,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
7075

7176
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
7277
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
73-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
74-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
78+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
79+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
7580
; CHECK-NEXT: ret <4 x double> [[INS]]
7681
;
7782
%ext = extractelement <2 x double> %b, i32 1
@@ -80,15 +85,10 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8085
}
8186

8287
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
83-
; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
84-
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
85-
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
86-
; SSE-NEXT: ret <4 x double> [[INS]]
87-
;
88-
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
89-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
90-
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
91-
; AVX-NEXT: ret <4 x double> [[INS]]
88+
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
89+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
90+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
91+
; CHECK-NEXT: ret <4 x double> [[INS]]
9292
;
9393
%ext = extractelement <2 x double> %b, i32 1
9494
%ins = insertelement <4 x double> poison, double %ext, i32 2
@@ -97,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
9797

9898
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
9999
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
100-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
101-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
100+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
101+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
102102
; CHECK-NEXT: ret <4 x double> [[INS]]
103103
;
104104
%ext = extractelement <2 x double> %b, i32 1
@@ -163,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
163163

164164
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
165165
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
166-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
167-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
166+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
167+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
168168
; CHECK-NEXT: ret <2 x double> [[INS]]
169169
;
170170
%ext = extractelement <4 x double> %b, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-insert.ll

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
4949

5050
define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
5151
; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64(
52-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
53-
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
52+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
53+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
5454
; SSE-NEXT: ret <4 x double> [[INS]]
5555
;
5656
; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64(
@@ -64,15 +64,10 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
6464
}
6565

6666
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
67-
; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
68-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
69-
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
70-
; SSE-NEXT: ret <4 x double> [[INS]]
71-
;
72-
; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
73-
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
74-
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1
75-
; AVX-NEXT: ret <4 x double> [[INS]]
67+
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
68+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
69+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
70+
; CHECK-NEXT: ret <4 x double> [[INS]]
7671
;
7772
%ext = extractelement <2 x double> %b, i32 1
7873
%ins = insertelement <4 x double> %a, double %ext, i32 1
@@ -81,8 +76,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8176

8277
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
8378
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
84-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
85-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
79+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
80+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
8681
; CHECK-NEXT: ret <4 x double> [[INS]]
8782
;
8883
%ext = extractelement <2 x double> %b, i32 1
@@ -92,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
9287

9388
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
9489
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
95-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
96-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
90+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
91+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
9792
; CHECK-NEXT: ret <4 x double> [[INS]]
9893
;
9994
%ext = extractelement <2 x double> %b, i32 1
@@ -114,8 +109,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
114109

115110
define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
116111
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
117-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
118-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 2, i32 1>
112+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
113+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 3, i32 1>
119114
; CHECK-NEXT: ret <2 x double> [[INS]]
120115
;
121116
%ext = extractelement <4 x double> %b, i32 1
@@ -158,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
158153

159154
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
160155
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
161-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
162-
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
156+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
157+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
163158
; CHECK-NEXT: ret <2 x double> [[INS]]
164159
;
165160
%ext = extractelement <4 x double> %b, i32 1

0 commit comments

Comments
 (0)