Skip to content

Commit 45b8d86

Browse files
committed
[Xtensa] Improve CTPOP lowering. Minor fixes
in SHL/SRC/SRA operations selection.
1 parent ccbeae9 commit 45b8d86

File tree

5 files changed

+100
-150
lines changed

5 files changed

+100
-150
lines changed

llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,24 +153,33 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) {
153153
SDValue N0 = Node->getOperand(0);
154154
SDValue N1 = Node->getOperand(1);
155155
auto *C = dyn_cast<ConstantSDNode>(N1);
156-
// If C is constant in range [0..15] then we can generate SRLI
157-
// instruction using pattern matching, otherwise generate SRL
158-
if (!C || !isUInt<4>(C->getZExtValue())) {
159-
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1);
160-
SDNode *SRL =
161-
CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0));
162-
ReplaceNode(Node, SRL);
156+
157+
// If C is constant then we can generate SRLI
158+
// instruction using pattern matching or EXTUI, otherwise generate SRL
159+
if (C) {
160+
if (isUInt<4>(C->getZExtValue()))
161+
break;
162+
unsigned ShAmt = C->getZExtValue();
163+
SDNode *EXTUI = CurDAG->getMachineNode(
164+
Xtensa::EXTUI, DL, VT, N0, CurDAG->getTargetConstant(ShAmt, DL, VT),
165+
CurDAG->getTargetConstant(32 - ShAmt, DL, VT));
166+
ReplaceNode(Node, EXTUI);
163167
return;
164168
}
165-
break;
169+
170+
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1);
171+
SDNode *SRL =
172+
CurDAG->getMachineNode(Xtensa::SRL, DL, VT, N0, SDValue(SSR, 0));
173+
ReplaceNode(Node, SRL);
174+
return;
166175
}
167176
case ISD::SRA: {
168177
SDValue N0 = Node->getOperand(0);
169178
SDValue N1 = Node->getOperand(1);
170179
auto *C = dyn_cast<ConstantSDNode>(N1);
171-
// If C is constant in range [0..31] then we can generate SRAI
180+
// If C is constant then we can generate SRAI
172181
// instruction using pattern matching, otherwise generate SRA
173-
if (!C || !isUInt<5>(C->getZExtValue())) {
182+
if (!C) {
174183
SDNode *SSR = CurDAG->getMachineNode(Xtensa::SSR, DL, MVT::Glue, N1);
175184
SDNode *SRA =
176185
CurDAG->getMachineNode(Xtensa::SRA, DL, VT, N0, SDValue(SSR, 0));

llvm/lib/Target/Xtensa/XtensaISelLowering.cpp

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -827,47 +827,8 @@ SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op,
827827
}
828828

829829
SDValue XtensaTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
830-
EVT VT = Op->getValueType(0);
831-
SDValue Val = Op.getOperand(0);
832-
SDLoc DL(Op);
833-
834-
if (VT != MVT::i32)
835-
return SDValue();
836-
837-
// CTPOP expansion:
838-
// Val = (Val - (Val >> 1)) & 0x55555555
839-
// Val = ((Val >> 2) & 0x33333333) + (Val & 0x33333333)
840-
// Val = ((Val >> 4) + Val) & 0x0f0f0f0f
841-
// Val = (Val >> 8) + Val
842-
// Val = (extract bits [16, 20] from Val) + Val
843-
// Val = extract bits [0, 5] from Val
844-
845-
SDValue Mask = DAG.getConstant(0x55555555, DL, VT);
846-
SDValue Shift =
847-
DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(1, DL, VT));
848-
SDValue ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
849-
Val = DAG.getNode(ISD::SUB, DL, VT, Val, ShiftAndMask);
850-
851-
Mask = DAG.getConstant(0x33333333, DL, VT);
852-
Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(2, DL, VT));
853-
SDValue ValAndMask = DAG.getNode(ISD::AND, DL, VT, Val, Mask);
854-
ShiftAndMask = DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
855-
Val = DAG.getNode(ISD::ADD, DL, VT, ValAndMask, ShiftAndMask);
856-
857-
Mask = DAG.getConstant(0x0f0f0f0f, DL, VT);
858-
Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(4, DL, VT));
859-
Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift);
860-
Val = DAG.getNode(ISD::AND, DL, VT, Val, Mask);
861-
862-
Shift = DAG.getNode(ISD::SRL, DL, VT, Val, DAG.getConstant(8, DL, VT));
863-
Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift);
864-
865-
Shift = DAG.getNode(XtensaISD::EXTUI, DL, VT, Val,
866-
DAG.getConstant(16, DL, VT), DAG.getConstant(5, DL, VT));
867-
Val = DAG.getNode(ISD::ADD, DL, VT, Val, Shift);
868-
869-
return DAG.getNode(XtensaISD::EXTUI, DL, VT, Val, DAG.getConstant(0, DL, VT),
870-
DAG.getConstant(6, DL, VT));
830+
auto &TLI = DAG.getTargetLoweringInfo();
831+
return TLI.expandCTPOP(Op.getNode(), DAG);
871832
}
872833

873834
bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,

llvm/test/CodeGen/Xtensa/bswap.ll

Lines changed: 59 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,12 @@ define i16 @test_bswap_i16(i16 %a) nounwind {
2424

2525
define i32 @test_bswap_i32(i32 %a) nounwind {
2626
; XTENSA-LABEL: test_bswap_i32:
27-
; XTENSA: movi a8, 24
28-
; XTENSA-NEXT: ssr a8
29-
; XTENSA-NEXT: srl a8, a2
30-
; XTENSA-NEXT: srli a9, a2, 8
31-
; XTENSA-NEXT: l32r a10, .LCPI1_0
32-
; XTENSA-NEXT: and a9, a9, a10
33-
; XTENSA-NEXT: or a8, a9, a8
34-
; XTENSA-NEXT: and a9, a2, a10
27+
; XTENSA: srli a8, a2, 8
28+
; XTENSA-NEXT: l32r a9, .LCPI1_0
29+
; XTENSA-NEXT: and a8, a8, a9
30+
; XTENSA-NEXT: extui a10, a2, 24, 8
31+
; XTENSA-NEXT: or a8, a8, a10
32+
; XTENSA-NEXT: and a9, a2, a9
3533
; XTENSA-NEXT: slli a9, a9, 8
3634
; XTENSA-NEXT: slli a10, a2, 24
3735
; XTENSA-NEXT: or a9, a10, a9
@@ -43,28 +41,25 @@ define i32 @test_bswap_i32(i32 %a) nounwind {
4341

4442
define i64 @test_bswap_i64(i64 %a) nounwind {
4543
; XTENSA-LABEL: test_bswap_i64:
46-
; XTENSA: movi a9, 24
47-
; XTENSA-NEXT: ssr a9
48-
; XTENSA-NEXT: srl a8, a3
49-
; XTENSA-NEXT: srli a10, a3, 8
50-
; XTENSA-NEXT: l32r a11, .LCPI2_0
51-
; XTENSA-NEXT: and a10, a10, a11
52-
; XTENSA-NEXT: or a8, a10, a8
53-
; XTENSA-NEXT: and a10, a3, a11
44+
; XTENSA: srli a8, a3, 8
45+
; XTENSA-NEXT: l32r a9, .LCPI2_0
46+
; XTENSA-NEXT: and a8, a8, a9
47+
; XTENSA-NEXT: extui a10, a3, 24, 8
48+
; XTENSA-NEXT: or a8, a8, a10
49+
; XTENSA-NEXT: and a10, a3, a9
5450
; XTENSA-NEXT: slli a10, a10, 8
55-
; XTENSA-NEXT: slli a7, a3, 24
56-
; XTENSA-NEXT: or a10, a7, a10
51+
; XTENSA-NEXT: slli a11, a3, 24
52+
; XTENSA-NEXT: or a10, a11, a10
5753
; XTENSA-NEXT: or a8, a10, a8
58-
; XTENSA-NEXT: ssr a9
59-
; XTENSA-NEXT: srl a9, a2
6054
; XTENSA-NEXT: srli a10, a2, 8
61-
; XTENSA-NEXT: and a10, a10, a11
62-
; XTENSA-NEXT: or a9, a10, a9
63-
; XTENSA-NEXT: and a10, a2, a11
64-
; XTENSA-NEXT: slli a10, a10, 8
55+
; XTENSA-NEXT: and a10, a10, a9
56+
; XTENSA-NEXT: extui a11, a2, 24, 8
57+
; XTENSA-NEXT: or a10, a10, a11
58+
; XTENSA-NEXT: and a9, a2, a9
59+
; XTENSA-NEXT: slli a9, a9, 8
6560
; XTENSA-NEXT: slli a11, a2, 24
66-
; XTENSA-NEXT: or a10, a11, a10
67-
; XTENSA-NEXT: or a3, a10, a9
61+
; XTENSA-NEXT: or a9, a11, a9
62+
; XTENSA-NEXT: or a3, a9, a10
6863
; XTENSA-NEXT: or a2, a8, a8
6964
; XTENSA-NEXT: ret
7065
%tmp = call i64 @llvm.bswap.i64(i64 %a)
@@ -129,14 +124,12 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
129124

130125
define i32 @test_bitreverse_i32(i32 %a) nounwind {
131126
; XTENSA-LABEL: test_bitreverse_i32:
132-
; XTENSA: movi a8, 24
133-
; XTENSA-NEXT: ssr a8
134-
; XTENSA-NEXT: srl a8, a2
135-
; XTENSA-NEXT: srli a9, a2, 8
136-
; XTENSA-NEXT: l32r a10, .LCPI5_0
137-
; XTENSA-NEXT: and a9, a9, a10
138-
; XTENSA-NEXT: or a8, a9, a8
139-
; XTENSA-NEXT: and a9, a2, a10
127+
; XTENSA: srli a8, a2, 8
128+
; XTENSA-NEXT: l32r a9, .LCPI5_0
129+
; XTENSA-NEXT: and a8, a8, a9
130+
; XTENSA-NEXT: extui a10, a2, 24, 8
131+
; XTENSA-NEXT: or a8, a8, a10
132+
; XTENSA-NEXT: and a9, a2, a9
140133
; XTENSA-NEXT: slli a9, a9, 8
141134
; XTENSA-NEXT: slli a10, a2, 24
142135
; XTENSA-NEXT: or a9, a10, a9
@@ -166,59 +159,56 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
166159

167160
define i64 @test_bitreverse_i64(i64 %a) nounwind {
168161
; XTENSA-LABEL: test_bitreverse_i64:
169-
; XTENSA: movi a10, 24
170-
; XTENSA-NEXT: ssr a10
171-
; XTENSA-NEXT: srl a8, a3
172-
; XTENSA-NEXT: srli a11, a3, 8
162+
; XTENSA: srli a8, a3, 8
173163
; XTENSA-NEXT: l32r a9, .LCPI6_0
174-
; XTENSA-NEXT: and a11, a11, a9
175-
; XTENSA-NEXT: or a8, a11, a8
176-
; XTENSA-NEXT: and a11, a3, a9
177-
; XTENSA-NEXT: slli a11, a11, 8
178-
; XTENSA-NEXT: slli a7, a3, 24
179-
; XTENSA-NEXT: or a11, a7, a11
180-
; XTENSA-NEXT: or a8, a11, a8
181-
; XTENSA-NEXT: srli a7, a8, 4
164+
; XTENSA-NEXT: and a8, a8, a9
165+
; XTENSA-NEXT: extui a10, a3, 24, 8
166+
; XTENSA-NEXT: or a8, a8, a10
167+
; XTENSA-NEXT: and a10, a3, a9
168+
; XTENSA-NEXT: slli a10, a10, 8
169+
; XTENSA-NEXT: slli a11, a3, 24
170+
; XTENSA-NEXT: or a10, a11, a10
171+
; XTENSA-NEXT: or a8, a10, a8
172+
; XTENSA-NEXT: srli a10, a8, 4
182173
; XTENSA-NEXT: l32r a11, .LCPI6_1
183-
; XTENSA-NEXT: and a7, a7, a11
174+
; XTENSA-NEXT: and a10, a10, a11
184175
; XTENSA-NEXT: and a8, a8, a11
185176
; XTENSA-NEXT: slli a8, a8, 4
186-
; XTENSA-NEXT: or a8, a7, a8
187-
; XTENSA-NEXT: srli a7, a8, 2
188-
; XTENSA-NEXT: l32r a6, .LCPI6_2
189-
; XTENSA-NEXT: and a7, a7, a6
190-
; XTENSA-NEXT: and a8, a8, a6
177+
; XTENSA-NEXT: or a8, a10, a8
178+
; XTENSA-NEXT: srli a10, a8, 2
179+
; XTENSA-NEXT: l32r a7, .LCPI6_2
180+
; XTENSA-NEXT: and a10, a10, a7
181+
; XTENSA-NEXT: and a8, a8, a7
191182
; XTENSA-NEXT: slli a8, a8, 2
192-
; XTENSA-NEXT: or a8, a7, a8
193-
; XTENSA-NEXT: srli a7, a8, 1
194-
; XTENSA-NEXT: l32r a5, .LCPI6_3
195-
; XTENSA-NEXT: and a7, a7, a5
196-
; XTENSA-NEXT: and a8, a8, a5
183+
; XTENSA-NEXT: or a8, a10, a8
184+
; XTENSA-NEXT: srli a10, a8, 1
185+
; XTENSA-NEXT: l32r a6, .LCPI6_3
186+
; XTENSA-NEXT: and a10, a10, a6
187+
; XTENSA-NEXT: and a8, a8, a6
197188
; XTENSA-NEXT: slli a8, a8, 1
198-
; XTENSA-NEXT: or a8, a7, a8
199-
; XTENSA-NEXT: ssr a10
200-
; XTENSA-NEXT: srl a10, a2
201-
; XTENSA-NEXT: srli a7, a2, 8
202-
; XTENSA-NEXT: and a7, a7, a9
203-
; XTENSA-NEXT: or a10, a7, a10
189+
; XTENSA-NEXT: or a8, a10, a8
190+
; XTENSA-NEXT: srli a10, a2, 8
191+
; XTENSA-NEXT: and a10, a10, a9
192+
; XTENSA-NEXT: extui a5, a2, 24, 8
193+
; XTENSA-NEXT: or a10, a10, a5
204194
; XTENSA-NEXT: and a9, a2, a9
205195
; XTENSA-NEXT: slli a9, a9, 8
206-
; XTENSA-NEXT: slli a7, a2, 24
207-
; XTENSA-NEXT: or a9, a7, a9
196+
; XTENSA-NEXT: slli a5, a2, 24
197+
; XTENSA-NEXT: or a9, a5, a9
208198
; XTENSA-NEXT: or a9, a9, a10
209199
; XTENSA-NEXT: srli a10, a9, 4
210200
; XTENSA-NEXT: and a10, a10, a11
211201
; XTENSA-NEXT: and a9, a9, a11
212202
; XTENSA-NEXT: slli a9, a9, 4
213203
; XTENSA-NEXT: or a9, a10, a9
214204
; XTENSA-NEXT: srli a10, a9, 2
215-
; XTENSA-NEXT: and a10, a10, a6
216-
; XTENSA-NEXT: and a9, a9, a6
205+
; XTENSA-NEXT: and a10, a10, a7
206+
; XTENSA-NEXT: and a9, a9, a7
217207
; XTENSA-NEXT: slli a9, a9, 2
218208
; XTENSA-NEXT: or a9, a10, a9
219209
; XTENSA-NEXT: srli a10, a9, 1
220-
; XTENSA-NEXT: and a10, a10, a5
221-
; XTENSA-NEXT: and a9, a9, a5
210+
; XTENSA-NEXT: and a10, a10, a6
211+
; XTENSA-NEXT: and a9, a9, a6
222212
; XTENSA-NEXT: slli a9, a9, 1
223213
; XTENSA-NEXT: or a3, a10, a9
224214
; XTENSA-NEXT: or a2, a8, a8

llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
2929
; XTENSA-NEXT: add a8, a8, a9
3030
; XTENSA-NEXT: l32r a9, .LCPI0_2
3131
; XTENSA-NEXT: and a8, a8, a9
32-
; XTENSA-NEXT: srli a9, a8, 8
32+
; XTENSA-NEXT: slli a9, a8, 8
3333
; XTENSA-NEXT: add a8, a8, a9
34-
; XTENSA-NEXT: extui a9, a8, 16, 5
34+
; XTENSA-NEXT: slli a9, a8, 16
3535
; XTENSA-NEXT: add a8, a8, a9
36-
; XTENSA-NEXT: extui a8, a8, 0, 6
36+
; XTENSA-NEXT: extui a8, a8, 24, 8
3737
; XTENSA-NEXT: .LBB0_2: # %cond.end
3838
; XTENSA-NEXT: or a2, a8, a8
3939
; XTENSA-NEXT: ret
@@ -60,11 +60,11 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
6060
; XTENSA-NEXT: add a8, a8, a9
6161
; XTENSA-NEXT: l32r a9, .LCPI1_2
6262
; XTENSA-NEXT: and a8, a8, a9
63-
; XTENSA-NEXT: srli a9, a8, 8
63+
; XTENSA-NEXT: slli a9, a8, 8
6464
; XTENSA-NEXT: add a8, a8, a9
65-
; XTENSA-NEXT: extui a9, a8, 16, 5
65+
; XTENSA-NEXT: slli a9, a8, 16
6666
; XTENSA-NEXT: add a8, a8, a9
67-
; XTENSA-NEXT: extui a2, a8, 0, 6
67+
; XTENSA-NEXT: extui a2, a8, 24, 8
6868
; XTENSA-NEXT: ret
6969
%tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true)
7070
ret i32 %tmp
@@ -85,9 +85,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
8585
; XTENSA-NEXT: or a8, a8, a9
8686
; XTENSA-NEXT: srli a9, a8, 8
8787
; XTENSA-NEXT: or a8, a8, a9
88-
; XTENSA-NEXT: movi a9, 16
89-
; XTENSA-NEXT: ssr a9
90-
; XTENSA-NEXT: srl a9, a8
88+
; XTENSA-NEXT: extui a9, a8, 16, 16
9189
; XTENSA-NEXT: or a8, a8, a9
9290
; XTENSA-NEXT: movi a9, -1
9391
; XTENSA-NEXT: xor a8, a8, a9
@@ -104,11 +102,11 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
104102
; XTENSA-NEXT: add a8, a8, a9
105103
; XTENSA-NEXT: l32r a9, .LCPI2_2
106104
; XTENSA-NEXT: and a8, a8, a9
107-
; XTENSA-NEXT: srli a9, a8, 8
105+
; XTENSA-NEXT: slli a9, a8, 8
108106
; XTENSA-NEXT: add a8, a8, a9
109-
; XTENSA-NEXT: extui a9, a8, 16, 5
107+
; XTENSA-NEXT: slli a9, a8, 16
110108
; XTENSA-NEXT: add a8, a8, a9
111-
; XTENSA-NEXT: extui a2, a8, 0, 6
109+
; XTENSA-NEXT: extui a2, a8, 24, 8
112110
; XTENSA-NEXT: .LBB2_2: # %cond.end
113111
; XTENSA-NEXT: ret
114112
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
@@ -125,9 +123,7 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
125123
; XTENSA-NEXT: or a8, a8, a9
126124
; XTENSA-NEXT: srli a9, a8, 8
127125
; XTENSA-NEXT: or a8, a8, a9
128-
; XTENSA-NEXT: movi a9, 16
129-
; XTENSA-NEXT: ssr a9
130-
; XTENSA-NEXT: srl a9, a8
126+
; XTENSA-NEXT: extui a9, a8, 16, 16
131127
; XTENSA-NEXT: or a8, a8, a9
132128
; XTENSA-NEXT: movi a9, -1
133129
; XTENSA-NEXT: xor a8, a8, a9
@@ -144,11 +140,11 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
144140
; XTENSA-NEXT: add a8, a8, a9
145141
; XTENSA-NEXT: l32r a9, .LCPI3_2
146142
; XTENSA-NEXT: and a8, a8, a9
147-
; XTENSA-NEXT: srli a9, a8, 8
143+
; XTENSA-NEXT: slli a9, a8, 8
148144
; XTENSA-NEXT: add a8, a8, a9
149-
; XTENSA-NEXT: extui a9, a8, 16, 5
145+
; XTENSA-NEXT: slli a9, a8, 16
150146
; XTENSA-NEXT: add a8, a8, a9
151-
; XTENSA-NEXT: extui a2, a8, 0, 6
147+
; XTENSA-NEXT: extui a2, a8, 24, 8
152148
; XTENSA-NEXT: ret
153149
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
154150
ret i32 %tmp
@@ -169,11 +165,11 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
169165
; XTENSA-NEXT: add a8, a8, a9
170166
; XTENSA-NEXT: l32r a9, .LCPI4_2
171167
; XTENSA-NEXT: and a8, a8, a9
172-
; XTENSA-NEXT: srli a9, a8, 8
168+
; XTENSA-NEXT: slli a9, a8, 8
173169
; XTENSA-NEXT: add a8, a8, a9
174-
; XTENSA-NEXT: extui a9, a8, 16, 5
170+
; XTENSA-NEXT: slli a9, a8, 16
175171
; XTENSA-NEXT: add a8, a8, a9
176-
; XTENSA-NEXT: extui a2, a8, 0, 6
172+
; XTENSA-NEXT: extui a2, a8, 24, 8
177173
; XTENSA-NEXT: ret
178174
%1 = call i32 @llvm.ctpop.i32(i32 %a)
179175
ret i32 %1

0 commit comments

Comments
 (0)