Skip to content

Commit 2eba07b

Browse files
committed
[AArch64] Fix failure with inline asm and svcount
This fixes an issue where the compiler runs into an assertion failure for the following example:

    register svcount_t pred asm("pn8") = svptrue_c8();
    asm("ld1w { z0.s, z4.s, z8.s, z12.s }, %[pred]/z, [x0]\n" : : [pred] "Uph" (pred) : "memory", "cc");

Here the register constraint that ends up in the LLVM IR is "{pn8}", but the code in `TargetLowering::getRegForInlineAsmConstraint` that parses that string follows a path where it queries a suitable register class for this register (the PPRorPNR register class), for which it then chooses `nxv16i1` as a suitable type. These choices are individually correct, but the combined result isn't, because the type should be `aarch64svcount`. This then results in issues later on in SelectionDAGBuilder.cpp, in CopyToReg, because the type of the actual value and the type computed from the constraint don't match. This PR pre-empts the issue by parsing the predicate register explicitly and returning the correct register class.
1 parent 11ed7f2 commit 2eba07b

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11804,6 +11804,36 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
1180411804

1180511805
// Identifies which predicate inline-asm constraint was matched; this is the
// value type returned by parsePredicateConstraint().
// NOTE(review): the enumerators presumably mirror the constraint strings of
// the same spelling (e.g. "Uph") -- confirm against the StringSwitch.
enum class PredicateConstraint { Uph, Upl, Upa };
1180611806

11807+
// Returns a {Reg, RegisterClass} tuple if the constraint is
11808+
// a specific predicate register.
11809+
//
11810+
// For some constraint like "{pn3}" the default path in
11811+
// TargetLowering::getRegForInlineAsmConstraint() leads it to determine that a
11812+
// suitable register class for this register is "PPRorPNR", after which it
11813+
// determines that nxv16i1 is an appropriate type for the constraint, which is
11814+
// not what we want. The code here pre-empts this by matching the register
11815+
// explicitly.
11816+
static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
11817+
parsePredicateRegAsConstraint(StringRef Constraint) {
11818+
if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
11819+
Constraint[1] != 'p')
11820+
return std::nullopt;
11821+
11822+
Constraint = Constraint.substr(2, Constraint.size() - 3);
11823+
bool IsPredicateAsCount = Constraint.starts_with("n");
11824+
if (IsPredicateAsCount)
11825+
Constraint = Constraint.drop_front(1);
11826+
11827+
unsigned V;
11828+
if (Constraint.getAsInteger(10, V) || V > 31)
11829+
return std::nullopt;
11830+
11831+
if (IsPredicateAsCount)
11832+
return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
11833+
else
11834+
return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
11835+
}
11836+
1180711837
static std::optional<PredicateConstraint>
1180811838
parsePredicateConstraint(StringRef Constraint) {
1180911839
return StringSwitch<std::optional<PredicateConstraint>>(Constraint)
@@ -12051,6 +12081,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1205112081
break;
1205212082
}
1205312083
} else {
12084+
if (const auto P = parsePredicateRegAsConstraint(Constraint))
12085+
return *P;
1205412086
if (const auto PC = parsePredicateConstraint(Constraint))
1205512087
if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
1205612088
return std::make_pair(0U, RegClass);

llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,67 @@ define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %P
119119
%1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
120120
ret <vscale x 8 x half> %1
121121
}
122+
123+
; Passes a <vscale x 16 x i1> predicate to inline asm through the explicit
; register constraint "{p0}". The CHECK lines expect the value to be copied
; into $p0 before the INLINEASM and $p0 to appear as its register use.
define void @explicit_p0(ptr %p) {
124+
; CHECK-LABEL: name: explicit_p0
125+
; CHECK: bb.0 (%ir-block.0):
126+
; CHECK-NEXT: liveins: $x0
127+
; CHECK-NEXT: {{ $}}
128+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
129+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
130+
; CHECK-NEXT: $p0 = COPY [[PTRUE_B]]
131+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
132+
; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 2818058 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
133+
; CHECK-NEXT: RET_ReallyLR
134+
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
135+
%2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p0},0"(<vscale x 16 x i1> %1, ptr %p)
136+
ret void
137+
}
138+
139+
; Same shape as the "{p0}" case but with the explicit register constraint
; "{p8}". The CHECK lines expect the predicate to be copied into $p8.
; NOTE(review): the "_invalid" suffix presumably means p8 is not an acceptable
; governing predicate for this ld4w once assembled; the checks here only cover
; the register assignment -- confirm.
define void @explicit_p8_invalid(ptr %p) {
140+
; CHECK-LABEL: name: explicit_p8_invalid
141+
; CHECK: bb.0 (%ir-block.0):
142+
; CHECK-NEXT: liveins: $x0
143+
; CHECK-NEXT: {{ $}}
144+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
145+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
146+
; CHECK-NEXT: $p8 = COPY [[PTRUE_B]]
147+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
148+
; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 2818058 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
149+
; CHECK-NEXT: RET_ReallyLR
150+
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
151+
%2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p8},0"(<vscale x 16 x i1> %1, ptr %p)
152+
ret void
153+
}
154+
155+
; Passes a target("aarch64.svcount") value to inline asm through the explicit
; register constraint "{pn8}". The CHECK lines expect the value to be copied
; into $pn8 before the INLINEASM and $pn8 to appear as its register use.
define void @explicit_pn8(ptr %p) {
156+
; CHECK-LABEL: name: explicit_pn8
157+
; CHECK: bb.0 (%ir-block.0):
158+
; CHECK-NEXT: liveins: $x0
159+
; CHECK-NEXT: {{ $}}
160+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
161+
; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
162+
; CHECK-NEXT: $pn8 = COPY [[PTRUE_C_B]]
163+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
164+
; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 2818058 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
165+
; CHECK-NEXT: RET_ReallyLR
166+
%1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
167+
%2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn8},0"(target("aarch64.svcount") %1, ptr %p)
168+
ret void
169+
}
170+
171+
; Same shape as the "{pn8}" case but with the explicit register constraint
; "{pn0}". The CHECK lines expect the svcount value to be copied into $pn0.
; NOTE(review): the "_invalid" suffix presumably means pn0 is not an acceptable
; predicate-as-counter operand for this ld1w once assembled; the checks here
; only cover the register assignment -- confirm.
define void @explicit_pn0_invalid(ptr %p) {
172+
; CHECK-LABEL: name: explicit_pn0_invalid
173+
; CHECK: bb.0 (%ir-block.0):
174+
; CHECK-NEXT: liveins: $x0
175+
; CHECK-NEXT: {{ $}}
176+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
177+
; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
178+
; CHECK-NEXT: $pn0 = COPY [[PTRUE_C_B]]
179+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
180+
; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 2818058 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
181+
; CHECK-NEXT: RET_ReallyLR
182+
%1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
183+
%2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn0},0"(target("aarch64.svcount") %1, ptr %p)
184+
ret void
185+
}

0 commit comments

Comments
 (0)