Skip to content

Commit 704af5e

Browse files
committed
re-enable direct bitcasts for Int/Float vector transmutes (but not ones involving pointers)
1 parent af1b680 commit 704af5e

File tree

2 files changed

+206
-0
lines changed

2 files changed

+206
-0
lines changed

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
253253
return OperandValue::poison(bx, cast);
254254
}
255255

256+
// To or from pointers takes different methods, so we use this to restrict
257+
// the SimdVector case to types which can be `bitcast` between each other.
258+
// Reports whether a SIMD element scalar is eligible for the direct-`bitcast`
// transmute path used by the `SimdVector` match arm.
#[inline]
259+
fn vector_can_bitcast(x: abi::Scalar) -> bool {
260+
// Only an initialized scalar whose primitive is an integer or a float is
// accepted; every other scalar (pointers included) yields `false`.
matches!(
261+
x,
262+
abi::Scalar::Initialized {
263+
value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
264+
..
265+
}
266+
)
267+
}
268+
256269
let cx = bx.cx();
257270
match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
258271
_ if cast.is_zst() => OperandValue::ZeroSized,
@@ -269,6 +282,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
269282
) if from_scalar.size(cx) == to_scalar.size(cx) => {
270283
OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
271284
}
285+
(
286+
OperandValue::Immediate(imm),
287+
abi::BackendRepr::SimdVector { element: from_scalar, .. },
288+
abi::BackendRepr::SimdVector { element: to_scalar, .. },
289+
) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
290+
let to_backend_ty = bx.cx().immediate_backend_type(cast);
291+
OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
292+
}
272293
(
273294
OperandValue::Pair(imm_a, imm_b),
274295
abi::BackendRepr::ScalarPair(in_a, in_b),
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
2+
//@ only-64bit (so I don't need to worry about usize)
3+
//@ revisions: aarch64 x86_64
4+
//@ [aarch64] only-aarch64
5+
//@ [aarch64] compile-flags: -C target-feature=+neon
6+
//@ [x86_64] only-x86_64
7+
//@ [x86_64] compile-flags: -C target-feature=+sse2
8+
9+
#![crate_type = "lib"]
10+
#![feature(core_intrinsics)]
11+
#![feature(repr_simd)]
12+
13+
use std::intrinsics::transmute;
14+
15+
// These tests use the "C" ABI so that the vectors in question aren't passed and
16+
// returned through memory (as they are in the "Rust" ABI), which greatly
17+
// simplifies seeing the difference between the in-operand cases vs the ones
18+
// that fall back to just using the `LocalKind::Memory` path.
19+
20+
// SIMD vector types used as transmute sources and destinations below.
// Four `i32` lanes.
#[repr(simd)]
21+
pub struct I32X4([i32; 4]);
22+
// Two `i64` lanes.
#[repr(simd)]
23+
pub struct I64X2([i64; 2]);
24+
// Four `f32` lanes.
#[repr(simd)]
25+
pub struct F32X4([f32; 4]);
26+
// Two `f64` lanes.
#[repr(simd)]
27+
pub struct F64X2([f64; 2]);
28+
// Two raw-pointer lanes; the same size as the other vectors only on 64-bit
// targets, which is what this test is restricted to.
#[repr(simd)]
29+
pub struct PtrX2([*const (); 2]);
30+
31+
// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
32+
// Int vector to int vector with a different lane count (4 x i32 -> 2 x i64).
// The directives in the body expect one direct `bitcast` and no stack slot.
#[no_mangle]
33+
pub extern "C" fn mixed_int(v: I32X4) -> I64X2 {
34+
// CHECK-NOT: alloca
35+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
36+
// CHECK: ret <2 x i64> %[[RET]]
37+
// SAFETY: both types are 16-byte vectors of plain integers, so every bit
// pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
38+
}
39+
40+
// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
41+
// Float vector to float vector with a different lane count
// (4 x float -> 2 x double). The directives in the body expect one direct
// `bitcast` and no stack slot.
#[no_mangle]
42+
pub extern "C" fn mixed_float(v: F32X4) -> F64X2 {
43+
// CHECK-NOT: alloca
44+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
45+
// CHECK: ret <2 x double> %[[RET]]
46+
// SAFETY: both types are 16-byte vectors of plain floats, so every bit
// pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
47+
}
48+
49+
// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
50+
// Float vector to int vector with the same lane count
// (4 x float -> 4 x i32). The directives in the body expect one direct
// `bitcast` and no stack slot.
#[no_mangle]
51+
pub extern "C" fn float_int_same_lanes(v: F32X4) -> I32X4 {
52+
// CHECK-NOT: alloca
53+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
54+
// CHECK: ret <4 x i32> %[[RET]]
55+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
56+
}
57+
58+
// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
59+
// Int vector to float vector with the same lane count
// (2 x i64 -> 2 x double). The directives in the body expect one direct
// `bitcast` and no stack slot.
#[no_mangle]
60+
pub extern "C" fn int_float_same_lanes(v: I64X2) -> F64X2 {
61+
// CHECK-NOT: alloca
62+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
63+
// CHECK: ret <2 x double> %[[RET]]
64+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
65+
}
66+
67+
// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
68+
// Float vector to int vector with wider lanes (4 x float -> 2 x i64).
// The directives in the body expect one direct `bitcast` and no stack slot.
#[no_mangle]
69+
pub extern "C" fn float_int_widen(v: F32X4) -> I64X2 {
70+
// CHECK-NOT: alloca
71+
// CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
72+
// CHECK: ret <2 x i64> %[[RET]]
73+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
74+
}
75+
76+
// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
77+
// Int vector to float vector with wider lanes (4 x i32 -> 2 x double).
// The directives in the body expect one direct `bitcast` and no stack slot.
#[no_mangle]
78+
pub extern "C" fn int_float_widen(v: I32X4) -> F64X2 {
79+
// CHECK-NOT: alloca
80+
// CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
81+
// CHECK: ret <2 x double> %[[RET]]
82+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
83+
}
84+
85+
// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
86+
// Float vector to int vector with narrower lanes (2 x double -> 4 x i32).
// The directives in the body expect one direct `bitcast` and no stack slot.
#[no_mangle]
87+
pub extern "C" fn float_int_narrow(v: F64X2) -> I32X4 {
88+
// CHECK-NOT: alloca
89+
// CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
90+
// CHECK: ret <4 x i32> %[[RET]]
91+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
92+
}
93+
94+
// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
95+
// Int vector to float vector with narrower lanes (2 x i64 -> 4 x float).
// The directives in the body expect one direct `bitcast` and no stack slot.
#[no_mangle]
96+
pub extern "C" fn int_float_narrow(v: I64X2) -> F32X4 {
97+
// CHECK-NOT: alloca
98+
// CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
99+
// CHECK: ret <4 x float> %[[RET]]
100+
// SAFETY: both types are 16-byte vectors of plain ints/floats, so every
// bit pattern of the source is a valid value of the destination.
unsafe { transmute(v) }
101+
}
102+
103+
// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
104+
// Float vector to pointer vector with the same lane count
// (2 x double -> 2 x ptr). Because a pointer element is involved, the
// directives in the body expect the value to round-trip through exactly one
// 16-byte stack temporary (store, then load) instead of a `bitcast`.
#[no_mangle]
105+
pub extern "C" fn float_ptr_same_lanes(v: F64X2) -> PtrX2 {
106+
// CHECK-NOT: alloca
107+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
108+
// CHECK-NOT: alloca
109+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
110+
// CHECK: store <2 x double> %v, ptr %[[TEMP]]
111+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
112+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
113+
// CHECK: ret <2 x ptr> %[[RET]]
114+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and the resulting pointers are only returned, never
// dereferenced.
unsafe { transmute(v) }
115+
}
116+
117+
// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
118+
// Pointer vector to float vector with the same lane count
// (2 x ptr -> 2 x double). Because a pointer element is involved, the
// directives in the body expect the value to round-trip through exactly one
// 16-byte stack temporary (store, then load) instead of a `bitcast`.
#[no_mangle]
119+
pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> F64X2 {
120+
// CHECK-NOT: alloca
121+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
122+
// CHECK-NOT: alloca
123+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
124+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
125+
// CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
126+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
127+
// CHECK: ret <2 x double> %[[RET]]
128+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and every bit pattern is a valid float.
unsafe { transmute(v) }
129+
}
130+
131+
// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
132+
// Int vector to pointer vector with the same lane count
// (2 x i64 -> 2 x ptr). Because a pointer element is involved, the
// directives in the body expect the value to round-trip through exactly one
// 16-byte stack temporary (store, then load) instead of a `bitcast`.
#[no_mangle]
133+
pub extern "C" fn int_ptr_same_lanes(v: I64X2) -> PtrX2 {
134+
// CHECK-NOT: alloca
135+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
136+
// CHECK-NOT: alloca
137+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
138+
// CHECK: store <2 x i64> %v, ptr %[[TEMP]]
139+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
140+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
141+
// CHECK: ret <2 x ptr> %[[RET]]
142+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and the resulting pointers are only returned, never
// dereferenced.
unsafe { transmute(v) }
143+
}
144+
145+
// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
146+
// Pointer vector to int vector with the same lane count
// (2 x ptr -> 2 x i64). Because a pointer element is involved, the
// directives in the body expect the value to round-trip through exactly one
// 16-byte stack temporary (store, then load) instead of a `bitcast`.
#[no_mangle]
147+
pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> I64X2 {
148+
// CHECK-NOT: alloca
149+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
150+
// CHECK-NOT: alloca
151+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
152+
// CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
153+
// CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
154+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
155+
// CHECK: ret <2 x i64> %[[RET]]
156+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and every bit pattern is a valid integer.
unsafe { transmute(v) }
157+
}
158+
159+
// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
160+
// Float vector to pointer vector with wider lanes (4 x float -> 2 x ptr).
// Because a pointer element is involved, the directives in the body expect
// the value to round-trip through exactly one 16-byte stack temporary
// (store, then load) instead of a `bitcast`.
#[no_mangle]
161+
pub extern "C" fn float_ptr_widen(v: F32X4) -> PtrX2 {
162+
// CHECK-NOT: alloca
163+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
164+
// CHECK-NOT: alloca
165+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
166+
// CHECK: store <4 x float> %v, ptr %[[TEMP]]
167+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
168+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
169+
// CHECK: ret <2 x ptr> %[[RET]]
170+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and the resulting pointers are only returned, never
// dereferenced.
unsafe { transmute(v) }
171+
}
172+
173+
// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
174+
// Int vector to pointer vector with wider lanes (4 x i32 -> 2 x ptr).
// Because a pointer element is involved, the directives in the body expect
// the value to round-trip through exactly one 16-byte stack temporary
// (store, then load) instead of a `bitcast`.
#[no_mangle]
175+
pub extern "C" fn int_ptr_widen(v: I32X4) -> PtrX2 {
176+
// CHECK-NOT: alloca
177+
// CHECK: %[[TEMP:.+]] = alloca [16 x i8]
178+
// CHECK-NOT: alloca
179+
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
180+
// CHECK: store <4 x i32> %v, ptr %[[TEMP]]
181+
// CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
182+
// CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
183+
// CHECK: ret <2 x ptr> %[[RET]]
184+
// SAFETY: source and destination are the same size on the 64-bit targets
// this test runs on, and the resulting pointers are only returned, never
// dereferenced.
unsafe { transmute(v) }
185+
}

0 commit comments

Comments
 (0)