Commit 4876443

[SYCL][NVPTX] Removes unsupported memory orders for atomic load/store (#4106)
NVPTX currently only supports atomic loads and stores with at most monotonic ordering. Newer PTX versions add acquire and release semantics, but the NVPTX backend does not yet support them. These changes remove the current faulty implementations of atomic loads and stores with acquire, release, and seq_cst ordering in libclc for PTX and replace them with trap instructions, causing runtime errors. This is preferable because the memory order is selected through a function parameter in libclc, so unsupported orders cannot be rejected at compile time. Signed-off-by: Steffen Larsen <[email protected]>
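
Because the memory order reaches libclc as a run-time argument (the semantics parameter of the SPIR-V wrapper in atomic_load.cl below), an unsupported order cannot be turned into a compile-time diagnostic; the per-order helpers can only fail when they execute. A rough C sketch of that dispatch shape, assuming illustrative flag values and helper names rather than the actual libclc symbols:

#include <stdint.h>

/* Assumed SPIR-V MemorySemanticsMask flag values, for illustration only. */
enum {
  Acquire = 0x2,
  SequentiallyConsistent = 0x10,
};

/* Stand-ins for the libclc per-order entry points: on NVPTX only the
   unordered/monotonic path does real work, while the acquire and seq_cst
   paths now trap (see loadstore_helpers.ll below). */
static uint32_t load_unordered(volatile const uint32_t *p) {
  return *p; /* plain load as a placeholder for the real unordered atomic load */
}
static uint32_t load_unsupported(volatile const uint32_t *p) {
  (void)p;
  __builtin_trap(); /* analogue of "tail call void @llvm.trap()" + "unreachable" */
}

/* The order is a run-time value, so a bad choice is only caught when the
   branch is actually taken, not when the kernel is compiled. */
uint32_t atomic_load_u32(volatile const uint32_t *p, unsigned semantics) {
  if (semantics & Acquire)
    return load_unsupported(p);
  if (semantics & SequentiallyConsistent)
    return load_unsupported(p);
  return load_unordered(p);
}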
1 parent 2e591fb commit 4876443

File tree

2 files changed: +68 -66 lines

libclc/generic/libspirv/atomic/atomic_load.cl

Lines changed: 2 additions & 2 deletions
@@ -21,10 +21,10 @@ TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS c
     _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
         volatile AS const TYPE *p, enum Scope scope, \
         enum MemorySemanticsMask semantics) { \
-    if (semantics == Acquire) { \
+    if (semantics & Acquire) { \
       return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \
     } \
-    if (semantics == SequentiallyConsistent) { \
+    if (semantics & SequentiallyConsistent) { \
       return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \
     } \
     return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \
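
The change from == to & matters because MemorySemanticsMask is a flag bit-mask: a caller may pass the ordering bit combined with storage-class bits (WorkgroupMemory, CrossWorkgroupMemory, and so on), so an exact-equality test never matches a combined mask and such calls silently fall through to the unordered helper. A small standalone C illustration, with flag values assumed from the SPIR-V specification:

#include <stdio.h>

/* Assumed SPIR-V MemorySemanticsMask bits, for illustration only. */
enum {
  Acquire         = 0x2,
  WorkgroupMemory = 0x100,
};

int main(void) {
  unsigned semantics = Acquire | WorkgroupMemory; /* ordering bit plus a storage-class bit */

  /* Old check: exact equality misses combined masks, so this acquire request
     would have been treated as unordered. */
  printf("semantics == Acquire -> %d\n", semantics == Acquire);       /* prints 0 */

  /* New check: tests the Acquire bit regardless of any other flags set. */
  printf("semantics & Acquire  -> %d\n", (semantics & Acquire) != 0); /* prints 1 */
  return 0;
}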

libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll

Lines changed: 66 additions & 64 deletions
@@ -4,6 +4,8 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 #endif
 
+declare void @llvm.trap()
+
 define i32 @__clc__atomic_load_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
@@ -54,99 +56,99 @@ entry:
 
 define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 
 define i32 @__clc__atomic_load_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_load_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
@@ -199,96 +201,96 @@ entry:
 
 define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
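
In the rewritten helpers, the call to @llvm.trap() is not a block terminator, so each body also needs the unreachable terminator that takes the place of the old ret; together they abort the kernel when an ordering NVPTX cannot lower is requested, rather than relying on the previous, faulty lowering. A rough C analogue of one rewritten helper (the function name is illustrative, not a real libclc symbol):

#include <stdint.h>

/* Sketch of what one trap-ified helper amounts to: __builtin_trap() lowers to
   the same @llvm.trap intrinsic, and __builtin_unreachable() mirrors the IR
   unreachable terminator that stands where the old return used to be. */
uint64_t clc_atomic_load_global_8_acquire_sketch(volatile const uint64_t *ptr) {
  (void)ptr;        /* the pointer is never dereferenced on this path */
  __builtin_trap(); /* runtime error: acquire loads are unsupported on NVPTX */
  __builtin_unreachable();
}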
