diff --git a/libclc/generic/libspirv/atomic/atomic_load.cl b/libclc/generic/libspirv/atomic/atomic_load.cl
index 96009c3c0b8ac..7538b00606b6c 100644
--- a/libclc/generic/libspirv/atomic/atomic_load.cl
+++ b/libclc/generic/libspirv/atomic/atomic_load.cl
@@ -21,10 +21,10 @@ TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS c
       _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
           volatile AS const TYPE *p, enum Scope scope, \
           enum MemorySemanticsMask semantics) { \
-  if (semantics == Acquire) { \
+  if (semantics & Acquire) { \
     return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \
   } \
-  if (semantics == SequentiallyConsistent) { \
+  if (semantics & SequentiallyConsistent) { \
     return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \
   } \
   return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll b/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll
index c96ba0e9d9e8a..56b28ae8c1050 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll
@@ -4,6 +4,8 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 #endif
 
+declare void @llvm.trap()
+
 define i32 @__clc__atomic_load_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
@@ -54,99 +56,99 @@ entry:
 
 define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_load_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_load_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_load_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i32 @__clc__atomic_uload_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret i32 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define i64 @__clc__atomic_uload_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
 entry:
-  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret i64 %0
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
@@ -199,96 +201,96 @@ entry:
 
 define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_store_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
 
 define void @__clc__atomic_ustore_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
-  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
-  ret void
+  tail call void @llvm.trap()
+  unreachable
 }
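
Note on the atomic_load.cl hunk above: SPIR-V's MemorySemanticsMask is a bit mask, so a semantics value that combines an ordering bit with other flags never compares equal to the bare Acquire or SequentiallyConsistent enumerator, while testing the bit with `&` still selects the intended helper. The sketch below is illustrative only, not the libclc code itself: the enum values follow the SPIR-V spec, and `pick_load_order` is a hypothetical stand-in for the macro-generated dispatch.

```c
#include <stdio.h>

/* Values as defined by SPIR-V MemorySemanticsMask (assumption: taken from the
 * SPIR-V spec, not from libclc headers). */
enum MemorySemanticsMask {
  None = 0x0,
  Acquire = 0x2,
  Release = 0x4,
  SequentiallyConsistent = 0x10,
  CrossWorkgroupMemory = 0x200
};

/* Hypothetical stand-in for the macro-generated dispatch in atomic_load.cl. */
static const char *pick_load_order(unsigned semantics) {
  if (semantics & Acquire)                 /* bit test works for combined masks */
    return "acquire";
  if (semantics & SequentiallyConsistent)
    return "seq_cst";
  return "unordered";
}

int main(void) {
  unsigned semantics = Acquire | CrossWorkgroupMemory;
  /* 'semantics == Acquire' would be false here and fall through to
   * "unordered"; the '&' test still picks the acquire path. */
  printf("%s\n", pick_load_order(semantics));
  return 0;
}
```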