diff --git a/libclc/generic/include/spirv/atomic/atomic_decl.inc b/libclc/generic/include/spirv/atomic/atomic_decl.inc
index 6999df203e253..22a802b66f308 100644
--- a/libclc/generic/include/spirv/atomic/atomic_decl.inc
+++ b/libclc/generic/include/spirv/atomic/atomic_decl.inc
@@ -28,6 +28,8 @@ __CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, __SPIRV_FUNCTION_U_
 #ifdef cl_khr_int64_base_atomics
 __CLC_DECLARE_ATOMIC_ADDRSPACE(long, l, __SPIRV_FUNCTION_S, __SPIRV_FUNCTION_S_LEN)
 __CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, __SPIRV_FUNCTION_U_LEN)
+__CLC_DECLARE_ATOMIC_ADDRSPACE(long, x, __SPIRV_FUNCTION_S, __SPIRV_FUNCTION_S_LEN)
+__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, y, __SPIRV_FUNCTION_U, __SPIRV_FUNCTION_U_LEN)
 #endif
 #endif
diff --git a/libclc/generic/include/spirv/atomic/atomic_load.h b/libclc/generic/include/spirv/atomic/atomic_load.h
new file mode 100644
index 0000000000000..ff382377a5481
--- /dev/null
+++ b/libclc/generic/include/spirv/atomic/atomic_load.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
+#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \
+_CLC_DECL TYPE _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5ScopeENS1_19MemorySemanticsMaskE( \
+    volatile AS const TYPE *, enum Scope, enum MemorySemanticsMask);
+
+#define DECL_AS(TYPE, TYPE_MANGLED) \
+DECL(TYPE, TYPE_MANGLED, global, AS1) \
+DECL(TYPE, TYPE_MANGLED, local, AS3)
+
+DECL_AS(int, i)
+DECL_AS(unsigned int, j)
+
+#ifdef cl_khr_int64_base_atomics
+DECL_AS(long, l)
+DECL_AS(unsigned long, m)
+DECL_AS(long, x)
+DECL_AS(unsigned long, y)
+#endif
+
+#undef DECL_AS
+#undef DECL
diff --git a/libclc/generic/include/spirv/atomic/atomic_store.h b/libclc/generic/include/spirv/atomic/atomic_store.h
new file mode 100644
index 0000000000000..f6941622eea80
--- /dev/null
+++ b/libclc/generic/include/spirv/atomic/atomic_store.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
+#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \
+_CLC_DECL void _Z19__spirv_AtomicStorePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5ScopeENS1_19MemorySemanticsMaskE##TYPE_MANGLED( \
+    volatile AS TYPE *, enum Scope, enum MemorySemanticsMask, TYPE);
+
+#define DECL_AS(TYPE, TYPE_MANGLED) \
+DECL(TYPE, TYPE_MANGLED, global, AS1) \
+DECL(TYPE, TYPE_MANGLED, local, AS3)
+
+DECL_AS(int, i)
+DECL_AS(unsigned int, j)
+
+#ifdef cl_khr_int64_base_atomics
+DECL_AS(long, l)
+DECL_AS(unsigned long, m)
+DECL_AS(long, x)
+DECL_AS(unsigned long, y)
+#endif
+
+#undef DECL_AS
+#undef DECL
diff --git a/libclc/generic/include/spirv/spirv.h b/libclc/generic/include/spirv/spirv.h
index e9b9ed2c6ca71..e0e797524cee0 100644
--- a/libclc/generic/include/spirv/spirv.h
+++ b/libclc/generic/include/spirv/spirv.h
@@ -235,6 +235,8 @@
 #include
 #include
 #include
+#include <spirv/atomic/atomic_load.h>
+#include <spirv/atomic/atomic_store.h>
 /* cl_khr extension atomics are omitted from __spirv */
diff --git a/libclc/generic/libspirv/SOURCES b/libclc/generic/libspirv/SOURCES
index 07ea0be5ef01c..422bbf403d60d 100644
--- a/libclc/generic/libspirv/SOURCES
+++ b/libclc/generic/libspirv/SOURCES
@@ -12,6 +12,8 @@ atomic/atomic_or.cl
 atomic/atomic_sub.cl
 atomic/atomic_xchg.cl
 atomic/atomic_xor.cl
+atomic/atomic_load.cl
+atomic/atomic_store.cl
 common/degrees.cl
 common/mix.cl
 common/radians.cl
diff --git a/libclc/generic/libspirv/atomic/atomic_add.cl b/libclc/generic/libspirv/atomic/atomic_add.cl
index 5ce89a6232f12..1d34605ed87ef 100644
--- a/libclc/generic/libspirv/atomic/atomic_add.cl
+++ b/libclc/generic/libspirv/atomic/atomic_add.cl
@@ -26,5 +26,9 @@ IMPL(long, l, global, AS1, __sync_fetch_and_add_8)
 IMPL(unsigned long, m, global, AS1, __sync_fetch_and_add_8)
 IMPL(long, l, local, AS3, __sync_fetch_and_add_8)
 IMPL(unsigned long, m, local, AS3, __sync_fetch_and_add_8)
+IMPL(long, x, global, AS1, __sync_fetch_and_add_8)
+IMPL(unsigned long, y, global, AS1, __sync_fetch_and_add_8)
+IMPL(long, x, local, AS3, __sync_fetch_and_add_8)
+IMPL(unsigned long, y, local, AS3, __sync_fetch_and_add_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_and.cl b/libclc/generic/libspirv/atomic/atomic_and.cl
index 6310a2466b081..83d26edb96088 100644
--- a/libclc/generic/libspirv/atomic/atomic_and.cl
+++ b/libclc/generic/libspirv/atomic/atomic_and.cl
@@ -26,5 +26,9 @@ IMPL(long, l, global, AS1, __sync_fetch_and_and_8)
 IMPL(unsigned long, m, global, AS1, __sync_fetch_and_and_8)
 IMPL(long, l, local, AS3, __sync_fetch_and_and_8)
 IMPL(unsigned long, m, local, AS3, __sync_fetch_and_and_8)
+IMPL(long, x, global, AS1, __sync_fetch_and_and_8)
+IMPL(unsigned long, y, global, AS1, __sync_fetch_and_and_8)
+IMPL(long, x, local, AS3, __sync_fetch_and_and_8)
+IMPL(unsigned long, y, local, AS3, __sync_fetch_and_and_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl b/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
index 161ee89723706..d21e2eb058270 100644
--- a/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
+++ b/libclc/generic/libspirv/atomic/atomic_cmpxchg.cl
@@ -50,4 +50,24 @@ _CLC_DEF ulong _Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5ScopeENS1_19Memor
     volatile global ulong *p, enum Scope scope, enum MemorySemanticsMask eq, enum MemorySemanticsMask neq, ulong val, ulong cmp) {
   return __sync_val_compare_and_swap_8(p, cmp, val);
 }
+
+_CLC_DEF long _Z29__spirv_AtomicCompareExchangePU3AS3xN5__spv5ScopeENS1_19MemorySemanticsMaskES3_xx(
+    volatile local long *p, enum Scope scope, enum MemorySemanticsMask eq, enum MemorySemanticsMask neq, long val, long cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
+
+_CLC_DEF long _Z29__spirv_AtomicCompareExchangePU3AS1xN5__spv5ScopeENS1_19MemorySemanticsMaskES3_xx(
+    volatile global long *p, enum Scope scope, enum MemorySemanticsMask eq, enum MemorySemanticsMask neq, long val, long cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
+
+_CLC_DEF ulong _Z29__spirv_AtomicCompareExchangePU3AS3yN5__spv5ScopeENS1_19MemorySemanticsMaskES3_yy(
+    volatile local ulong *p, enum Scope scope, enum MemorySemanticsMask eq, enum MemorySemanticsMask neq, ulong val, ulong cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
+
+_CLC_DEF ulong _Z29__spirv_AtomicCompareExchangePU3AS1yN5__spv5ScopeENS1_19MemorySemanticsMaskES3_yy(
+    volatile global ulong *p, enum Scope scope, enum MemorySemanticsMask eq, enum MemorySemanticsMask neq, ulong val, ulong cmp) {
+  return __sync_val_compare_and_swap_8(p, cmp, val);
+}
 #endif
diff --git a/libclc/generic/libspirv/atomic/atomic_load.cl b/libclc/generic/libspirv/atomic/atomic_load.cl
new file mode 100644
index 0000000000000..5367e8aa1a943
--- /dev/null
+++ b/libclc/generic/libspirv/atomic/atomic_load.cl
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+
+// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
+
+#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \
+TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *);
+
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
+_CLC_DEF TYPE _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5ScopeENS1_19MemorySemanticsMaskE( \
+    volatile AS const TYPE *p, enum Scope scope, enum MemorySemanticsMask semantics) { \
+  if (semantics == Acquire) { \
+    return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \
+  } \
+  if (semantics == SequentiallyConsistent) { \
+    return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \
+  } \
+  return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \
+}
+
+#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
+IMPL(TYPE, TYPE_MANGLED, global, AS1, PREFIX, BYTE_SIZE) \
+IMPL(TYPE, TYPE_MANGLED, local, AS3, PREFIX, BYTE_SIZE)
+
+IMPL_AS(int, i, , 4)
+IMPL_AS(unsigned int, j, u, 4)
+
+#ifdef cl_khr_int64_base_atomics
+IMPL_AS(long, l, , 8)
+IMPL_AS(unsigned long, m, u, 8)
+IMPL_AS(long, x, , 8)
+IMPL_AS(unsigned long, y, u, 8)
+#endif
+
+#undef FDECL
+#undef IMPL_AS
+#undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_max.cl b/libclc/generic/libspirv/atomic/atomic_max.cl
index b19faea36850b..b3db4c881c6f5 100644
--- a/libclc/generic/libspirv/atomic/atomic_max.cl
+++ b/libclc/generic/libspirv/atomic/atomic_max.cl
@@ -31,5 +31,9 @@ IMPL(long, l, global, AS1, __spirv_AtomicSMax, __clc, max_global_8)
 IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMax, __clc, umax_global_8)
 IMPL(long, l, local, AS3, __spirv_AtomicSMax, __clc, max_local_8)
 IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMax, __clc, umax_local_8)
+IMPL(long, x, global, AS1, __spirv_AtomicSMax, __clc, max_global_8)
+IMPL(unsigned long, y, global, AS1, __spirv_AtomicUMax, __clc, umax_global_8)
+IMPL(long, x, local, AS3, __spirv_AtomicSMax, __clc, max_local_8)
+IMPL(unsigned long, y, local, AS3, __spirv_AtomicUMax, __clc, umax_local_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_min.cl b/libclc/generic/libspirv/atomic/atomic_min.cl
index 4e354ce1d5245..1539d2d72f9a7 100644
--- a/libclc/generic/libspirv/atomic/atomic_min.cl
+++ b/libclc/generic/libspirv/atomic/atomic_min.cl
@@ -31,5 +31,9 @@ IMPL(long, l, global, AS1, __spirv_AtomicSMin, __clc, min_global_8)
 IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMin, __clc, umin_global_8)
 IMPL(long, l, local, AS3, __spirv_AtomicSMin, __clc, min_local_8)
 IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMin, __clc, umin_local_8)
+IMPL(long, x, global, AS1, __spirv_AtomicSMin, __clc, min_global_8)
+IMPL(unsigned long, y, global, AS1, __spirv_AtomicUMin, __clc, umin_global_8)
+IMPL(long, x, local, AS3, __spirv_AtomicSMin, __clc, min_local_8)
+IMPL(unsigned long, y, local, AS3, __spirv_AtomicUMin, __clc, umin_local_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_or.cl b/libclc/generic/libspirv/atomic/atomic_or.cl
index f4a933d6ff0b9..63b17b2f215bd 100644
--- a/libclc/generic/libspirv/atomic/atomic_or.cl
+++ b/libclc/generic/libspirv/atomic/atomic_or.cl
@@ -26,5 +26,9 @@ IMPL(long, l, global, AS1, __sync_fetch_and_or_8)
 IMPL(unsigned long, m, global, AS1, __sync_fetch_and_or_8)
 IMPL(long, l, local, AS3, __sync_fetch_and_or_8)
 IMPL(unsigned long, m, local, AS3, __sync_fetch_and_or_8)
+IMPL(long, x, global, AS1, __sync_fetch_and_or_8)
+IMPL(unsigned long, y, global, AS1, __sync_fetch_and_or_8)
+IMPL(long, x, local, AS3, __sync_fetch_and_or_8)
+IMPL(unsigned long, y, local, AS3, __sync_fetch_and_or_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_store.cl b/libclc/generic/libspirv/atomic/atomic_store.cl
new file mode 100644
index 0000000000000..d5e25c54e6b87
--- /dev/null
+++ b/libclc/generic/libspirv/atomic/atomic_store.cl
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+
+// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling.
+
+_CLC_DEF void _Z19__spirv_AtomicStorePU3AS1fN5__spv5ScopeENS1_19MemorySemanticsMaskEf(
+    volatile global float *p, enum Scope scope, enum MemorySemanticsMask semantics, float val) {
+  _Z19__spirv_AtomicStorePU3AS1jN5__spv5ScopeENS1_19MemorySemanticsMaskEj(
+      (volatile global uint *)p, scope, semantics, as_uint(val));
+}
+
+_CLC_DEF void _Z19__spirv_AtomicStorePU3AS3fN5__spv5ScopeENS1_19MemorySemanticsMaskEf(
+    volatile local float *p, enum Scope scope, enum MemorySemanticsMask semantics, float val) {
+  _Z19__spirv_AtomicStorePU3AS3jN5__spv5ScopeENS1_19MemorySemanticsMaskEj(
+      (volatile local uint *)p, scope, semantics, as_uint(val));
+}
+
+#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \
+TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *, TYPE);
+
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \
+FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
+_CLC_DEF void _Z19__spirv_AtomicStorePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5ScopeENS1_19MemorySemanticsMaskE##TYPE_MANGLED( \
+    volatile AS TYPE *p, enum Scope scope, enum MemorySemanticsMask semantics, TYPE val) { \
+  if (semantics == Release) { \
+    __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \
+  } else if (semantics == SequentiallyConsistent) { \
+    __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \
+  } else { \
+    __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \
+  } \
+}
+
+#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
+IMPL(TYPE, TYPE_MANGLED, global, AS1, PREFIX, BYTE_SIZE) \
+IMPL(TYPE, TYPE_MANGLED, local, AS3, PREFIX, BYTE_SIZE)
+
+IMPL_AS(int, i, , 4)
+IMPL_AS(unsigned int, j, u, 4)
+
+#ifdef cl_khr_int64_base_atomics
+IMPL_AS(long, l, , 8)
+IMPL_AS(unsigned long, m, u, 8)
+IMPL_AS(long, x, , 8)
+IMPL_AS(unsigned long, y, u, 8)
+#endif
+
+#undef FDECL
+#undef IMPL_AS
+#undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_sub.cl b/libclc/generic/libspirv/atomic/atomic_sub.cl
index 039cc03d0edc7..f3851b2d04016 100644
--- a/libclc/generic/libspirv/atomic/atomic_sub.cl
+++ b/libclc/generic/libspirv/atomic/atomic_sub.cl
@@ -26,5 +26,9 @@ IMPL(long, l, global, AS1, __sync_fetch_and_sub_8)
 IMPL(unsigned long, m, global, AS1, __sync_fetch_and_sub_8)
 IMPL(long, l, local, AS3, __sync_fetch_and_sub_8)
 IMPL(unsigned long, m, local, AS3, __sync_fetch_and_sub_8)
+IMPL(long, x, global, AS1, __sync_fetch_and_sub_8)
+IMPL(unsigned long, y, global, AS1, __sync_fetch_and_sub_8)
+IMPL(long, x, local, AS3, __sync_fetch_and_sub_8)
+IMPL(unsigned long, y, local, AS3, __sync_fetch_and_sub_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_xchg.cl b/libclc/generic/libspirv/atomic/atomic_xchg.cl
index d3cc220bf34c9..00db7fdb4a14c 100644
--- a/libclc/generic/libspirv/atomic/atomic_xchg.cl
+++ b/libclc/generic/libspirv/atomic/atomic_xchg.cl
@@ -38,5 +38,9 @@ IMPL(long, l, global, AS1, __sync_swap_8)
 IMPL(unsigned long, m, global, AS1, __sync_swap_8)
 IMPL(long, l, local, AS3, __sync_swap_8)
 IMPL(unsigned long, m, local, AS3, __sync_swap_8)
+IMPL(long, x, global, AS1, __sync_swap_8)
+IMPL(unsigned long, y, global, AS1, __sync_swap_8)
+IMPL(long, x, local, AS3, __sync_swap_8)
+IMPL(unsigned long, y, local, AS3, __sync_swap_8)
 #endif
 #undef IMPL
diff --git a/libclc/generic/libspirv/atomic/atomic_xor.cl b/libclc/generic/libspirv/atomic/atomic_xor.cl
index 3d9dd66b9f7af..711bded7ac6cf 100644
--- a/libclc/generic/libspirv/atomic/atomic_xor.cl
+++ b/libclc/generic/libspirv/atomic/atomic_xor.cl
@@ -26,5 +26,9 @@ IMPL(long, l, global, AS1, __sync_fetch_and_xor_8)
 IMPL(unsigned long, m, global, AS1, __sync_fetch_and_xor_8)
 IMPL(long, l, local, AS3, __sync_fetch_and_xor_8)
 IMPL(unsigned long, m, local, AS3, __sync_fetch_and_xor_8)
+IMPL(long, x, global, AS1, __sync_fetch_and_xor_8)
+IMPL(unsigned long, y, global, AS1, __sync_fetch_and_xor_8)
+IMPL(long, x, local, AS3, __sync_fetch_and_xor_8)
+IMPL(unsigned long, y, local, AS3, __sync_fetch_and_xor_8)
 #endif
 #undef IMPL
diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES
index 4dccc735830b1..63777c2fd3917 100644
--- a/libclc/ptx-nvidiacl/libspirv/SOURCES
+++ b/libclc/ptx-nvidiacl/libspirv/SOURCES
@@ -1,3 +1,5 @@
+atomic/loadstore_helpers.ll
+cl_khr_int64_extended_atomics/minmax_helpers.ll
 synchronization/barrier.cl
 workitem/get_global_id.cl
 workitem/get_global_offset.cl
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll b/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll
new file mode 100644
index 0000000000000..c96ba0e9d9e8a
--- /dev/null
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/loadstore_helpers.ll
@@ -0,0 +1,294 @@
+#if __clang_major__ >= 7
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+#else
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+#endif
+
+define i32 @__clc__atomic_load_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_load_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_load_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_load_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8
+  ret i64 %0
+}
+
+define i32 @__clc__atomic_uload_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_uload_local_4_unordered(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr unordered, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_uload_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_uload_local_8_unordered(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr unordered, align 8
+  ret i64 %0
+}
+
+define i32 @__clc__atomic_load_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_load_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_load_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
+  ret i64 %0
+}
+
+define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_uload_local_4_acquire(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr acquire, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr acquire, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_uload_local_8_acquire(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr acquire, align 8
+  ret i64 %0
+}
+
+
+define i32 @__clc__atomic_load_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_load_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_load_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
+  ret i64 %0
+}
+
+define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret i32 %0
+}
+
+define i32 @__clc__atomic_uload_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(3)* %ptr seq_cst, align 4
+  ret i32 %0
+}
+
+define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret i64 %0
+}
+
+define i64 @__clc__atomic_uload_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(3)* %ptr seq_cst, align 8
+  ret i64 %0
+}
+
+define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
+  ret void
+}
+
+define void @__clc__atomic_store_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_4_unordered(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr unordered, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_8_unordered(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr unordered, align 8
+  ret void
+}
+
+define void @__clc__atomic_store_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
+  ret void
+}
+
+define void @__clc__atomic_store_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr release, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_4_release(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr release, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr release, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_8_release(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr release, align 8
+  ret void
+}
+
+define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret void
+}
+
+define void @__clc__atomic_store_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_4_seq_cst(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(3)* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret void
+}
+
+define void @__clc__atomic_ustore_local_8_seq_cst(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(3)* %ptr seq_cst, align 8
+  ret void
+}
diff --git a/libclc/ptx-nvidiacl/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll b/libclc/ptx-nvidiacl/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll
new file mode 100644
index 0000000000000..98f1f54718a1f
--- /dev/null
+++ b/libclc/ptx-nvidiacl/libspirv/cl_khr_int64_extended_atomics/minmax_helpers.ll
@@ -0,0 +1,53 @@
+#if __clang_major__ >= 7
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+#else
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+#endif
+
+define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
+
+define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
+  ret i64 %0
+}
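
For readability, here is roughly what one instantiation of the dispatch macro in atomic_load.cl, IMPL(int, i, global, AS1, , 4), expands to after preprocessing. This is an illustrative sketch only, not part of the patch; the helper declarations resolve to the IR definitions added in loadstore_helpers.ll.

// Illustrative expansion only (not part of the patch): IMPL(int, i, global, AS1, , 4)
int __clc__atomic_load_global_4_unordered(volatile global const int *);
int __clc__atomic_load_global_4_acquire(volatile global const int *);
int __clc__atomic_load_global_4_seq_cst(volatile global const int *);
_CLC_DEF int _Z18__spirv_AtomicLoadPU3AS1KiN5__spv5ScopeENS1_19MemorySemanticsMaskE(
    volatile global const int *p, enum Scope scope, enum MemorySemanticsMask semantics) {
  // Map the SPIR-V memory-semantics argument onto the matching IR helper.
  if (semantics == Acquire) {
    return __clc__atomic_load_global_4_acquire(p);
  }
  if (semantics == SequentiallyConsistent) {
    return __clc__atomic_load_global_4_seq_cst(p);
  }
  return __clc__atomic_load_global_4_unordered(p);
}

The unsigned (u-prefixed), 64-bit, and local-address-space variants expand the same way onto the corresponding helpers in loadstore_helpers.ll, and the store macro in atomic_store.cl follows the same pattern with Release in place of Acquire.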