From e8a0f51b7170f60b768d19473f91fdd77e7cf4bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadej=20Ciglari=C4=8D?= Date: Fri, 18 Mar 2022 12:27:56 +0000 Subject: [PATCH 1/4] add support for generic AS in atomics --- .../libspirv/atomic/atomic_cmpxchg.cl | 96 +++++++++-------- .../libspirv/atomic/atomic_helpers.h | 81 +++++++------- .../libspirv/atomic/atomic_inc_dec_helpers.h | 31 +++--- .../libspirv/atomic/atomic_load.cl | 67 ++++++------ .../libspirv/atomic/atomic_max.cl | 100 +++++++++--------- .../libspirv/atomic/atomic_min.cl | 100 +++++++++--------- .../libspirv/atomic/atomic_store.cl | 73 +++++++------ .../libspirv/atomic/atomic_sub.cl | 17 ++- 8 files changed, 286 insertions(+), 279 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 5d9466bd1e5a4..0e3bd2816d674 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -18,7 +18,7 @@ int __clc_nvvm_reflect_arch(); case Workgroup: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ TYPE_NV res = \ - __nvvm_atom##ORDER##_cta_##OP##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + __nvvm_atom##ORDER##_cta_##OP##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value, cmp); \ return *(TYPE *)&res; \ } \ @@ -32,61 +32,63 @@ int __clc_nvvm_reflect_arch(); default: { \ if (__clc_nvvm_reflect_arch() >= 600) { \ TYPE_NV res = \ - __nvvm_atom##ORDER##_sys_##OP##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + __nvvm_atom##ORDER##_sys_##OP##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value, cmp); \ return *(TYPE *)&res; \ } \ } \ } -#define __CLC_NVVM_ATOMIC_CAS_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z29__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics1, \ - enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = (semantics1 | semantics2) & 0x1F; \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _release) \ - } \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_CAS_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, OP, ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics1, \ + enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other 
info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = (semantics1 | semantics2) & 0x1F; \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _release) \ + } \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + OP) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + _Z29__spirv_AtomicCompareExchange##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + _Z29__spirv_AtomicCompareExchange##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __global, _global_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + _Z29__spirv_AtomicCompareExchange##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_) -__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas) +__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas) +__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas) +__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas) #undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER #undef __CLC_NVVM_ATOMIC_CAS diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h index d6a127c9a2609..da6e64ab5cc1d 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h @@ -42,47 +42,48 @@ extern int __clc_nvvm_reflect_arch(); } \ } -#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, NAME_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - NAME_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - 
switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _release) \ - } \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _release) \ + } \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ + NAME_MANGLED) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __global, _global_) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_) #endif diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h index 5b480fc22a340..7143d59880cd4 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h @@ -12,25 +12,26 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ - ADDR_SPACE, ADDR_SPACE_MANGLED) \ - TYPE \ - 
_Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); \ - _CLC_DECL TYPE \ - _Z24__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - return _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ + ADDR_SPACE, ADDR_SPACE_MANGLED, \ + SUBSTITUTION) \ + TYPE \ + _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ + TYPE); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z24__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + return _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + pointer, scope, semantics, VAL); \ } #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ - __attribute__((always_inline)) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , , 0) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \ - AS1) __attribute__((always_inline)) \ + U3AS1, 1) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \ - AS3) + U3AS3, 1) #endif diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl index 7917d57f53dd8..1362cec6c1661 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -33,42 +33,43 @@ extern int __clc_nvvm_reflect_arch(); } \ } -#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Acquire: \ - __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } \ - } else { \ - if (order == None) { \ - TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ - (ADDR_SPACE TYPE_NV *)pointer); \ - return *(TYPE *)&res; \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + 
_Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } \ + } else { \ + if (order == None) { \ + TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + return *(TYPE *)&res; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } #define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, __local, AS3, _shared_) + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ + , _gen_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, U3AS1, _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, U3AS3, _shared_) __CLC_NVVM_ATOMIC_LOAD(int, i, int, i) __CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl index 0f58a51ab3c20..60fe1e4f47105 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl @@ -21,61 +21,61 @@ __CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max, #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - 
_Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val > old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ + enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z21__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val > old) { \ + TYPE_INT tmp_int = \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, , , 0, 4) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __global, U3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, U3AS3, 1, 5) __CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT) __CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) \ No newline at end of file diff --git 
a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl index e51f691fa2007..a4f68b10b5665 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl @@ -19,61 +19,61 @@ __CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val < old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ + enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z21__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = 
SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val < old) { \ + TYPE_INT tmp_int = \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, , , 0, 4) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __global, U3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, U3AS3, 1, 5) __CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT) __CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) \ No newline at end of file diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl index 541723544415f..5bea2d20e515f 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -33,43 +33,46 @@ extern int __clc_nvvm_reflect_arch(); } \ } -#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL void \ - _Z19__spirv_AtomicStorePU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Release: \ - __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, \ - _release) \ - } \ - } else { \ - if (order == None) { \ - __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ - (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ - return; \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_STORE_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL void FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, 
storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Release: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, \ + _release) \ + } \ + } else { \ + if (order == None) { \ + __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStoreP##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , _gen_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStorePU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStorePU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __local, _shared_) __CLC_NVVM_ATOMIC_STORE(int, i, int, i) __CLC_NVVM_ATOMIC_STORE(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl index cbadea7ec22f1..56f7ed0f72d91 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl @@ -10,24 +10,23 @@ #include #define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ + ADDR_SPACE_MANGLED, SUBSTITUTION) \ TYPE \ - _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + _Z18__spirv_AtomicIAddP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ TYPE); \ - _CLC_DECL TYPE \ - _Z18__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z18__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ enum MemorySemanticsMask semantics, TYPE val) { \ - return _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + return _Z18__spirv_AtomicIAddP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ pointer, scope, semantics, -val); \ } #define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \ - __attribute__((always_inline)) \ - 
__CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3) + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, , , 0) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, U3AS1, 1) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, U3AS3, 1) __CLC_NVVM_ATOMIC_SUB(int, i, ISub) __CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub) From a37353ad82b4d94f0544e723136c132762478784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadej=20Ciglari=C4=8D?= Date: Mon, 21 Mar 2022 16:17:34 +0000 Subject: [PATCH 2/4] fix a bug in libclc remangler --- libclc/utils/libclc-remangler/LibclcRemangler.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libclc/utils/libclc-remangler/LibclcRemangler.cpp b/libclc/utils/libclc-remangler/LibclcRemangler.cpp index dc172cce2b9c2..73119de8a7ef0 100644 --- a/libclc/utils/libclc-remangler/LibclcRemangler.cpp +++ b/libclc/utils/libclc-remangler/LibclcRemangler.cpp @@ -208,8 +208,7 @@ class Remangler { size_t index = 0; if (findSub(node, &index)) { OB << 'S'; - if (index != 0) - OB << index; + OB << index; OB << '_'; return true; } From 0c113e07b3ce102b320c6bedecbfaa28e952307e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadej=20Ciglari=C4=8D?= Date: Mon, 21 Mar 2022 16:31:56 +0000 Subject: [PATCH 3/4] format --- .../libspirv/atomic/atomic_inc_dec_helpers.h | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h index 7143d59880cd4..400268eee5e4d 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h @@ -12,19 +12,19 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ - ADDR_SPACE, ADDR_SPACE_MANGLED, \ - SUBSTITUTION) \ - TYPE \ - _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); \ - __attribute__((always_inline)) _CLC_DECL TYPE \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ + ADDR_SPACE, ADDR_SPACE_MANGLED, \ + SUBSTITUTION) \ + TYPE \ + _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ + TYPE); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ _Z24__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ return _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ + pointer, scope, semantics, VAL); \ } #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ From ae9663ff54aa16042dbb24c9e27fceeab4fa5fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadej=20Ciglari=C4=8D?= Date: Mon, 9 May 2022 15:40:14 +0100 Subject: [PATCH 4/4] format --- 
.../libspirv/atomic/atomic_helpers.h | 90 +++++++++---------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h index 0ab42d9dfb0ef..a7b6dfba5d881 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h @@ -72,51 +72,51 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, ADDR_SPACE, ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - break; \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - } \ - break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + break; \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + 
__CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } #define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \
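A note on the SUBSTITUTION parameters threaded through these macros: Itanium mangling back-references repeated components, and the reference index depends on how many substitutable components precede the repeat. For __global/__local the pointer parameter mangles as PU3AS1i/PU3AS3i, which registers two candidates (U3AS1i, then PU3AS1i) before __spv is seen, so __spv lands at S1_; in the generic case the pointer is plain Pi, only one candidate precedes __spv, and it lands at S0_. The same shift is why the CompareExchange names take S4_ for generic but S5_ for the named address spaces. Modulo whitespace, __CLC_NVVM_ATOMIC_SUB(int, i, ISub) therefore expands for the generic address space to:

/* Generic-AS expansion of __CLC_NVVM_ATOMIC_SUB_IMPL(int, i, ISub, , , 0):
   ADDR_SPACE and ADDR_SPACE_MANGLED are empty, substitution digit is 0. */
int
_Z18__spirv_AtomicIAddPiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagEi(
    volatile int *, enum Scope, enum MemorySemanticsMask, int);
__attribute__((always_inline)) _CLC_DECL int
_Z18__spirv_AtomicISubPiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagEi(
    volatile int *pointer, enum Scope scope,
    enum MemorySemanticsMask semantics, int val) {
  return _Z18__spirv_AtomicIAddPiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagEi(
      pointer, scope, semantics, -val);
}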
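The remangler fix in PATCH 2 is the consumer-side counterpart of the same rule. In the Itanium scheme the first substitution entry is written S_ and later entries S<seq-id>_ with a base-36 seq-id, so suppressing a zero index turns S0_ into S_ and references a different entry; the patch prints the index unconditionally, which matches how the mangled names above are formed (this assumes findSub already yields the seq-id, with the bare S_ case handled separately). A small stand-alone sketch of the encoding, where print_sub_ref is a hypothetical helper and not the remangler's actual API:

#include <stdio.h>

/* Itanium substitution references: entry 0 -> "S_", entry 1 -> "S0_",
   entry 2 -> "S1_", ...; the seq-id is base 36 (digits, then upper-case
   letters) and encodes entry number minus one. */
static void print_sub_ref(unsigned entry) {
  if (entry == 0) {
    printf("S_");
    return;
  }
  unsigned seq = entry - 1; /* seq-id 0 names the *second* entry */
  char buf[16];
  int n = 0;
  do {
    unsigned d = seq % 36;
    buf[n++] = d < 10 ? '0' + d : 'A' + (d - 10);
    seq /= 36;
  } while (seq);
  putchar('S');
  while (n--)
    putchar(buf[n]);
  putchar('_');
}

int main(void) {
  for (unsigned i = 0; i < 5; ++i) {
    print_sub_ref(i);
    putchar('\n'); /* prints S_, S0_, S1_, S2_, S3_ */
  }
  return 0;
}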
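One pattern worth calling out across atomic_load/store/cmpxchg: in PATCH 1 the switch on order has no break statements because every __CLC_NVVM_ATOMIC_*_ORDER expansion ends in a return, so reaching __builtin_trap() means no lowering exists for that order/architecture pair (for example, acquire below sm_70); PATCH 4 then adds fence-based fallbacks for pre-sm_70 targets together with explicit breaks. The shape of the dispatch, as a sketch in which dispatch_add, the __atomic-based helpers, and the arch stub are hypothetical stand-ins for the __nvvm_atom_* builtins and libclc's reflection hook:

/* Stand-alone sketch of the memory-order dispatch; not the libclc code. */
static int __clc_nvvm_reflect_arch(void) { return 700; } /* stub */

static int atomic_add_relaxed(volatile int *p, int v) {
  return __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
}
static int atomic_add_acquire(volatile int *p, int v) {
  return __atomic_fetch_add(p, v, __ATOMIC_ACQUIRE);
}

static int dispatch_add(volatile int *p, int v, unsigned semantics) {
  unsigned order = semantics & 0x1F; /* order sits in the low 5 bits */
  switch (order) {
  case 0x0: /* None: every case returns, so no break is needed */
    return atomic_add_relaxed(p, v);
  case 0x2: /* Acquire: only lowered on sm_70+ in PATCH 1 */
    if (__clc_nvvm_reflect_arch() >= 700)
      return atomic_add_acquire(p, v);
  }
  __builtin_trap(); /* no lowering for this order/arch pair */
  __builtin_unreachable();
}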