From e1eeb3d66026d6deb42673d5e1c07d4b6b8790db Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Mon, 31 Oct 2022 10:10:45 -0700 Subject: [PATCH 1/3] [LIBCLC] Add support for more generic atomic operations --- .../libspirv/atomic/atomic_cmpxchg.cl | 111 ++++++++++-------- .../libspirv/atomic/atomic_inc_dec_helpers.h | 34 +++--- .../libspirv/atomic/atomic_load.cl | 78 ++++++------ .../libspirv/atomic/atomic_max.cl | 109 +++++++++-------- .../libspirv/atomic/atomic_min.cl | 109 +++++++++-------- .../libspirv/atomic/atomic_store.cl | 84 ++++++------- .../libspirv/atomic/atomic_sub.cl | 37 +++--- 7 files changed, 298 insertions(+), 264 deletions(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 4cc0443e28438..5797ad33a993b 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -69,63 +69,70 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_CAS_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z29__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics1, \ - enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = (semantics1 | semantics2) & 0x1F; \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ - TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - } \ - break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ - TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +// Type __spirv_AtomicCompareExchange(AS Type *P, __spv::Scope::Flag S, +// __spv::MemorySemanticsMask::Flag E, +// __spv::MemorySemanticsMask::Flag U, +// Type V, Type C); +#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, OP, OP_MANGLED, \ + ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, \ + ADDR_SPACE_NV, SUBSTITUTION, SUBSTITUTION2) \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\ +Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\ +__spv5Scope4FlagENS##SUBSTITUTION##_19Memory\ +SemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_MANGLED##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics1, \ + enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = (semantics1 | semantics2) & 0x1F; \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ + TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_CAS_IMPL_ACQUIRE_FENCE( \ + TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } #define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ OP, OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED, __local, AS3, _shared_) + OP_MANGLED, __global, PU3AS1, _global_, 1, 5) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ + OP_MANGLED, __local, PU3AS3, _shared_, 1, 5) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ + OP_MANGLED, , P, _gen_, 0, 4) __CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange) __CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h index 5b480fc22a340..fee6e177403bf 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h @@ -12,25 +12,29 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ - ADDR_SPACE, ADDR_SPACE_MANGLED) \ - TYPE \ - _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); \ - _CLC_DECL TYPE \ - _Z24__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - return _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL( \ + TYPE, TYPE_MANGLED, OP_MANGLED, VAL, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION) \ + TYPE _Z21__spirv_\ +AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ +5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z24__spirv_\ +Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ +5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + return _Z21__spirv_\ +AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ +5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + pointer, scope, semantics, VAL); \ } #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \ - AS1) __attribute__((always_inline)) \ + PU3AS1, 1) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \ - AS3) + PU3AS3, 1) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , P, 0) #endif diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl index 8f59efc072887..54483b8c5ec25 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -34,47 +34,49 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Acquire: \ - __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } \ - } else { \ - TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ - (ADDR_SPACE TYPE_NV *)pointer); \ - switch (order) { \ - case None: \ - return *(TYPE *)&res; \ - case Acquire: { \ - __spirv_MemoryBarrier(scope, Acquire); \ - return *(TYPE *)&res; \ - } \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_LOAD_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ +AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5\ +Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } \ + } else { \ + TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + switch (order) { \ + case None: \ + return *(TYPE *)&res; \ + case Acquire: { \ + __spirv_MemoryBarrier(scope, Acquire); \ + return *(TYPE *)&res; \ + } \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } #define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, __local, AS3, _shared_) + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, PU3AS1, _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, PU3AS3, _shared_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ + P, _gen_) __CLC_NVVM_ATOMIC_LOAD(int, i, int, i) __CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl index 0f58a51ab3c20..63d7dbc2f78f5 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl @@ -21,61 +21,68 @@ __CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max, #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val > old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_\ +AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ +FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ + enum Scope, enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_\ +AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ +v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ +4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ +Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ +4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = _Z18__spirv_\ +AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ +FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val > old) { \ + TYPE_INT tmp_int = _Z29__spirv_\ +AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ +v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ +4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, __global, PU3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, PU3AS3, 1, 5) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, , P, 0, 4) __CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT) -__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) \ No newline at end of file +__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl index e51f691fa2007..d2440e616efae 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl @@ -19,61 +19,68 @@ __CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val < old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_\ +AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ +FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ + enum Scope, enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_\ +AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ +v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ +FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ +Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ +4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = _Z18__spirv_\ +AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ +FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val < old) { \ + TYPE_INT tmp_int = _Z29__spirv_\ +AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ +v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ +FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, __global, PU3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, PU3AS3, 1, 5) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, , P, 0, 4) __CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT) -__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) \ No newline at end of file +__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl index 87bf16b79a6c4..e3a9054f25ae0 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -34,50 +34,52 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL void \ - _Z19__spirv_AtomicStorePU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - case Release: \ - __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - ADDR_SPACE, ADDR_SPACE_NV, \ - _release) \ - } \ - } else { \ - switch (order) { \ - case Release: \ - __spirv_MemoryBarrier(scope, Release); \ - __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ - (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ - return; \ - case None: { \ - __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ - (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ - return; \ - } \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_STORE_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL void _Z19__spirv_\ +AtomicStore##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ +S1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Release: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, \ + _release) \ + } \ + } else { \ + switch (order) { \ + case Release: \ + __spirv_MemoryBarrier(scope, Release); \ + __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + case None: { \ + __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } #define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, __local, AS3, _shared_) + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, PU3AS1, _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, PU3AS3, _shared_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ + P, _gen_) __CLC_NVVM_ATOMIC_STORE(int, i, int, i) __CLC_NVVM_ATOMIC_STORE(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl index cbadea7ec22f1..b86174940e8e1 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl @@ -9,25 +9,30 @@ #include #include -#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE \ - _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); \ - _CLC_DECL TYPE \ - _Z18__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, -val); \ +#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ + POINTER_AND_ADDR_SPACE_MANGLED, \ + SUBSTITUTION) \ + TYPE _Z18__spirv_\ +AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ +S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ + __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ +Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ +4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + return _Z18__spirv_\ +AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ +S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED(pointer, scope, \ + semantics, -val); \ } #define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3) + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, PU3AS1, \ + 1) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, PU3AS3, \ + 1) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, , P, 0) __CLC_NVVM_ATOMIC_SUB(int, i, ISub) __CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub) From d2eb42f2422e4fde23c7ffb0c8bb33b3327a6a51 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 13 Dec 2022 09:13:00 +0100 Subject: [PATCH 2/3] Update libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl Co-authored-by: Alexey Bader --- libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 5797ad33a993b..710d693909e67 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -76,7 +76,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ TYPE_MANGLED_NV, OP, OP_MANGLED, \ ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, \ - ADDR_SPACE_NV, SUBSTITUTION, SUBSTITUTION2) \ + ADDR_SPACE_NV, SUBSTITUTION1, SUBSTITUTION2) \ __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\ Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\ __spv5Scope4FlagENS##SUBSTITUTION##_19Memory\ From b35194fc6eff82b951356586242e5272a159a70b Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Tue, 13 Dec 2022 09:13:08 +0100 Subject: [PATCH 3/3] Update libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl Co-authored-by: Alexey Bader --- libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 710d693909e67..078eb86cdc294 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -79,7 +79,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); ADDR_SPACE_NV, SUBSTITUTION1, SUBSTITUTION2) \ __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\ Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\ -__spv5Scope4FlagENS##SUBSTITUTION##_19Memory\ +__spv5Scope4FlagENS##SUBSTITUTION1##_19Memory\ SemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_MANGLED##TYPE_MANGLED( \ volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ enum MemorySemanticsMask semantics1, \