diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl index 4cc0443e28438..d5db9eb6defb1 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_cmpxchg.cl @@ -70,10 +70,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } #define __CLC_NVVM_ATOMIC_CAS_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z29__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ + FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, ADDR_SPACE, \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + FN_MANGLED( \ volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ enum MemorySemanticsMask semantics1, \ enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ @@ -118,21 +118,24 @@ Memory order is stored in the lowest 5 bits */ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + OP) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + _Z29__spirv_AtomicCompareExchange##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + 
_Z29__spirv_AtomicCompareExchange##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __global, _global_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL( \ + _Z29__spirv_AtomicCompareExchange##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_) -__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas) +__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas) +__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas) +__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas) +__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas) +__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas) #undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER #undef __CLC_NVVM_ATOMIC_CAS diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h index f46eec448bdcb..a7b6dfba5d881 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_helpers.h @@ -72,62 +72,63 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, NAME_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - NAME_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask 
semantics, TYPE value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - break; \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, ) \ - } \ - break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ - ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ - OP, ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ +#define __CLC_NVVM_ATOMIC_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, OP, ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL TYPE FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + 
ADDR_SPACE, ADDR_SPACE_NV, ) \ + break; \ + case Acquire: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + ADDR_SPACE, ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + OP, ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ + NAME_MANGLED) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##P##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, , _gen_) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##PU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, 
__global, _global_) \ + __CLC_NVVM_ATOMIC_IMPL( \ + NAME_MANGLED##PU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, __local, _shared_) #endif diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h index 5b480fc22a340..400268eee5e4d 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h @@ -12,25 +12,26 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ - ADDR_SPACE, ADDR_SPACE_MANGLED) \ - TYPE \ - _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); \ - _CLC_DECL TYPE \ - _Z24__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - return _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, \ + ADDR_SPACE, ADDR_SPACE_MANGLED, \ + SUBSTITUTION) \ + TYPE \ + _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ + TYPE); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z24__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) 
{ \ + return _Z21__spirv_AtomicIAddEXTP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + pointer, scope, semantics, VAL); \ } #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ - __attribute__((always_inline)) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , , 0) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \ - AS1) __attribute__((always_inline)) \ + U3AS1, 1) \ __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \ - AS3) + U3AS3, 1) #endif diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl index 8f59efc072887..7d16b6ce90559 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -37,8 +37,8 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); #define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ TYPE_MANGLED_NV, ADDR_SPACE, \ ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL TYPE \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ enum MemorySemanticsMask semantics) { \ /* Semantics mask may include memory order, storage class and other info \ @@ -70,11 +70,12 @@ Memory order is stored in the lowest 5 bits */ } #define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - 
TYPE_MANGLED_NV, __local, AS3, _shared_) + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ + , _gen_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, U3AS1, _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, U3AS3, _shared_) __CLC_NVVM_ATOMIC_LOAD(int, i, int, i) __CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl index 0f58a51ab3c20..60fe1e4f47105 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl @@ -21,61 +21,61 @@ __CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max, #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - 
break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val > old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ + enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z21__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = 
SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val > old) { \ + TYPE_INT tmp_int = \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, , , 0, 4) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __global, U3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, U3AS3, 1, 5) __CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT) __CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) \ No newline at end of file diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl index e51f691fa2007..a4f68b10b5665 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl +++ 
b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl @@ -19,61 +19,61 @@ __CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ - TYPE_INT \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ - enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - _CLC_DECL TYPE \ - _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ - TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = \ - _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - TYPE old = *(TYPE *)&old_int; \ - while (val < old) { \ - TYPE_INT tmp_int = \ - _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - pointer_int, 
scope, semantics, semantics, val_int, old_int); \ - if (old_int == tmp_int) { \ - return *(TYPE *)&tmp_int; \ - } \ - old_int = tmp_int; \ - old = *(TYPE *)&old_int; \ - } \ - return old; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ + TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ + TYPE_INT \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + volatile ADDR_SPACE const TYPE_INT *, enum Scope, \ + enum MemorySemanticsMask); \ + TYPE_INT \ + _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ + enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z21__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE val) { \ + enum MemorySemanticsMask load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + volatile ADDR_SPACE TYPE_INT *pointer_int = \ + (volatile ADDR_SPACE TYPE_INT *)pointer; \ + TYPE_INT val_int = *(TYPE_INT *)&val; \ + TYPE_INT old_int = \ + _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + pointer_int, scope, load_order); \ + TYPE old = *(TYPE *)&old_int; \ + while (val < old) { \ + TYPE_INT tmp_int = \ + 
_Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + pointer_int, scope, semantics, semantics, val_int, old_int); \ + if (old_int == tmp_int) { \ + return *(TYPE *)&tmp_int; \ + } \ + old_int = tmp_int; \ + old = *(TYPE *)&old_int; \ + } \ + return old; \ } #define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ OP_MANGLED) \ - __attribute__((always_inline)) \ __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \ - TYPE_INT_MANGLED, OP_MANGLED, __local, AS3) + OP_MANGLED, , , 0, 4) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __global, U3AS1, 1, 5) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ + OP_MANGLED, __local, U3AS3, 1, 5) __CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT) __CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) \ No newline at end of file diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl index 87bf16b79a6c4..d86e6f20121a2 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -34,11 +34,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(unsigned int, unsigned int); } \ } -#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ +#define __CLC_NVVM_ATOMIC_STORE_IMPL(FN_MANGLED, TYPE, TYPE_MANGLED, TYPE_NV, \ TYPE_MANGLED_NV, ADDR_SPACE, \ - ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - _CLC_DECL void \ - _Z19__spirv_AtomicStorePU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_DECL void 
FN_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ @@ -72,12 +71,16 @@ Memory order is stored in the lowest 5 bits */ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __attribute__((always_inline)) __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, AS1, _global_) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, __local, AS3, _shared_) +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStoreP##TYPE_MANGLED##N5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , _gen_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStorePU3AS1##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __global, _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL( \ + _Z19__spirv_AtomicStorePU3AS3##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED, \ + TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, __local, _shared_) __CLC_NVVM_ATOMIC_STORE(int, i, int, i) __CLC_NVVM_ATOMIC_STORE(uint, j, int, i) diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl index cbadea7ec22f1..56f7ed0f72d91 100644 --- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl @@ -10,24 +10,23 @@ #include <spirv/spirv.h> #define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ - ADDR_SPACE_MANGLED) \ + ADDR_SPACE_MANGLED, SUBSTITUTION) \ TYPE \ - 
_Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + _Z18__spirv_AtomicIAddP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ TYPE); \ - _CLC_DECL TYPE \ - _Z18__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + __attribute__((always_inline)) _CLC_DECL TYPE \ + _Z18__spirv_Atomic##OP_MANGLED##P##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ enum MemorySemanticsMask semantics, TYPE val) { \ - return _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + return _Z18__spirv_AtomicIAddP##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ pointer, scope, semantics, -val); \ } #define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, AS1) \ - __attribute__((always_inline)) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3) + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, , , 0) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, U3AS1, 1) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, U3AS3, 1) __CLC_NVVM_ATOMIC_SUB(int, i, ISub) __CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub) diff --git a/libclc/utils/libclc-remangler/LibclcRemangler.cpp b/libclc/utils/libclc-remangler/LibclcRemangler.cpp index 788c07800ef43..b0a4bebe37b2a 100644 --- a/libclc/utils/libclc-remangler/LibclcRemangler.cpp +++ 
b/libclc/utils/libclc-remangler/LibclcRemangler.cpp @@ -210,8 +210,7 @@ class Remangler { size_t index = 0; if (findSub(node, &index)) { OB << 'S'; - if (index != 0) - OB << index; + OB << index; OB << '_'; return true; }