Add PTX libspirv definitions of __spirv_AtomicISub (int, unsigned int, long,
unsigned long) and of __spirv_AtomicFMinEXT / __spirv_AtomicFMaxEXT (float,
double) for the __global and __local address spaces, register atomic_sub.cl in
SOURCES, and make the inc/dec helper pass its VAL argument instead of a
hard-coded 1.

diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES
index 31faf012de6fa..387f91ce39ab3 100644
--- a/libclc/ptx-nvidiacl/libspirv/SOURCES
+++ b/libclc/ptx-nvidiacl/libspirv/SOURCES
@@ -95,6 +95,7 @@ atomic/atomic_dec.cl
 atomic/atomic_inc.cl
 atomic/atomic_max.cl
 atomic/atomic_min.cl
+atomic/atomic_sub.cl
 atomic/atomic_xchg.cl
 atomic/atomic_or.cl
 atomic/atomic_xor.cl
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h
index a602848d63ec8..7269d32f2df12 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_inc_dec_helpers.h
@@ -23,7 +23,7 @@
       volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
       enum MemorySemanticsMask semantics) { \
     return _Z21__spirv_AtomicIAddEXTPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
-        pointer, scope, semantics, 1); \
+        pointer, scope, semantics, VAL); \
   }
 
 #define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl
index d8f9562f3bde2..beabf2e8be43a 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_max.cl
@@ -20,3 +20,73 @@ __CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max,
 #undef __CLC_NVVM_ATOMIC_TYPES
 #undef __CLC_NVVM_ATOMIC
 #undef __CLC_NVVM_ATOMIC_IMPL
+
+// Floating-point atomic max, built as a compare-exchange loop on TYPE_INT,
+// the integer type used to carry TYPE's bit pattern. The macro declares the
+// integer AtomicLoad and AtomicCompareExchange builtins it calls, then defines
+// the __spirv_Atomic<OP_MANGLED> overload for TYPE pointers in ADDR_SPACE.
+// `val` is stored only while it compares greater than the current value;
+// the value seen before the store (or the last value read, when no store is
+// needed) is returned.
+// NOTE(review): comparisons with NaN are false, so a NaN on either side
+// never triggers a store -- confirm that is the intended FMaxEXT behaviour.
+#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \
+                                   TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \
+                                   ADDR_SPACE_MANGLED) \
+  TYPE_INT \
+  _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      volatile ADDR_SPACE const TYPE_INT *, enum Scope, \
+      enum MemorySemanticsMask); \
+  TYPE_INT \
+  _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \
+      volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \
+  _CLC_DECL TYPE \
+  _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    enum MemorySemanticsMask load_order; \
+    switch (semantics) { \
+    case SequentiallyConsistent: \
+      load_order = SequentiallyConsistent; \
+      break; \
+    case Acquire: \
+    case AcquireRelease: \
+      load_order = Acquire; \
+      break; \
+    default: \
+      load_order = None; \
+    } \
+    volatile ADDR_SPACE TYPE_INT *pointer_int = \
+        (volatile ADDR_SPACE TYPE_INT *)pointer; \
+    TYPE_INT val_int = *(TYPE_INT *)&val; \
+    TYPE_INT old_int = \
+        _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+            pointer_int, scope, load_order); \
+    TYPE old = *(TYPE *)&old_int; \
+    while (val > old) { \
+      TYPE_INT tmp_int = \
+          _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \
+              pointer_int, scope, semantics, semantics, val_int, old_int); \
+      if (old_int == tmp_int) { \
+        return *(TYPE *)&tmp_int; \
+      } \
+      old_int = tmp_int; \
+      old = *(TYPE *)&old_int; \
+    } \
+    return old; \
+  }
+
+// Instantiate the implementation for the __global and __local address spaces.
+#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                              OP_MANGLED) \
+  __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                             OP_MANGLED, __global, AS1) \
+  __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                             OP_MANGLED, __local, AS3)
+
+__CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT)
+__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT)
+
+#undef __CLC_NVVM_ATOMIC_MAX
+#undef __CLC_NVVM_ATOMIC_MAX_IMPL
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl
index 9525509dd01ea..6a1d823e0980d 100644
--- a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_min.cl
@@ -18,3 +18,73 @@ __CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin)
 #undef __CLC_NVVM_ATOMIC_TYPES
 #undef __CLC_NVVM_ATOMIC
 #undef __CLC_NVVM_ATOMIC_IMPL
+
+// Floating-point atomic min, built as a compare-exchange loop on TYPE_INT,
+// the integer type used to carry TYPE's bit pattern. The macro declares the
+// integer AtomicLoad and AtomicCompareExchange builtins it calls, then defines
+// the __spirv_Atomic<OP_MANGLED> overload for TYPE pointers in ADDR_SPACE.
+// `val` is stored only while it compares less than the current value;
+// the value seen before the store (or the last value read, when no store is
+// needed) is returned.
+// NOTE(review): comparisons with NaN are false, so a NaN on either side
+// never triggers a store -- confirm that is the intended FMinEXT behaviour.
+#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, \
+                                   TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \
+                                   ADDR_SPACE_MANGLED) \
+  TYPE_INT \
+  _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      volatile ADDR_SPACE const TYPE_INT *, enum Scope, \
+      enum MemorySemanticsMask); \
+  TYPE_INT \
+  _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \
+      volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \
+      enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \
+  _CLC_DECL TYPE \
+  _Z21__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    enum MemorySemanticsMask load_order; \
+    switch (semantics) { \
+    case SequentiallyConsistent: \
+      load_order = SequentiallyConsistent; \
+      break; \
+    case Acquire: \
+    case AcquireRelease: \
+      load_order = Acquire; \
+      break; \
+    default: \
+      load_order = None; \
+    } \
+    volatile ADDR_SPACE TYPE_INT *pointer_int = \
+        (volatile ADDR_SPACE TYPE_INT *)pointer; \
+    TYPE_INT val_int = *(TYPE_INT *)&val; \
+    TYPE_INT old_int = \
+        _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+            pointer_int, scope, load_order); \
+    TYPE old = *(TYPE *)&old_int; \
+    while (val < old) { \
+      TYPE_INT tmp_int = \
+          _Z29__spirv_AtomicCompareExchange##PU3##ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \
+              pointer_int, scope, semantics, semantics, val_int, old_int); \
+      if (old_int == tmp_int) { \
+        return *(TYPE *)&tmp_int; \
+      } \
+      old_int = tmp_int; \
+      old = *(TYPE *)&old_int; \
+    } \
+    return old; \
+  }
+
+// Instantiate the implementation for the __global and __local address spaces.
+#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                              OP_MANGLED) \
+  __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                             OP_MANGLED, __global, AS1) \
+  __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \
+                             OP_MANGLED, __local, AS3)
+
+__CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT)
+__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT)
+
+#undef __CLC_NVVM_ATOMIC_MIN
+#undef __CLC_NVVM_ATOMIC_MIN_IMPL
diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl
new file mode 100644
index 0000000000000..7486ec9a931fe
--- /dev/null
+++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_sub.cl
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+#include <spirv/spirv_types.h>
+
+// Integer atomic subtract, implemented by forwarding to __spirv_AtomicIAdd
+// with the negated operand. The macro declares the IAdd builtin it calls and
+// defines the __spirv_Atomic<OP_MANGLED> overload for TYPE in ADDR_SPACE.
+#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \
+                                   ADDR_SPACE_MANGLED) \
+  TYPE \
+  _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, \
+      TYPE); \
+  _CLC_DECL TYPE \
+  _Z18__spirv_Atomic##OP_MANGLED##PU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+      volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \
+      enum MemorySemanticsMask semantics, TYPE val) { \
+    return _Z18__spirv_AtomicIAddPU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \
+        pointer, scope, semantics, -val); \
+  }
+
+// Instantiate the implementation for the __global and __local address spaces.
+#define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \
+  __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, AS1) \
+  __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, AS3)
+
+__CLC_NVVM_ATOMIC_SUB(int, i, ISub)
+__CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub)
+__CLC_NVVM_ATOMIC_SUB(long, l, ISub)
+__CLC_NVVM_ATOMIC_SUB(unsigned long, m, ISub)
+
+#undef __CLC_NVVM_ATOMIC_SUB_IMPL
+#undef __CLC_NVVM_ATOMIC_SUB