From e2bceafe7b90408c6a70190ced453fbf2e74c985 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Mon, 20 Jul 2020 17:09:06 -0400 Subject: [PATCH 01/13] Update namespaces to separate language/intel hw and adopt 2020 convention Signed-off-by: James Brodman --- .../include/CL/sycl/{ => ext}/intel/esimd.hpp | 8 +- .../intel/esimd/detail/esimd_host_util.hpp | 0 .../intel/esimd/detail/esimd_intrin.hpp | 52 +- .../intel/esimd/detail/esimd_math_intrin.hpp | 102 +-- .../esimd/detail/esimd_memory_intrin.hpp | 690 ++++++++++++++++++ .../intel/esimd/detail/esimd_region.hpp | 2 + .../intel/esimd/detail/esimd_types.hpp | 6 +- .../intel/esimd/detail/esimd_util.hpp | 2 + .../CL/sycl/{ => ext}/intel/esimd/esimd.hpp | 6 +- .../sycl/{ => ext}/intel/esimd/esimd_enum.hpp | 2 + .../sycl/{ => ext}/intel/esimd/esimd_math.hpp | 0 .../{ => ext}/intel/esimd/esimd_memory.hpp | 12 +- .../sycl/{ => ext}/intel/esimd/esimd_view.hpp | 2 + .../{ => ext}/intel/fpga_device_selector.hpp | 2 + .../sycl/{ => ext}/intel/fpga_extensions.hpp | 6 +- .../CL/sycl/{ => ext}/intel/fpga_reg.hpp | 2 + .../CL/sycl/{intel => ext/oneapi}/atomic.hpp | 6 +- .../{intel => ext/oneapi}/atomic_enums.hpp | 10 +- .../{intel => ext/oneapi}/atomic_fence.hpp | 8 +- .../sycl/{intel => ext/oneapi}/atomic_ref.hpp | 14 +- .../sycl/{intel => ext/oneapi}/builtins.hpp | 26 +- .../oneapi}/function_pointer.hpp | 6 +- .../sycl/{intel => ext/oneapi}/functional.hpp | 24 +- .../{intel => ext/oneapi}/group_algorithm.hpp | 23 +- .../CL/sycl/{intel => ext/oneapi}/pipes.hpp | 6 +- .../sycl/{intel => ext/oneapi}/reduction.hpp | 60 +- .../oneapi}/spec_constant.hpp | 6 +- .../sycl/{intel => ext/oneapi}/sub_group.hpp | 22 +- .../esimd/detail/esimd_memory_intrin.hpp | 663 ----------------- 29 files changed, 922 insertions(+), 846 deletions(-) rename sycl/include/CL/sycl/{ => ext}/intel/esimd.hpp (78%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/detail/esimd_host_util.hpp (100%) rename sycl/include/CL/sycl/{ => 
ext}/intel/esimd/detail/esimd_intrin.hpp (83%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/detail/esimd_math_intrin.hpp (90%) create mode 100644 sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_memory_intrin.hpp rename sycl/include/CL/sycl/{ => ext}/intel/esimd/detail/esimd_region.hpp (99%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/detail/esimd_types.hpp (98%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/detail/esimd_util.hpp (99%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/esimd.hpp (99%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/esimd_enum.hpp (98%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/esimd_math.hpp (100%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/esimd_memory.hpp (98%) rename sycl/include/CL/sycl/{ => ext}/intel/esimd/esimd_view.hpp (99%) rename sycl/include/CL/sycl/{ => ext}/intel/fpga_device_selector.hpp (97%) rename sycl/include/CL/sycl/{ => ext}/intel/fpga_extensions.hpp (73%) rename sycl/include/CL/sycl/{ => ext}/intel/fpga_reg.hpp (96%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/atomic.hpp (73%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/atomic_enums.hpp (94%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/atomic_fence.hpp (89%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/atomic_ref.hpp (98%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/builtins.hpp (81%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/function_pointer.hpp (97%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/functional.hpp (83%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/group_algorithm.hpp (98%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/pipes.hpp (98%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/reduction.hpp (95%) rename sycl/include/CL/sycl/{experimental => ext/oneapi}/spec_constant.hpp (95%) rename sycl/include/CL/sycl/{intel => ext/oneapi}/sub_group.hpp (97%) delete mode 100644 sycl/include/CL/sycl/intel/esimd/detail/esimd_memory_intrin.hpp diff --git 
a/sycl/include/CL/sycl/intel/esimd.hpp b/sycl/include/CL/sycl/ext/intel/esimd.hpp similarity index 78% rename from sycl/include/CL/sycl/intel/esimd.hpp rename to sycl/include/CL/sycl/ext/intel/esimd.hpp index 7f4b7886d2d2c..3a1cffdcd2a68 100644 --- a/sycl/include/CL/sycl/intel/esimd.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd.hpp @@ -10,10 +10,10 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #ifdef __SYCL_DEVICE_ONLY__ #define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd)) diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_host_util.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_host_util.hpp similarity index 100% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_host_util.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_host_util.hpp diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_intrin.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp similarity index 83% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_intrin.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp index 23674ac3d3e91..fdaca49bf6e30 100644 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_intrin.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp @@ -11,9 +11,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #include // \brief __esimd_rdregion: region access intrinsic. @@ -60,8 +60,9 @@ // template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_rdregion(sycl::intel::gpu::vector_type_t Input, uint16_t Offset); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_rdregion(sycl::ext::intel::gpu::vector_type_t Input, + uint16_t Offset); // __esimd_wrregion returns the updated vector with the region updated. 
// @@ -112,10 +113,11 @@ __esimd_rdregion(sycl::intel::gpu::vector_type_t Input, uint16_t Offset); // template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_wrregion(sycl::intel::gpu::vector_type_t OldVal, - sycl::intel::gpu::vector_type_t NewVal, uint16_t Offset, - sycl::intel::gpu::mask_type_t Mask = 1); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_wrregion(sycl::ext::intel::gpu::vector_type_t OldVal, + sycl::ext::intel::gpu::vector_type_t NewVal, + uint16_t Offset, + sycl::ext::intel::gpu::mask_type_t Mask = 1); __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -218,37 +220,41 @@ readRegion(const vector_type_t &Base, std::pair Region) { // optimization on simd object // template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_vload(const sycl::intel::gpu::vector_type_t *ptr); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_vload(const sycl::ext::intel::gpu::vector_type_t *ptr); // vstore // // map to the backend vstore intrinsic, used by compiler to control // optimization on simd object template -SYCL_EXTERNAL void __esimd_vstore(sycl::intel::gpu::vector_type_t *ptr, - sycl::intel::gpu::vector_type_t vals); +SYCL_EXTERNAL void +__esimd_vstore(sycl::ext::intel::gpu::vector_type_t *ptr, + sycl::ext::intel::gpu::vector_type_t vals); template -SYCL_EXTERNAL uint16_t __esimd_any(sycl::intel::gpu::vector_type_t src); +SYCL_EXTERNAL uint16_t +__esimd_any(sycl::ext::intel::gpu::vector_type_t src); template -SYCL_EXTERNAL uint16_t __esimd_all(sycl::intel::gpu::vector_type_t src); +SYCL_EXTERNAL uint16_t +__esimd_all(sycl::ext::intel::gpu::vector_type_t src); #ifndef __SYCL_DEVICE_ONLY__ // Implementations of ESIMD intrinsics for the SYCL host device template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_rdregion(sycl::intel::gpu::vector_type_t Input, uint16_t Offset) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_rdregion(sycl::ext::intel::gpu::vector_type_t Input, + uint16_t Offset) { uint16_t 
EltOffset = Offset / sizeof(T); assert(Offset % sizeof(T) == 0); int NumRows = M / Width; assert(M % Width == 0); - sycl::intel::gpu::vector_type_t Result; + sycl::ext::intel::gpu::vector_type_t Result; int Index = 0; for (int i = 0; i < NumRows; ++i) { for (int j = 0; j < Width; ++j) { @@ -260,17 +266,17 @@ __esimd_rdregion(sycl::intel::gpu::vector_type_t Input, uint16_t Offset) { template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_wrregion(sycl::intel::gpu::vector_type_t OldVal, - sycl::intel::gpu::vector_type_t NewVal, uint16_t Offset, - sycl::intel::gpu::mask_type_t Mask) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_wrregion(sycl::ext::intel::gpu::vector_type_t OldVal, + sycl::ext::intel::gpu::vector_type_t NewVal, + uint16_t Offset, sycl::ext::intel::gpu::mask_type_t Mask) { uint16_t EltOffset = Offset / sizeof(T); assert(Offset % sizeof(T) == 0); int NumRows = M / Width; assert(M % Width == 0); - sycl::intel::gpu::vector_type_t Result = OldVal; + sycl::ext::intel::gpu::vector_type_t Result = OldVal; int Index = 0; for (int i = 0; i < NumRows; ++i) { for (int j = 0; j < Width; ++j) { diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_math_intrin.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_math_intrin.hpp similarity index 90% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_math_intrin.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_math_intrin.hpp index c3f5a9d141305..ad091c82fc694 100644 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_math_intrin.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_math_intrin.hpp @@ -11,12 +11,12 @@ #pragma once -#include -#include -#include +#include +#include +#include #include -using sycl::intel::gpu::vector_type_t; +using sycl::ext::intel::gpu::vector_type_t; // saturation intrinsics template @@ -210,39 +210,39 @@ SYCL_EXTERNAL vector_type_t __esimd_dp4a(vector_type_t src0, // Reduction functions template -SYCL_EXTERNAL 
sycl::intel::gpu::vector_type_t -__esimd_reduced_fmax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_fmax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_umax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_umax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_smax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_smax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_fmin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_fmin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_umin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_umin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -sycl::intel::gpu::vector_type_t SYCL_EXTERNAL -__esimd_reduced_smin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2); +sycl::ext::intel::gpu::vector_type_t SYCL_EXTERNAL +__esimd_reduced_smin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2); template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_dp4(sycl::intel::gpu::vector_type_t 
v1, - sycl::intel::gpu::vector_type_t v2); +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_dp4(sycl::ext::intel::gpu::vector_type_t v1, + sycl::ext::intel::gpu::vector_type_t v2); #ifndef __SYCL_DEVICE_ONLY__ @@ -1096,10 +1096,10 @@ SYCL_EXTERNAL vector_type_t __esimd_dp4a(vector_type_t src0, }; template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_max(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { - sycl::intel::gpu::vector_type_t retv; +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_max(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { + sycl::ext::intel::gpu::vector_type_t retv; for (int I = 0; I < N; I++) { if (src1[I] >= src2[I]) { retv[I] = src1[I]; @@ -1111,31 +1111,31 @@ __esimd_reduced_max(sycl::intel::gpu::vector_type_t src1, } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_fmax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_fmax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_max(src1, src2); } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_umax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_umax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_max(src1, src2); } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_smax(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_smax(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_max(src1, src2); } template -SYCL_EXTERNAL 
sycl::intel::gpu::vector_type_t -__esimd_reduced_min(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { - sycl::intel::gpu::vector_type_t retv; +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_min(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { + sycl::ext::intel::gpu::vector_type_t retv; for (int I = 0; I < N; I++) { if (src1[I] <= src2[I]) { retv[I] = src1[I]; @@ -1147,23 +1147,23 @@ __esimd_reduced_min(sycl::intel::gpu::vector_type_t src1, } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_fmin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_fmin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_min(src1, src2); } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_umin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_umin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_min(src1, src2); } template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_reduced_smin(sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t src2) { +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_reduced_smin(sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t src2) { return __esimd_reduced_min(src1, src2); } diff --git a/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_memory_intrin.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_memory_intrin.hpp new file mode 100644 index 0000000000000..e28dad78b048e --- /dev/null +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_memory_intrin.hpp @@ -0,0 +1,690 @@ +//==------------ esimd_memory_intrin.hpp - DPC++ 
Explicit SIMD API ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Declares Explicit SIMD intrinsics used to implement working with +// the SIMD classes objects. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +// flat_read does flat-address gather +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t< + Ty, N * sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk)> +__esimd_flat_read(sycl::ext::intel::gpu::vector_type_t addrs, + int ElemsPerAddr = NumBlk, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// flat_write does flat-address scatter +template +SYCL_EXTERNAL void __esimd_flat_write( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t< + Ty, N * sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk)> + vals, + int ElemsPerAddr = NumBlk, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// flat_block_read reads a block of data from one flat address +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_block_read_unaligned(uint64_t addr); + +// flat_block_write writes a block of data using one flat address +template +SYCL_EXTERNAL void +__esimd_flat_block_write(uint64_t addr, + sycl::ext::intel::gpu::vector_type_t vals); + +// Reads a block of data from given surface at given offset. +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset); + +// Writes given block of data to a surface with given index at given offset. 
+template +SYCL_EXTERNAL void +__esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, + sycl::ext::intel::gpu::vector_type_t vals); + +// flat_read4 does flat-address gather4 +template +sycl::ext::intel::gpu::vector_type_t SYCL_EXTERNAL +__esimd_flat_read4(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// flat_write4 does flat-address scatter4 +template +SYCL_EXTERNAL void __esimd_flat_write4( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// flat_atomic: flat-address atomic +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_atomic0(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred); + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_atomic1(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t pred); + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_atomic2(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t pred); + +// esimd_barrier, generic group barrier +SYCL_EXTERNAL void __esimd_barrier(); + +// slm_fence sets the SLM read/write order +SYCL_EXTERNAL void __esimd_slm_fence(uint8_t cntl); + +// slm_read does SLM gather +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_read(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// slm_write does SLM scatter +template +SYCL_EXTERNAL void +__esimd_slm_write(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// slm_block_read reads a block of data from SLM +template +SYCL_EXTERNAL 
sycl::ext::intel::gpu::vector_type_t +__esimd_slm_block_read(uint32_t addr); + +// slm_block_write writes a block of data to SLM +template +SYCL_EXTERNAL void +__esimd_slm_block_write(uint32_t addr, + sycl::ext::intel::gpu::vector_type_t vals); + +// slm_read4 does SLM gather4 +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_read4(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// slm_write4 does SLM scatter4 +template +SYCL_EXTERNAL void __esimd_slm_write4( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred = 1); + +// slm_atomic: SLM atomic +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic0(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred); + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic1(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t pred); + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic2(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t pred); + +// Media block load +// +// @param Ty the element data type. +// +// @param M the height of the 2D block. +// +// @param N the width of the 2D block. +// +// @param TACC type of the surface handle. +// +// @param modifier top/bottom field surface access control. +// +// @param handle the surface handle. +// +// @param plane planar surface index. +// +// @param width the width of the return block. +// +// @param x X-coordinate of the left upper rectangle corner in BYTES. +// +// @param y Y-coordinate of the left upper rectangle corner in ROWS. +// +// @return the linearized 2D block data read from surface. 
+// +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, + unsigned width, unsigned x, unsigned y); + +// Media block store +// +// @param Ty the element data type. +// +// @param M the height of the 2D block. +// +// @param N the width of the 2D block. +// +// @param TACC type of the surface handle. +// +// @param modifier top/bottom field surface access control. +// +// @param handle the surface handle. +// +// @param plane planar surface index. +// +// @param width the width of the return block. +// +// @param x X-coordinate of the left upper rectangle corner in BYTES. +// +// @param y Y-coordinate of the left upper rectangle corner in ROWS. +// +// @param vals the linearized 2D block data to be written to surface. +// +template +SYCL_EXTERNAL void +__esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane, + unsigned width, unsigned x, unsigned y, + sycl::ext::intel::gpu::vector_type_t vals); + +#ifndef __SYCL_DEVICE_ONLY__ + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t< + Ty, N * sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk)> +__esimd_flat_read(sycl::ext::intel::gpu::vector_type_t addrs, + int ElemsPerAddr, + sycl::ext::intel::gpu::vector_type_t pred) { + auto NumBlkDecoded = sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk); + sycl::ext::intel::gpu::vector_type_t< + Ty, N * sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk)> + V; + ElemsPerAddr = sycl::ext::intel::gpu::ElemsPerAddrDecoding(ElemsPerAddr); + + for (int I = 0; I < N; I++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I]); + if (sizeof(Ty) == 2) + ElemsPerAddr = ElemsPerAddr / 2; + if (sizeof(Ty) <= 2) { + for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) + V[I * NumBlkDecoded + J] = *(Addr + J); + } else { + for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) + V[J * N + I] = *(Addr + J); + } + } + } + return V; +} + +template +SYCL_EXTERNAL 
sycl::ext::intel::gpu::vector_type_t +__esimd_flat_read4(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t V; + unsigned int Next = 0; + + if constexpr (HasR(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I]); + V[Next] = *Addr; + } + } + } + + if constexpr (HasG(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty)); + V[Next] = *Addr; + } + } + } + + if constexpr (HasB(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty)); + V[Next] = *Addr; + } + } + } + + if constexpr (HasA(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty) + + sizeof(Ty)); + V[Next] = *Addr; + } + } + } + + return V; +} + +template +SYCL_EXTERNAL void __esimd_flat_write( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t< + Ty, N * sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk)> + vals, + int ElemsPerAddr, sycl::ext::intel::gpu::vector_type_t pred) { + auto NumBlkDecoded = sycl::ext::intel::gpu::ElemsPerAddrDecoding(NumBlk); + ElemsPerAddr = sycl::ext::intel::gpu::ElemsPerAddrDecoding(ElemsPerAddr); + + for (int I = 0; I < N; I++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I]); + if (sizeof(Ty) == 2) + ElemsPerAddr = ElemsPerAddr / 2; + if (sizeof(Ty) <= 2) { + for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) + *(Addr + J) = vals[I * NumBlkDecoded + J]; + } else { + for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) + *(Addr + J) = vals[J * N + I]; + } + } + } +} + +template +SYCL_EXTERNAL void __esimd_flat_write4( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred) { + 
sycl::ext::intel::gpu::vector_type_t V; + unsigned int Next = 0; + + if constexpr (HasR(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I]); + *Addr = vals[Next]; + } + } + } + + if constexpr (HasG(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty)); + *Addr = vals[Next]; + } + } + } + + if constexpr (HasB(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty)); + *Addr = vals[Next]; + } + } + } + + if constexpr (HasA(Mask)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty) + + sizeof(Ty)); + *Addr = vals[Next]; + } + } + } +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_block_read_unaligned(uint64_t addr) { + sycl::ext::intel::gpu::vector_type_t V; + + for (int I = 0; I < N; I++) { + Ty *Addr = reinterpret_cast(addr + I * sizeof(Ty)); + V[I] = *Addr; + } + return V; +} + +template +SYCL_EXTERNAL void +__esimd_flat_block_write(uint64_t addr, + sycl::ext::intel::gpu::vector_type_t vals) { + for (int I = 0; I < N; I++) { + Ty *Addr = reinterpret_cast(addr + I * sizeof(Ty)); + *Addr = vals[I]; + } +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, + unsigned width, unsigned x, unsigned y) { + // On host the input surface is modeled as sycl image 2d object, + // and the read/write access is done through accessor, + // which is passed in as the handle argument. 
+ auto range = + sycl::ext::intel::gpu::AccessorPrivateProxy::getImageRange(handle); + unsigned bpp = + sycl::ext::intel::gpu::AccessorPrivateProxy::getElemSize(handle); + unsigned vpp = bpp / sizeof(Ty); + unsigned int i = x / bpp; + unsigned int j = y; + + assert(x % bpp == 0); + unsigned int xbound = range[0] - 1; + unsigned int ybound = range[1] - 1; + + sycl::ext::intel::gpu::vector_type_t vals; + for (int row = 0; row < M; row++) { + for (int col = 0; col < N; col += vpp) { + unsigned int xoff = (i > xbound) ? xbound : i; + unsigned int yoff = (j > ybound) ? ybound : j; + auto coords = cl::sycl::cl_int2(xoff, yoff); + cl::sycl::cl_uint4 data = handle.read(coords); + + sycl::ext::intel::gpu::vector_type_t res; + for (int idx = 0; idx < 4; idx++) { + res[idx] = data[idx]; + } + + constexpr int refN = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); + unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; + using refTy = sycl::ext::intel::gpu::vector_type_t; + auto ref = reinterpret_cast(res); + + unsigned int offset1 = col + row * N; + unsigned int offset2 = 0; + for (int idx = 0; idx < vpp; idx++) { + vals[offset1] = ref[offset2]; + offset1++; + offset2 += stride; + } + i++; + } + i = x / bpp; + j++; + } + + return vals; +} + +template +SYCL_EXTERNAL void __esimd_media_block_store( + unsigned modififer, TACC handle, unsigned plane, unsigned width, unsigned x, + unsigned y, sycl::ext::intel::gpu::vector_type_t vals) { + unsigned bpp = + sycl::ext::intel::gpu::AccessorPrivateProxy::getElemSize(handle); + unsigned vpp = bpp / sizeof(Ty); + auto range = + sycl::ext::intel::gpu::AccessorPrivateProxy::getImageRange(handle); + unsigned int i = x / bpp; + unsigned int j = y; + + assert(x % bpp == 0); + + for (int row = 0; row < M; row++) { + for (int col = 0; col < N; col += vpp) { + constexpr int Sz = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); + sycl::ext::intel::gpu::vector_type_t res = 0; + + unsigned int offset1 = col + row * N; + unsigned int offset2 = 0; + unsigned int 
stride = sizeof(cl::sycl::cl_uint4) / bpp; + for (int idx = 0; idx < vpp; idx++) { + res[offset2] = vals[offset1]; + offset1++; + offset2 += stride; + } + + using refTy = sycl::ext::intel::gpu::vector_type_t; + auto ref = reinterpret_cast(res); + + cl::sycl::cl_uint4 data; + for (int idx = 0; idx < 4; idx++) { + data[idx] = ref[idx]; + } + + if (i < range[0] && j < range[1]) { + auto coords = cl::sycl::cl_int2(i, j); + handle.write(coords, data); + } + i++; + } + i = x / bpp; + j++; + } +} + +template +SYCL_EXTERNAL uint16_t +__esimd_any(sycl::ext::intel::gpu::vector_type_t src) { + for (unsigned int i = 0; i != N; i++) { + if (src[i] != 0) + return 1; + } + return 0; +} + +template +SYCL_EXTERNAL uint16_t +__esimd_all(sycl::ext::intel::gpu::vector_type_t src) { + for (unsigned int i = 0; i != N; i++) { + if (src[i] == 0) + return 0; + } + return 1; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_dp4(sycl::ext::intel::gpu::vector_type_t v1, + sycl::ext::intel::gpu::vector_type_t v2) { + sycl::ext::intel::gpu::vector_type_t retv; + for (auto i = 0; i != N; i += 4) { + Ty dp = (v1[i] * v2[i]) + (v1[i + 1] * v2[i + 1]) + + (v1[i + 2] * v2[i + 2]) + (v1[i + 3] * v2[i + 3]); + retv[i] = dp; + retv[i + 1] = dp; + retv[i + 2] = dp; + retv[i + 3] = dp; + } + return retv; +} + +/// TODO +SYCL_EXTERNAL void __esimd_barrier() {} + +SYCL_EXTERNAL void __esimd_slm_fence(uint8_t cntl) {} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_read(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +// slm_write does SLM scatter +template +SYCL_EXTERNAL void +__esimd_slm_write(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred) {} + +// slm_block_read reads a block of data from SLM +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t 
+__esimd_slm_block_read(uint32_t addr) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +// slm_block_write writes a block of data to SLM +template +SYCL_EXTERNAL void +__esimd_slm_block_write(uint32_t addr, + sycl::ext::intel::gpu::vector_type_t vals) {} + +// slm_read4 does SLM gather4 +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_read4(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +// slm_write4 does SLM scatter4 +template +SYCL_EXTERNAL void __esimd_slm_write4( + sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t vals, + sycl::ext::intel::gpu::vector_type_t pred) {} + +// slm_atomic: SLM atomic +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic0(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic1(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_slm_atomic2(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_atomic0(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t 
+__esimd_flat_atomic1(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_flat_atomic2(sycl::ext::intel::gpu::vector_type_t addrs, + sycl::ext::intel::gpu::vector_type_t src0, + sycl::ext::intel::gpu::vector_type_t src1, + sycl::ext::intel::gpu::vector_type_t pred) { + sycl::ext::intel::gpu::vector_type_t retv; + return retv; +} + +template +SYCL_EXTERNAL sycl::ext::intel::gpu::vector_type_t +__esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { + throw cl::sycl::feature_not_supported(); + return sycl::ext::intel::gpu::vector_type_t(); +} + +template +SYCL_EXTERNAL void +__esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, + sycl::ext::intel::gpu::vector_type_t vals) { + + throw cl::sycl::feature_not_supported(); +} + +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_region.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_region.hpp similarity index 99% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_region.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_region.hpp index c1576415a882b..39910609e7942 100644 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_region.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_region.hpp @@ -17,6 +17,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -115,5 +116,6 @@ template T getBaseRegion(std::pair Reg) { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_types.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_types.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_types.hpp rename to 
sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_types.hpp index 7ff12e9113dda..eb6b1fd87914c 100644 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_types.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_types.hpp @@ -12,14 +12,15 @@ #include #include // to define C++14,17 extensions +#include +#include #include -#include -#include #include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -257,5 +258,6 @@ inline std::istream &operator>>(std::istream &I, half &rhs) { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_util.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp similarity index 99% rename from sycl/include/CL/sycl/intel/esimd/detail/esimd_util.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp index 54d769db75b4b..e0b7323a8bafe 100755 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_util.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp @@ -41,6 +41,7 @@ static ESIMD_INLINE constexpr unsigned log2(unsigned n) { __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -211,5 +212,6 @@ template <> struct word_type { using type = ushort; }; } // namespace details } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/esimd.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp similarity index 99% rename from sycl/include/CL/sycl/intel/esimd/esimd.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp index 757055dfa00fe..c24cac7ad0b67 100644 --- a/sycl/include/CL/sycl/intel/esimd/esimd.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp @@ -10,11 +10,12 @@ #pragma once -#include -#include +#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl 
{ +namespace ext { namespace intel { namespace gpu { @@ -445,6 +446,7 @@ ESIMD_INLINE simd convert(simd val) { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/esimd_enum.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_enum.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/esimd/esimd_enum.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/esimd_enum.hpp index 4b901ea079119..626d6002af35a 100644 --- a/sycl/include/CL/sycl/intel/esimd/esimd_enum.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_enum.hpp @@ -15,6 +15,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -107,5 +108,6 @@ enum class CacheHint : uint8_t { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/esimd_math.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp similarity index 100% rename from sycl/include/CL/sycl/intel/esimd/esimd_math.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp diff --git a/sycl/include/CL/sycl/intel/esimd/esimd_memory.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/esimd/esimd_memory.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp index 84d175e981595..0dda839f89a16 100644 --- a/sycl/include/CL/sycl/intel/esimd/esimd_memory.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp @@ -11,15 +11,16 @@ #pragma once #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -645,5 +646,6 @@ SYCL_EXTERNAL void slm_init(uint32_t size) {} #endif } // namespace gpu } // namespace intel +} // namespace ext } // namespace 
sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/esimd_view.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp similarity index 99% rename from sycl/include/CL/sycl/intel/esimd/esimd_view.hpp rename to sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp index 57338a0c51e86..abded4def0c25 100644 --- a/sycl/include/CL/sycl/intel/esimd/esimd_view.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp @@ -14,6 +14,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -381,5 +382,6 @@ template class simd_view { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/fpga_device_selector.hpp b/sycl/include/CL/sycl/ext/intel/fpga_device_selector.hpp similarity index 97% rename from sycl/include/CL/sycl/intel/fpga_device_selector.hpp rename to sycl/include/CL/sycl/ext/intel/fpga_device_selector.hpp index d5f9cab31180c..83d9e7683bdf2 100644 --- a/sycl/include/CL/sycl/intel/fpga_device_selector.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_device_selector.hpp @@ -12,6 +12,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { class platform_selector : public device_selector { @@ -48,5 +49,6 @@ class fpga_emulator_selector : public platform_selector { }; } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/fpga_extensions.hpp b/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp similarity index 73% rename from sycl/include/CL/sycl/intel/fpga_extensions.hpp rename to sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp index a9fca1e6139d2..08975ae5051af 100644 --- a/sycl/include/CL/sycl/intel/fpga_extensions.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp @@ -7,6 +7,6 @@ //===----------------------------------------------------------------------===// #pragma 
once -#include -#include -#include +#include +#include +#include diff --git a/sycl/include/CL/sycl/intel/fpga_reg.hpp b/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp similarity index 96% rename from sycl/include/CL/sycl/intel/fpga_reg.hpp rename to sycl/include/CL/sycl/ext/intel/fpga_reg.hpp index 0078dd66c383c..f183d420ca2d0 100644 --- a/sycl/include/CL/sycl/intel/fpga_reg.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp @@ -12,6 +12,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { template T fpga_reg(const T &t) { @@ -23,6 +24,7 @@ template T fpga_reg(const T &t) { } } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic.hpp similarity index 73% rename from sycl/include/CL/sycl/intel/atomic.hpp rename to sycl/include/CL/sycl/ext/oneapi/atomic.hpp index bbc49ecc210d9..7712c4071a8e5 100644 --- a/sycl/include/CL/sycl/intel/atomic.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic.hpp @@ -8,6 +8,6 @@ #pragma once -#include -#include -#include +#include +#include +#include diff --git a/sycl/include/CL/sycl/intel/atomic_enums.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_enums.hpp similarity index 94% rename from sycl/include/CL/sycl/intel/atomic_enums.hpp rename to sycl/include/CL/sycl/ext/oneapi/atomic_enums.hpp index a85c9902cd524..3c48f4d5b52bd 100644 --- a/sycl/include/CL/sycl/intel/atomic_enums.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_enums.hpp @@ -20,7 +20,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { enum class memory_order : int { relaxed, @@ -63,7 +64,7 @@ namespace detail { // Nested ternary conditions in else branch required for C++11 #if __cplusplus >= 201402L static inline constexpr std::memory_order -getStdMemoryOrder(::cl::sycl::intel::memory_order order) { +getStdMemoryOrder(::cl::sycl::ext::oneapi::memory_order 
order) { switch (order) { case memory_order::relaxed: return std::memory_order_relaxed; @@ -81,7 +82,7 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) { } #else static inline constexpr std::memory_order -getStdMemoryOrder(::cl::sycl::intel::memory_order order) { +getStdMemoryOrder(::cl::sycl::ext::oneapi::memory_order order) { return (order == memory_order::relaxed) ? std::memory_order_relaxed : (order == memory_order::__consume_unsupported) @@ -98,6 +99,7 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) { } // namespace detail #endif // __SYCL_DEVICE_ONLY__ -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_fence.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_fence.hpp similarity index 89% rename from sycl/include/CL/sycl/intel/atomic_fence.hpp rename to sycl/include/CL/sycl/ext/oneapi/atomic_fence.hpp index aba95c060b878..a5089efe44db2 100644 --- a/sycl/include/CL/sycl/intel/atomic_fence.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_fence.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #ifndef __SYCL_DEVICE_ONLY__ #include @@ -18,7 +18,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { namespace detail { using namespace cl::sycl::detail; } @@ -35,6 +36,7 @@ static inline void atomic_fence(memory_order order, memory_scope scope) { #endif } -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/atomic_ref.hpp rename to sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp index f6e8d4ff68616..833ae4fe8e924 100644 --- a/sycl/include/CL/sycl/intel/atomic_ref.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp @@ -13,7 +13,7 @@ #include 
#include #include -#include +#include #ifndef __SYCL_DEVICE_ONLY__ #include @@ -27,14 +27,15 @@ namespace sycl { template class multi_ptr; -namespace intel { +namespace ext { +namespace oneapi { namespace detail { -// Import from detail:: into intel::detail:: to improve readability later +// Import from detail:: into oneapi::detail:: to improve readability later using namespace ::cl::sycl::detail; -using memory_order = cl::sycl::intel::memory_order; -using memory_scope = cl::sycl::intel::memory_scope; +using memory_order = cl::sycl::ext::oneapi::memory_order; +using memory_scope = cl::sycl::ext::oneapi::memory_scope; template using IsValidAtomicType = @@ -527,6 +528,7 @@ class atomic_ref : public detail::atomic_ref_impl::atomic_ref_impl; }; -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/builtins.hpp b/sycl/include/CL/sycl/ext/oneapi/builtins.hpp similarity index 81% rename from sycl/include/CL/sycl/intel/builtins.hpp rename to sycl/include/CL/sycl/ext/oneapi/builtins.hpp index a59258a2290ba..90d5ee5ff4913 100644 --- a/sycl/include/CL/sycl/intel/builtins.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/builtins.hpp @@ -18,8 +18,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { -namespace experimental { +namespace ext { +namespace oneapi { // Provides functionality to print data from kernels in a C way: // - On non-host devices this function is directly mapped to printf from @@ -30,9 +30,9 @@ namespace experimental { // Please refer to corresponding section in OpenCL C specification to find // information about format string and its differences from standard C rules. // -// This function is placed under 'experimental' namespace on purpose, because it -// has too much caveats you need to be aware of before using it. 
Please find -// them below and read carefully before using it: +// This function is placed under 'experimental' namespace on purpose, because +// it has too much caveats you need to be aware of before using it. Please +// find them below and read carefully before using it: // // - According to the OpenCL spec, the format string must be // resolvable at compile time i.e. cannot be dynamically created by the @@ -43,14 +43,14 @@ namespace experimental { // test/built-ins/printf.cpp for examples // FIXME: this potentially can be done on SYCL FE side automatically // -// - The format string is interpreted according to the OpenCL C spec, where all -// data types has fixed size, opposed to C++ types which doesn't guarantee +// - The format string is interpreted according to the OpenCL C spec, where +// all data types has fixed size, opposed to C++ types which doesn't guarantee // the exact width of particular data types (except, may be, char). This might // lead to unexpected result, for example: %ld in OpenCL C means that printed -// argument has 'long' type which is 64-bit wide by the OpenCL C spec. However, -// by C++ spec long is just at least 32-bit wide, so, you need to ensure (by -// performing a cast, for example) that if you use %ld specifier, you pass -// 64-bit argument to the cl::sycl::experimental::printf +// argument has 'long' type which is 64-bit wide by the OpenCL C spec. +// However, by C++ spec long is just at least 32-bit wide, so, you need to +// ensure (by performing a cast, for example) that if you use %ld specifier, +// you pass 64-bit argument to the cl::sycl::experimental::printf // // - OpenCL spec defines several additional features, like, for example, 'v' // modifier which allows to print OpenCL vectors: note that these features are @@ -67,8 +67,8 @@ int printf(const CONSTANT_AS char *__format, Args... 
args) { #endif } -} // namespace experimental -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/function_pointer.hpp b/sycl/include/CL/sycl/ext/oneapi/function_pointer.hpp similarity index 97% rename from sycl/include/CL/sycl/intel/function_pointer.hpp rename to sycl/include/CL/sycl/ext/oneapi/function_pointer.hpp index f812be911b788..5f664318d0417 100644 --- a/sycl/include/CL/sycl/intel/function_pointer.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/function_pointer.hpp @@ -21,7 +21,8 @@ namespace detail { __SYCL_EXPORT cl_ulong getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName); } -namespace intel { +namespace ext { +namespace oneapi { // This is a preview extension implementation, intended to provide early // access to a feature for review and community feedback. @@ -83,6 +84,7 @@ device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName, return sycl::detail::getDeviceFunctionPointerImpl(D, P, FuncName); } -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/ext/oneapi/functional.hpp similarity index 83% rename from sycl/include/CL/sycl/intel/functional.hpp rename to sycl/include/CL/sycl/ext/oneapi/functional.hpp index ee4ed21b33ffd..96c84314a939a 100644 --- a/sycl/include/CL/sycl/intel/functional.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/functional.hpp @@ -11,7 +11,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { template struct minimum { T operator()(const T &lhs, const T &rhs) const { @@ -57,7 +58,8 @@ template using bit_or = std::bit_or; template using bit_xor = std::bit_xor; template using bit_and = std::bit_and; -} // namespace intel +} // namespace oneapi +} // namespace ext #ifdef __SYCL_DEVICE_ONLY__ namespace 
detail { @@ -93,15 +95,15 @@ struct GroupOpTag::value>> { return Ret; \ } -__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus) -__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus) +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, ext::oneapi::minimum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, ext::oneapi::minimum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, ext::oneapi::minimum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, ext::oneapi::maximum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, ext::oneapi::maximum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, ext::oneapi::maximum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, ext::oneapi::plus) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, ext::oneapi::plus) +__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, ext::oneapi::plus) #undef __SYCL_CALC_OVERLOAD diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/group_algorithm.hpp rename to sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp index e49ed1592e4da..5dfa09e6418e5 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp @@ -12,9 +12,9 @@ #include #include #include +#include +#include #include -#include -#include #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ __SYCL_INLINE_NAMESPACE(cl) { @@ -32,7 +32,8 @@ template <> inline size_t get_local_linear_range>(group<3> g) { return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); } template <> -inline size_t 
get_local_linear_range(intel::sub_group g) { +inline size_t +get_local_linear_range(ext::oneapi::sub_group g) { return g.get_local_range()[0]; } @@ -53,8 +54,8 @@ __SYCL_GROUP_GET_LOCAL_LINEAR_ID(3); #endif // __SYCL_DEVICE_ONLY__ template <> -inline intel::sub_group::linear_id_type -get_local_linear_id(intel::sub_group g) { +inline ext::oneapi::sub_group::linear_id_type +get_local_linear_id(ext::oneapi::sub_group g) { return g.get_local_id()[0]; } @@ -79,15 +80,15 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) { template struct identity {}; -template struct identity> { +template struct identity> { static constexpr T value = 0; }; -template struct identity> { +template struct identity> { static constexpr T value = (std::numeric_limits::max)(); }; -template struct identity> { +template struct identity> { static constexpr T value = std::numeric_limits::lowest(); }; @@ -112,7 +113,8 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { } // namespace detail -namespace intel { +namespace ext { +namespace oneapi { template using EnableIfIsScalarArithmetic = cl::sycl::detail::enable_if_t< @@ -822,7 +824,8 @@ template bool leader(Group g) { #endif } -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) #endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ diff --git a/sycl/include/CL/sycl/intel/pipes.hpp b/sycl/include/CL/sycl/ext/oneapi/pipes.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/pipes.hpp rename to sycl/include/CL/sycl/ext/oneapi/pipes.hpp index 8396bc1e215fc..cfa906654a4b8 100644 --- a/sycl/include/CL/sycl/intel/pipes.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/pipes.hpp @@ -14,7 +14,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { template class pipe { public: @@ -198,6 +199,7 @@ class kernel_writeable_io_pipe { #endif // __SYCL_DEVICE_ONLY__ }; -} // namespace intel +} // namespace oneapi 
+} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/reduction.hpp b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp similarity index 95% rename from sycl/include/CL/sycl/intel/reduction.hpp rename to sycl/include/CL/sycl/ext/oneapi/reduction.hpp index 01b44cb429d6b..79f0f66677aa4 100644 --- a/sycl/include/CL/sycl/intel/reduction.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp @@ -9,11 +9,12 @@ #pragma once #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { namespace detail { @@ -27,8 +28,8 @@ using cl::sycl::detail::remove_AS; template using IsReduPlus = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduMultiplies = detail::bool_constant< @@ -37,28 +38,28 @@ using IsReduMultiplies = detail::bool_constant< template using IsReduMinimum = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduMaximum = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitOR = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitXOR = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitAND = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduOptForFastAtomicFetch = @@ -166,7 +167,7 @@ class reducer { /// using those operations, which are based on functionality provided by /// sycl::atomic class. 
/// -/// For example, it is known that 0 is identity for intel::plus operations +/// For example, it is known that 0 is identity for ext::oneapi::plus operations /// accepting native scalar types to which scalar 0 is convertible. /// Also, for int32/64 types the atomic_combine() is lowered to /// sycl::atomic::fetch_add(). @@ -308,7 +309,8 @@ class reducer enable_if_t::type, T>::value && (is_geninteger32bit::value || is_geninteger64bit::value) && @@ -318,7 +320,8 @@ class reducer enable_if_t::type, T>::value && (is_geninteger32bit::value || is_geninteger64bit::value) && @@ -599,11 +602,12 @@ struct get_reduction_aux_kernel_name_t { /// Implements a command group function that enqueues a kernel that calls /// user's lambda function KernelFunc and also does one iteration of reduction /// of elements computed in user's lambda function. -/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ext::oneapi::reduce() algorithm to reduce elements in each /// of work-groups, then it calls fast sycl atomic operations to update /// user's reduction variable. /// -/// Briefly: calls user's lambda, intel::reduce() + atomic, INT + ADD/MIN/MAX. +/// Briefly: calls user's lambda, ext::oneapi::reduce() + atomic, INT + +/// ADD/MIN/MAX. template enable_if_t @@ -622,7 +626,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range &Range, (UniformWG || NDIt.get_global_linear_id() < NWorkItems) ? Reducer.MValue : Reducer.getIdentity(); - Reducer.MValue = intel::reduce(NDIt.get_group(), Val, BOp); + Reducer.MValue = ext::oneapi::reduce(NDIt.get_group(), Val, BOp); if (NDIt.get_local_linear_id() == 0) Reducer.atomic_combine(Reduction::getOutPointer(Out)); }); @@ -716,11 +720,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range &Range, /// Implements a command group function that enqueues a kernel that /// calls user's lambda function and does one iteration of reduction /// of elements in each of work-groups. 
-/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ext::oneapi::reduce() algorithm to reduce elements in each /// of work-groups. At the end of each work-groups the partial sum is written /// to a global buffer. /// -/// Briefly: user's lambda, intel:reduce(), FP + ADD/MIN/MAX. +/// Briefly: user's lambda, ext::oneapi:reduce(), FP + ADD/MIN/MAX. template enable_if_t @@ -750,7 +754,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range &Range, ? Reducer.MValue : Reducer.getIdentity(); typename Reduction::binary_operation BOp; - PSum = intel::reduce(NDIt.get_group(), PSum, BOp); + PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(*(Reduction::getOutPointer(Out)), PSum); @@ -863,11 +867,12 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range &Range, /// Implements a command group function that enqueues a kernel that does one /// iteration of reduction of elements in each of work-groups. -/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ext::oneapi::reduce() algorithm to reduce elements in each /// of work-groups. At the end of each work-groups the partial sum is written /// to a global buffer. /// -/// Briefly: aux kernel, intel:reduce(), reproducible results,FP + ADD/MIN/MAX +/// Briefly: aux kernel, ext::oneapi:reduce(), reproducible results,FP + +/// ADD/MIN/MAX template enable_if_t @@ -887,7 +892,7 @@ reduAuxCGFuncImpl(handler &CGH, const nd_range &Range, size_t NWorkItems, (UniformWG || (GID < NWorkItems)) ? 
In[GID] : Reduction::reducer_type::getIdentity(); - PSum = intel::reduce(NDIt.get_group(), PSum, BOp); + PSum = ext::oneapi::reduce(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(*(Reduction::getOutPointer(Out)), PSum); @@ -1054,6 +1059,7 @@ reduction(T *VarPtr, BinaryOperation) { access::mode::read_write>(VarPtr); } -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ext/oneapi/spec_constant.hpp similarity index 95% rename from sycl/include/CL/sycl/experimental/spec_constant.hpp rename to sycl/include/CL/sycl/ext/oneapi/spec_constant.hpp index 104137fdba9c5..99eea95e39cb4 100644 --- a/sycl/include/CL/sycl/experimental/spec_constant.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/spec_constant.hpp @@ -22,7 +22,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace experimental { +namespace ext { +namespace oneapi { class spec_const_error : public compile_program_error { using compile_program_error::compile_program_error; @@ -56,6 +57,7 @@ template class spec_constant { } }; -} // namespace experimental +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/ext/oneapi/sub_group.hpp similarity index 97% rename from sycl/include/CL/sycl/intel/sub_group.hpp rename to sycl/include/CL/sycl/ext/oneapi/sub_group.hpp index 2c65f08218990..62475b1f230ff 100644 --- a/sycl/include/CL/sycl/intel/sub_group.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/sub_group.hpp @@ -16,8 +16,8 @@ #include #include #include +#include #include -#include #include #include @@ -96,7 +96,8 @@ void store(multi_ptr dst, const vec &x) { } // namespace detail -namespace intel { +namespace ext { +namespace oneapi { struct sub_group { @@ -451,7 +452,7 @@ struct sub_group { /* --- 
deprecated collective functions --- */ template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::broadcast instead.") + "sycl::ext::oneapi::broadcast instead.") EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupBroadcast(x, local_id); @@ -465,7 +466,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::reduce instead.") + "sycl::ext::oneapi::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::reduce instead.") + "sycl::ext::oneapi::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, T init, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return op(init, reduce(x, op)); @@ -496,7 +497,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::exclusive_scan instead.") + "sycl::ext::oneapi::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::exclusive_scan instead.") + "sycl::ext::oneapi::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, T init, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ @@ -535,7 +536,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::inclusive_scan instead.") + "sycl::ext::oneapi::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::intel::inclusive_scan instead.") + "sycl::ext::oneapi::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, T init) const { #ifdef __SYCL_DEVICE_ONLY__ @@ -572,6 +573,7 @@ struct sub_group { template friend class cl::sycl::nd_item; sub_group() = default; }; -} // namespace intel +} // namespace oneapi +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/esimd/detail/esimd_memory_intrin.hpp b/sycl/include/CL/sycl/intel/esimd/detail/esimd_memory_intrin.hpp deleted file mode 100644 index d712fccf0d956..0000000000000 --- a/sycl/include/CL/sycl/intel/esimd/detail/esimd_memory_intrin.hpp +++ /dev/null @@ -1,663 +0,0 @@ -//==------------ esimd_memory_intrin.hpp - DPC++ Explicit SIMD API ---------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Declares Explicit SIMD intrinsics used to implement working with -// the SIMD classes objects. 
-//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include - -// flat_read does flat-address gather -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t< - Ty, N * sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk)> -__esimd_flat_read(sycl::intel::gpu::vector_type_t addrs, - int ElemsPerAddr = NumBlk, - sycl::intel::gpu::vector_type_t pred = 1); - -// flat_write does flat-address scatter -template -SYCL_EXTERNAL void -__esimd_flat_write(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t< - Ty, N * sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk)> - vals, - int ElemsPerAddr = NumBlk, - sycl::intel::gpu::vector_type_t pred = 1); - -// flat_block_read reads a block of data from one flat address -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_block_read_unaligned(uint64_t addr); - -// flat_block_write writes a block of data using one flat address -template -SYCL_EXTERNAL void -__esimd_flat_block_write(uint64_t addr, - sycl::intel::gpu::vector_type_t vals); - -// Reads a block of data from given surface at given offset. -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset); - -// Writes given block of data to a surface with given index at given offset. 
-template -SYCL_EXTERNAL void -__esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, - sycl::intel::gpu::vector_type_t vals); - -// flat_read4 does flat-address gather4 -template -sycl::intel::gpu::vector_type_t SYCL_EXTERNAL -__esimd_flat_read4(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred = 1); - -// flat_write does flat-address scatter -template -SYCL_EXTERNAL void __esimd_flat_write4( - sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - sycl::intel::gpu::vector_type_t pred = 1); - -// flat_atomic: flat-address atomic -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic0(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred); - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic1(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t pred); - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic2(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t pred); - -// esimd_barrier, generic group barrier -SYCL_EXTERNAL void __esimd_barrier(); - -// slm_fence sets the SLM read/write order -SYCL_EXTERNAL void __esimd_slm_fence(uint8_t cntl); - -// slm_read does SLM gather -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_read(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred = 1); - -// slm_write does SLM scatter -template -SYCL_EXTERNAL void -__esimd_slm_write(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - sycl::intel::gpu::vector_type_t pred = 1); - -// slm_block_read reads a block of data from SLM -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_block_read(uint32_t addr); - -// slm_block_write writes a block of data to SLM -template 
-SYCL_EXTERNAL void -__esimd_slm_block_write(uint32_t addr, - sycl::intel::gpu::vector_type_t vals); - -// slm_read4 does SLM gather4 -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_read4(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred = 1); - -// slm_write4 does SLM scatter4 -template -SYCL_EXTERNAL void __esimd_slm_write4( - sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - sycl::intel::gpu::vector_type_t pred = 1); - -// slm_atomic: SLM atomic -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic0(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred); - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic1(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t pred); - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic2(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t pred); - -// Media block load -// -// @param Ty the element data type. -// -// @param M the hight of the 2D block. -// -// @param N the width of the 2D block. -// -// @param TACC type of the surface handle. -// -// @param modifier top/bottom field surface access control. -// -// @param handle the surface handle. -// -// @param plane planar surface index. -// -// @param width the width of the return block. -// -// @param x X-coordinate of the left upper rectangle corner in BYTES. -// -// @param y Y-coordinate of the left upper rectangle corner in ROWS. -// -// @return the linearized 2D block data read from surface. -// -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, - unsigned width, unsigned x, unsigned y); - -// Media block store -// -// @param Ty the element data type. 
-// -// @param M the hight of the 2D block. -// -// @param N the width of the 2D block. -// -// @param TACC type of the surface handle. -// -// @param modifier top/bottom field surface access control. -// -// @param handle the surface handle. -// -// @param plane planar surface index. -// -// @param width the width of the return block. -// -// @param x X-coordinate of the left upper rectangle corner in BYTES. -// -// @param y Y-coordinate of the left upper rectangle corner in ROWS. -// -// @param vals the linearized 2D block data to be written to surface. -// -template -SYCL_EXTERNAL void -__esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane, - unsigned width, unsigned x, unsigned y, - sycl::intel::gpu::vector_type_t vals); - -#ifndef __SYCL_DEVICE_ONLY__ - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t< - Ty, N * sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk)> -__esimd_flat_read(sycl::intel::gpu::vector_type_t addrs, - int ElemsPerAddr, - sycl::intel::gpu::vector_type_t pred) { - auto NumBlkDecoded = sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk); - sycl::intel::gpu::vector_type_t< - Ty, N * sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk)> - V; - ElemsPerAddr = sycl::intel::gpu::ElemsPerAddrDecoding(ElemsPerAddr); - - for (int I = 0; I < N; I++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I]); - if (sizeof(Ty) == 2) - ElemsPerAddr = ElemsPerAddr / 2; - if (sizeof(Ty) <= 2) { - for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) - V[I * NumBlkDecoded + J] = *(Addr + J); - } else { - for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) - V[J * N + I] = *(Addr + J); - } - } - } - return V; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_read4(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t V; - unsigned int Next = 0; - - if constexpr (HasR(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = 
reinterpret_cast(addrs[I]); - V[Next] = *Addr; - } - } - } - - if constexpr (HasG(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty)); - V[Next] = *Addr; - } - } - } - - if constexpr (HasB(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty)); - V[Next] = *Addr; - } - } - } - - if constexpr (HasA(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty) + - sizeof(Ty)); - V[Next] = *Addr; - } - } - } - - return V; -} - -template -SYCL_EXTERNAL void -__esimd_flat_write(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t< - Ty, N * sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk)> - vals, - int ElemsPerAddr, - sycl::intel::gpu::vector_type_t pred) { - auto NumBlkDecoded = sycl::intel::gpu::ElemsPerAddrDecoding(NumBlk); - ElemsPerAddr = sycl::intel::gpu::ElemsPerAddrDecoding(ElemsPerAddr); - - for (int I = 0; I < N; I++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I]); - if (sizeof(Ty) == 2) - ElemsPerAddr = ElemsPerAddr / 2; - if (sizeof(Ty) <= 2) { - for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) - *(Addr + J) = vals[I * NumBlkDecoded + J]; - } else { - for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddr; J++) - *(Addr + J) = vals[J * N + I]; - } - } - } -} - -template -SYCL_EXTERNAL void __esimd_flat_write4( - sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t V; - unsigned int Next = 0; - - if constexpr (HasR(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I]); - *Addr = vals[Next]; - } - } - } - - if constexpr (HasG(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty)); - *Addr = 
vals[Next]; - } - } - } - - if constexpr (HasB(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty)); - *Addr = vals[Next]; - } - } - } - - if constexpr (HasA(Mask)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *Addr = reinterpret_cast(addrs[I] + sizeof(Ty) + sizeof(Ty) + - sizeof(Ty)); - *Addr = vals[Next]; - } - } - } -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_block_read_unaligned(uint64_t addr) { - sycl::intel::gpu::vector_type_t V; - - for (int I = 0; I < N; I++) { - Ty *Addr = reinterpret_cast(addr + I * sizeof(Ty)); - V[I] = *Addr; - } - return V; -} - -template -SYCL_EXTERNAL void -__esimd_flat_block_write(uint64_t addr, - sycl::intel::gpu::vector_type_t vals) { - for (int I = 0; I < N; I++) { - Ty *Addr = reinterpret_cast(addr + I * sizeof(Ty)); - *Addr = vals[I]; - } -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, - unsigned width, unsigned x, unsigned y) { - // On host the input surface is modeled as sycl image 2d object, - // and the read/write access is done through accessor, - // which is passed in as the handle argument. - auto range = sycl::intel::gpu::AccessorPrivateProxy::getImageRange(handle); - unsigned bpp = sycl::intel::gpu::AccessorPrivateProxy::getElemSize(handle); - unsigned vpp = bpp / sizeof(Ty); - unsigned int i = x / bpp; - unsigned int j = y; - - assert(x % bpp == 0); - unsigned int xbound = range[0] - 1; - unsigned int ybound = range[1] - 1; - - sycl::intel::gpu::vector_type_t vals; - for (int row = 0; row < M; row++) { - for (int col = 0; col < N; col += vpp) { - unsigned int xoff = (i > xbound) ? xbound : i; - unsigned int yoff = (j > ybound) ? 
ybound : j; - auto coords = cl::sycl::cl_int2(xoff, yoff); - cl::sycl::cl_uint4 data = handle.read(coords); - - sycl::intel::gpu::vector_type_t res; - for (int idx = 0; idx < 4; idx++) { - res[idx] = data[idx]; - } - - constexpr int refN = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); - unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; - using refTy = sycl::intel::gpu::vector_type_t; - auto ref = reinterpret_cast(res); - - unsigned int offset1 = col + row * N; - unsigned int offset2 = 0; - for (int idx = 0; idx < vpp; idx++) { - vals[offset1] = ref[offset2]; - offset1++; - offset2 += stride; - } - i++; - } - i = x / bpp; - j++; - } - - return vals; -} - -template -SYCL_EXTERNAL void -__esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane, - unsigned width, unsigned x, unsigned y, - sycl::intel::gpu::vector_type_t vals) { - unsigned bpp = sycl::intel::gpu::AccessorPrivateProxy::getElemSize(handle); - unsigned vpp = bpp / sizeof(Ty); - auto range = sycl::intel::gpu::AccessorPrivateProxy::getImageRange(handle); - unsigned int i = x / bpp; - unsigned int j = y; - - assert(x % bpp == 0); - - for (int row = 0; row < M; row++) { - for (int col = 0; col < N; col += vpp) { - constexpr int Sz = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); - sycl::intel::gpu::vector_type_t res = 0; - - unsigned int offset1 = col + row * N; - unsigned int offset2 = 0; - unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; - for (int idx = 0; idx < vpp; idx++) { - res[offset2] = vals[offset1]; - offset1++; - offset2 += stride; - } - - using refTy = sycl::intel::gpu::vector_type_t; - auto ref = reinterpret_cast(res); - - cl::sycl::cl_uint4 data; - for (int idx = 0; idx < 4; idx++) { - data[idx] = ref[idx]; - } - - if (i < range[0] && j < range[1]) { - auto coords = cl::sycl::cl_int2(i, j); - handle.write(coords, data); - } - i++; - } - i = x / bpp; - j++; - } -} - -template -SYCL_EXTERNAL uint16_t __esimd_any(sycl::intel::gpu::vector_type_t src) { - for (unsigned int i = 0; i 
!= N; i++) { - if (src[i] != 0) - return 1; - } - return 0; -} - -template -SYCL_EXTERNAL uint16_t __esimd_all(sycl::intel::gpu::vector_type_t src) { - for (unsigned int i = 0; i != N; i++) { - if (src[i] == 0) - return 0; - } - return 1; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_dp4(sycl::intel::gpu::vector_type_t v1, - sycl::intel::gpu::vector_type_t v2) { - sycl::intel::gpu::vector_type_t retv; - for (auto i = 0; i != N; i += 4) { - Ty dp = (v1[i] * v2[i]) + (v1[i + 1] * v2[i + 1]) + - (v1[i + 2] * v2[i + 2]) + (v1[i + 3] * v2[i + 3]); - retv[i] = dp; - retv[i + 1] = dp; - retv[i + 2] = dp; - retv[i + 3] = dp; - } - return retv; -} - -/// TODO -SYCL_EXTERNAL void __esimd_barrier() {} - -SYCL_EXTERNAL void __esimd_slm_fence(uint8_t cntl) {} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_read(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -// slm_write does SLM scatter -template -SYCL_EXTERNAL void -__esimd_slm_write(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - sycl::intel::gpu::vector_type_t pred) {} - -// slm_block_read reads a block of data from SLM -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_block_read(uint32_t addr) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -// slm_block_write writes a block of data to SLM -template -SYCL_EXTERNAL void -__esimd_slm_block_write(uint32_t addr, - sycl::intel::gpu::vector_type_t vals) {} - -// slm_read4 does SLM gather4 -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_read4(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -// slm_write4 does SLM scatter4 -template -SYCL_EXTERNAL void __esimd_slm_write4( - sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t vals, - 
sycl::intel::gpu::vector_type_t pred) {} - -// slm_atomic: SLM atomic -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic0(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic1(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_slm_atomic2(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic0(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic1(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_flat_atomic2(sycl::intel::gpu::vector_type_t addrs, - sycl::intel::gpu::vector_type_t src0, - sycl::intel::gpu::vector_type_t src1, - sycl::intel::gpu::vector_type_t pred) { - sycl::intel::gpu::vector_type_t retv; - return retv; -} - -template -SYCL_EXTERNAL sycl::intel::gpu::vector_type_t -__esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { - throw cl::sycl::feature_not_supported(); - return sycl::intel::gpu::vector_type_t(); -} - -template -SYCL_EXTERNAL void -__esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, - sycl::intel::gpu::vector_type_t vals) { - - throw 
cl::sycl::feature_not_supported(); -} - -#endif // __SYCL_DEVICE_ONLY__ From cdfec56196944baaf195f8475ee6ab385b0e6247 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Tue, 21 Jul 2020 16:13:30 -0400 Subject: [PATCH 02/13] Fix comp fails and update tests to use new namespaces Signed-off-by: James Brodman --- sycl/include/CL/sycl.hpp | 12 +-- sycl/include/CL/sycl/accessor.hpp | 6 +- sycl/include/CL/sycl/builtins.hpp | 6 +- sycl/include/CL/sycl/detail/accessor_impl.hpp | 4 +- sycl/include/CL/sycl/detail/spirv.hpp | 101 ++++++++++-------- sycl/include/CL/sycl/detail/type_traits.hpp | 8 +- .../ext/intel/esimd/detail/esimd_intrin.hpp | 2 + .../ext/intel/esimd/detail/esimd_util.hpp | 32 +++--- .../include/CL/sycl/ext/intel/esimd/esimd.hpp | 2 +- .../CL/sycl/ext/intel/esimd/esimd_math.hpp | 12 ++- .../CL/sycl/ext/intel/esimd/esimd_memory.hpp | 2 +- .../CL/sycl/ext/intel/esimd/esimd_view.hpp | 2 +- sycl/include/CL/sycl/ext/intel/fpga_reg.hpp | 2 +- .../include/CL/sycl/ext/oneapi/atomic_ref.hpp | 8 +- .../CL/sycl/ext/oneapi/group_algorithm.hpp | 67 ++++++------ sycl/include/CL/sycl/ext/oneapi/reduction.hpp | 1 + sycl/include/CL/sycl/handler.hpp | 32 +++--- sycl/include/CL/sycl/nd_item.hpp | 6 +- sycl/include/CL/sycl/pipes.hpp | 4 +- sycl/include/CL/sycl/program.hpp | 8 +- sycl/source/detail/program_impl.cpp | 4 +- .../program_manager/program_manager.cpp | 4 +- sycl/source/function_pointer.cpp | 6 +- sycl/test/atomic_ref/add.cpp | 10 +- sycl/test/atomic_ref/compare_exchange.cpp | 4 +- sycl/test/atomic_ref/exchange.cpp | 4 +- sycl/test/atomic_ref/load.cpp | 4 +- sycl/test/atomic_ref/max.cpp | 4 +- sycl/test/atomic_ref/min.cpp | 4 +- sycl/test/atomic_ref/store.cpp | 4 +- sycl/test/atomic_ref/sub.cpp | 10 +- .../basic_tests/esimd/block_load_store.cpp | 4 +- sycl/test/basic_tests/esimd/esimd_math.cpp | 4 +- sycl/test/basic_tests/esimd/flat_atomic.cpp | 4 +- .../basic_tests/esimd/gather4_scatter4.cpp | 4 +- .../test/basic_tests/esimd/gather_scatter.cpp | 4 +- 
sycl/test/basic_tests/esimd/global_var.cpp | 2 +- sycl/test/basic_tests/esimd/simd.cpp | 4 +- sycl/test/basic_tests/esimd/simd_merge.cpp | 4 +- sycl/test/basic_tests/esimd/simd_view.cpp | 4 +- sycl/test/basic_tests/esimd/slm_atomic.cpp | 4 +- sycl/test/basic_tests/esimd/slm_block.cpp | 4 +- sycl/test/basic_tests/esimd/slm_load.cpp | 4 +- sycl/test/basic_tests/esimd/slm_load4.cpp | 4 +- sycl/test/built-ins/printf.cpp | 22 ++-- sycl/test/built-ins/scalar_integer.cpp | 2 +- sycl/test/built-ins/vector_integer.cpp | 2 +- .../function-pointers/fp-as-kernel-arg.cpp | 4 +- .../pass-fp-through-buffer.cpp | 8 +- sycl/test/group-algorithm/all_of.cpp | 2 +- sycl/test/group-algorithm/any_of.cpp | 2 +- sycl/test/group-algorithm/broadcast.cpp | 2 +- sycl/test/group-algorithm/exclusive_scan.cpp | 2 +- sycl/test/group-algorithm/inclusive_scan.cpp | 2 +- sycl/test/group-algorithm/leader.cpp | 2 +- sycl/test/group-algorithm/none_of.cpp | 2 +- sycl/test/group-algorithm/reduce.cpp | 2 +- sycl/test/linear_id/linear-sub_group.cpp | 2 +- sycl/test/reduction/reduction_ctor.cpp | 22 ++-- .../reduction/reduction_nd_conditional.cpp | 10 +- sycl/test/reduction/reduction_nd_ext_type.hpp | 14 +-- sycl/test/reduction/reduction_nd_s0_dw.cpp | 40 +++---- sycl/test/reduction/reduction_nd_s0_rw.cpp | 40 +++---- sycl/test/reduction/reduction_nd_s1_dw.cpp | 40 +++---- sycl/test/reduction/reduction_nd_s1_rw.cpp | 40 +++---- sycl/test/reduction/reduction_placeholder.cpp | 14 +-- sycl/test/reduction/reduction_transparent.cpp | 8 +- sycl/test/reduction/reduction_usm.cpp | 16 +-- .../regression/sub-group-store-const-ref.cpp | 2 +- sycl/test/spec_const/spec_const_hw.cpp | 6 +- sycl/test/spec_const/spec_const_neg.cpp | 6 +- sycl/test/spec_const/spec_const_redefine.cpp | 4 +- sycl/test/spec_const/spec_const_types.cpp | 24 ++--- sycl/test/sub_group/attributes.cpp | 17 +-- sycl/test/sub_group/barrier.cpp | 8 +- sycl/test/sub_group/broadcast.hpp | 2 +- sycl/test/sub_group/common.cpp | 2 +- 
sycl/test/sub_group/common_ocl.cpp | 2 +- sycl/test/sub_group/generic-shuffle.cpp | 4 +- sycl/test/sub_group/helper.hpp | 28 +++-- sycl/test/sub_group/load_store.cpp | 13 ++- sycl/test/sub_group/reduce.hpp | 26 ++--- sycl/test/sub_group/scan.hpp | 34 +++--- sycl/test/sub_group/shuffle.hpp | 4 +- sycl/test/sub_group/vote.cpp | 2 +- 85 files changed, 475 insertions(+), 434 deletions(-) mode change 100755 => 100644 sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 4fd3c55b0952c..c5c20813d014a 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -18,16 +18,16 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include #include #include #include diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 7b26a13f475e7..6a4fc49e72916 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -197,12 +197,14 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { // Forward declare a "back-door" access class to support ESIMD. 
class AccessorPrivateProxy; } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) @@ -431,7 +433,7 @@ class image_accessor #endif private: - friend class sycl::intel::gpu::AccessorPrivateProxy; + friend class sycl::ext::intel::gpu::AccessorPrivateProxy; #if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_EXPLICIT_SIMD__) const OCLImageTy getNativeImageObj() const { return MImageObj; } @@ -881,7 +883,7 @@ class accessor : #endif // __SYCL_DEVICE_ONLY__ private: - friend class sycl::intel::gpu::AccessorPrivateProxy; + friend class sycl::ext::intel::gpu::AccessorPrivateProxy; public: using value_type = DataT; diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 9671987643f41..fbf01679463bf 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -724,14 +724,16 @@ detail::enable_if_t::value, T> clz(T x) __NOEXC { return __sycl_std::__invoke_clz(x); } -namespace intel { +namespace ext { +namespace oneapi { // geninteger ctz (geninteger x) template sycl::detail::enable_if_t::value, T> ctz(T x) __NOEXC { return __sycl_std::__invoke_ctz(x); } -} // namespace intel +} // namespace oneapi +} // namespace ext // geninteger mad_hi (geninteger a, geninteger b, geninteger c) template diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 76676014975c2..c5913a6cf5272 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -17,12 +17,14 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { // Forward declare a "back-door" access class to support ESIMD. 
class AccessorPrivateProxy; } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) @@ -170,7 +172,7 @@ class AccessorBaseHost { AccessorImplPtr impl; private: - friend class sycl::intel::gpu::AccessorPrivateProxy; + friend class sycl::ext::intel::gpu::AccessorPrivateProxy; }; class __SYCL_EXPORT LocalAccessorImplHost { diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index d662e2afc7880..989cf6a3a096a 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -12,14 +12,16 @@ #include #include #include -#include +#include #ifdef __SYCL_DEVICE_ONLY__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ext { +namespace oneapi { struct sub_group; -} // namespace intel +} // namespace oneapi +} // namespace ext namespace detail { namespace spirv { @@ -29,7 +31,7 @@ template struct group_scope> { static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup; }; -template <> struct group_scope<::cl::sycl::intel::sub_group> { +template <> struct group_scope<::cl::sycl::ext::oneapi::sub_group> { static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup; }; @@ -87,23 +89,23 @@ T GroupBroadcast(T x, id local_id) { // Single happens-before means semantics should always apply to all spaces // Although consume is unsupported, forwarding to acquire is valid static inline constexpr __spv::MemorySemanticsMask::Flag -getMemorySemanticsMask(intel::memory_order Order) { +getMemorySemanticsMask(ext::oneapi::memory_order Order) { __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None; switch (Order) { - case intel::memory_order::relaxed: + case ext::oneapi::memory_order::relaxed: SpvOrder = __spv::MemorySemanticsMask::None; break; - case intel::memory_order::__consume_unsupported: - case intel::memory_order::acquire: + case ext::oneapi::memory_order::__consume_unsupported: + 
case ext::oneapi::memory_order::acquire: SpvOrder = __spv::MemorySemanticsMask::Acquire; break; - case intel::memory_order::release: + case ext::oneapi::memory_order::release: SpvOrder = __spv::MemorySemanticsMask::Release; break; - case intel::memory_order::acq_rel: + case ext::oneapi::memory_order::acq_rel: SpvOrder = __spv::MemorySemanticsMask::AcquireRelease; break; - case intel::memory_order::seq_cst: + case ext::oneapi::memory_order::seq_cst: SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent; break; } @@ -113,17 +115,18 @@ getMemorySemanticsMask(intel::memory_order Order) { __spv::MemorySemanticsMask::CrossWorkgroupMemory); } -static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) { +static inline constexpr __spv::Scope::Flag +getScope(ext::oneapi::memory_scope Scope) { switch (Scope) { - case intel::memory_scope::work_item: + case ext::oneapi::memory_scope::work_item: return __spv::Scope::Invocation; - case intel::memory_scope::sub_group: + case ext::oneapi::memory_scope::sub_group: return __spv::Scope::Subgroup; - case intel::memory_scope::work_group: + case ext::oneapi::memory_scope::work_group: return __spv::Scope::Workgroup; - case intel::memory_scope::device: + case ext::oneapi::memory_scope::device: return __spv::Scope::Device; - case intel::memory_scope::system: + case ext::oneapi::memory_scope::system: return __spv::Scope::CrossDevice; } } @@ -131,8 +134,10 @@ static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) { template inline typename detail::enable_if_t::value, T> AtomicCompareExchange(multi_ptr MPtr, - intel::memory_scope Scope, intel::memory_order Success, - intel::memory_order Failure, T Desired, T Expected) { + ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Success, + ext::oneapi::memory_order Failure, T Desired, + T Expected) { auto SPIRVSuccess = getMemorySemanticsMask(Success); auto SPIRVFailure = getMemorySemanticsMask(Failure); auto SPIRVScope = getScope(Scope); 
@@ -144,8 +149,10 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> AtomicCompareExchange(multi_ptr MPtr, - intel::memory_scope Scope, intel::memory_order Success, - intel::memory_order Failure, T Desired, T Expected) { + ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Success, + ext::oneapi::memory_order Failure, T Desired, + T Expected) { using I = detail::make_unsinged_integer_t; auto SPIRVSuccess = getMemorySemanticsMask(Success); auto SPIRVFailure = getMemorySemanticsMask(Failure); @@ -162,8 +169,8 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order) { +AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -172,8 +179,8 @@ AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order) { +AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -186,8 +193,8 @@ AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -196,8 +203,8 @@ AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, - 
intel::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -210,8 +217,8 @@ AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -220,8 +227,8 @@ AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -236,8 +243,8 @@ AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicIAdd(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicIAdd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -246,8 +253,8 @@ AtomicIAdd(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicISub(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicISub(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); 
@@ -256,8 +263,8 @@ AtomicISub(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicAnd(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicAnd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -266,8 +273,8 @@ AtomicAnd(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicOr(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicOr(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -276,8 +283,8 @@ AtomicOr(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicXor(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicXor(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -286,8 +293,8 @@ AtomicXor(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMin(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicMin(multi_ptr MPtr, ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -296,8 +303,8 @@ AtomicMin(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMax(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicMax(multi_ptr MPtr, 
ext::oneapi::memory_scope Scope, + ext::oneapi::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp index 3f52acc8a2de2..5c90569490924 100644 --- a/sycl/include/CL/sycl/detail/type_traits.hpp +++ b/sycl/include/CL/sycl/detail/type_traits.hpp @@ -18,9 +18,11 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { template class group; -namespace intel { +namespace ext { +namespace oneapi { struct sub_group; -} // namespace intel +} // namespace oneapi +} // namespace ext namespace detail { namespace half_impl { class half; @@ -313,7 +315,7 @@ struct is_group> : std::true_type {}; template struct is_sub_group : std::false_type {}; -template <> struct is_sub_group : std::true_type {}; +template <> struct is_sub_group : std::true_type {}; template struct is_generic_group diff --git a/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp index fdaca49bf6e30..e1c19df0b9076 100644 --- a/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_intrin.hpp @@ -121,6 +121,7 @@ __esimd_wrregion(sycl::ext::intel::gpu::vector_type_t OldVal, __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { // TODO dependencies on the std SYCL concepts like images @@ -211,6 +212,7 @@ readRegion(const vector_type_t &Base, std::pair Region) { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp old mode 100755 new mode 100644 index e0b7323a8bafe..b343da5a3666a --- a/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp +++ 
b/sycl/include/CL/sycl/ext/intel/esimd/detail/esimd_util.hpp @@ -57,11 +57,11 @@ template struct is_esimd_vector { static const bool value = false; }; template -struct is_esimd_vector> { +struct is_esimd_vector> { static const bool value = true; }; template -struct is_esimd_vector> { +struct is_esimd_vector> { static const bool value = true; }; @@ -79,12 +79,12 @@ struct is_dword_type }; template -struct is_dword_type> { +struct is_dword_type> { static const bool value = is_dword_type::value; }; template -struct is_dword_type> { +struct is_dword_type> { static const bool value = is_dword_type::value; }; @@ -97,11 +97,12 @@ struct is_word_type typename std::remove_const::type>::value> {}; template -struct is_word_type> { +struct is_word_type> { static const bool value = is_word_type::value; }; -template struct is_word_type> { +template +struct is_word_type> { static const bool value = is_word_type::value; }; @@ -114,11 +115,12 @@ struct is_byte_type typename std::remove_const::type>::value> {}; template -struct is_byte_type> { +struct is_byte_type> { static const bool value = is_byte_type::value; }; -template struct is_byte_type> { +template +struct is_byte_type> { static const bool value = is_byte_type::value; }; @@ -152,33 +154,33 @@ struct is_qword_type typename std::remove_const::type>::value> {}; template -struct is_qword_type> { +struct is_qword_type> { static const bool value = is_qword_type::value; }; template -struct is_qword_type> { +struct is_qword_type> { static const bool value = is_qword_type::value; }; // Extends to ESIMD vector types. 
template -struct is_fp_or_dword_type> { +struct is_fp_or_dword_type> { static const bool value = is_fp_or_dword_type::value; }; template -struct is_fp_or_dword_type> { +struct is_fp_or_dword_type> { static const bool value = is_fp_or_dword_type::value; }; /// Convert types into vector types template struct simd_type { - using type = sycl::intel::gpu::simd; + using type = sycl::ext::intel::gpu::simd; }; template -struct simd_type> { - using type = sycl::intel::gpu::simd; +struct simd_type> { + using type = sycl::ext::intel::gpu::simd; }; template struct simd_type { diff --git a/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp index c24cac7ad0b67..6b2bd52ff4ca5 100644 --- a/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd.hpp @@ -453,7 +453,7 @@ ESIMD_INLINE simd convert(simd val) { #ifndef __SYCL_DEVICE_ONLY__ template std::ostream &operator<<(std::ostream &OS, - const sycl::intel::gpu::simd &V) { + const sycl::ext::intel::gpu::simd &V) { OS << "{"; for (int I = 0; I < N; I++) { OS << V[I]; diff --git a/sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp index 4eb145a315124..c04e70fee8a6c 100644 --- a/sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_math.hpp @@ -10,15 +10,16 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { namespace gpu { @@ -1946,5 +1947,6 @@ simd esimd_dp4(simd v1, simd v2) { } // namespace gpu } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp index 0dda839f89a16..67f6082e469a2 100644 --- 
a/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_memory.hpp @@ -10,12 +10,12 @@ #pragma once -#include #include #include #include #include #include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp b/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp index abded4def0c25..7455b13ef509f 100644 --- a/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp +++ b/sycl/include/CL/sycl/ext/intel/esimd/esimd_view.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp b/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp index f183d420ca2d0..a8c1b3605dce6 100644 --- a/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_reg.hpp @@ -32,6 +32,6 @@ template T fpga_reg(const T &t) { // Currently clang does not support nested namespace for attributes namespace intelfpga { template T fpga_reg(const T &t) { - return cl::sycl::intel::fpga_reg(t); + return cl::sycl::ext::intel::fpga_reg(t); } } diff --git a/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp index 833ae4fe8e924..19bfbefefb3a2 100644 --- a/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp @@ -128,16 +128,16 @@ class atomic_ref_base { detail::IsValidAtomicType::value, "Invalid atomic type. 
Valid types are arithmetic and pointer types"); static_assert(!std::is_same::value, - "intel::atomic_ref does not support bool type"); + "ext::oneapi::atomic_ref does not support bool type"); static_assert(!(std::is_same::value || std::is_same::value || std::is_same::value), - "intel::atomic_ref does not support char type"); + "ext::oneapi::atomic_ref does not support char type"); static_assert(!(std::is_same::value || std::is_same::value), - "intel::atomic_ref does not support short type"); + "ext::oneapi::atomic_ref does not support short type"); static_assert(!std::is_pointer::value, - "intel::atomic_ref does not yet support pointer types"); + "ext::oneapi::atomic_ref does not yet support pointer types"); static_assert(detail::IsValidAtomicAddressSpace::value, "Invalid atomic address_space. Valid address spaces are: " "global_space, local_space, global_device_space"); diff --git a/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp b/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp index 5dfa09e6418e5..abb8011de08a2 100644 --- a/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/group_algorithm.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ __SYCL_INLINE_NAMESPACE(cl) { @@ -33,7 +34,7 @@ template <> inline size_t get_local_linear_range>(group<3> g) { } template <> inline size_t -get_local_linear_range(ext::oneapi::sub_group g) { +get_local_linear_range(ext::oneapi::sub_group g) { return g.get_local_range()[0]; } @@ -131,7 +132,7 @@ using EnableIfIsPointer = template bool all_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAll(pred); #else @@ -145,7 +146,7 @@ template bool all_of(Group g, T x, Predicate pred) { 
static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); return all_of(g, pred(x)); } @@ -154,7 +155,7 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ bool partial = true; sycl::detail::for_each( @@ -174,7 +175,7 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, template bool any_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAny(pred); #else @@ -188,7 +189,7 @@ template bool any_of(Group g, T x, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); return any_of(g, pred(x)); } @@ -198,7 +199,7 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, #ifdef __SYCL_DEVICE_ONLY__ static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); bool partial = false; sycl::detail::for_each( g, first, last, @@ -217,7 +218,7 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, template bool none_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAll(!pred); #else @@ -231,7 +232,7 @@ template bool none_of(Group g, T x, Predicate pred) { 
static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); return none_of(g, pred(x)); } @@ -241,7 +242,7 @@ EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, #ifdef __SYCL_DEVICE_ONLY__ static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); return !any_of(g, first, last, pred); #else (void)g; @@ -258,7 +259,7 @@ EnableIfIsScalarArithmetic broadcast(Group, T x, typename Group::id_type local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupBroadcast(x, local_id); #else @@ -274,7 +275,7 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::id_type local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -295,7 +296,7 @@ EnableIfIsScalarArithmetic broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast( g, x, @@ -314,7 +315,7 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -334,7 +335,7 @@ 
template EnableIfIsScalarArithmetic broadcast(Group g, T x) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast(g, x, 0); #else @@ -349,7 +350,7 @@ template EnableIfIsVectorArithmetic broadcast(Group g, T x) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -368,7 +369,7 @@ template EnableIfIsScalarArithmetic reduce(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -389,7 +390,7 @@ template EnableIfIsVectorArithmetic reduce(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same reduce(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -430,7 +431,7 @@ EnableIfIsVectorArithmetic reduce(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision 
static_assert( std::is_same reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -515,7 +516,7 @@ EnableIfIsScalarArithmetic exclusive_scan(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -536,7 +537,7 @@ EnableIfIsVectorArithmetic exclusive_scan(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case 
for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -607,7 +608,7 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -669,7 +670,7 @@ EnableIfIsVectorArithmetic inclusive_scan(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same inclusive_scan(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -710,7 +711,7 @@ EnableIfIsScalarArithmetic inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -733,7 +734,7 @@ EnableIfIsVectorArithmetic inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -754,7 +755,7 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr 
result, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -812,7 +813,7 @@ EnableIfIsPointer inclusive_scan(Group g, InPtr first, template bool leader(Group g) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ext::oneapi::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ typename Group::linear_id_type linear_id = sycl::detail::get_local_linear_id(g); diff --git a/sycl/include/CL/sycl/ext/oneapi/reduction.hpp b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp index 79f0f66677aa4..42d0109b55468 100644 --- a/sycl/include/CL/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 0a40efb33bd2e..80f0b38deb736 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -172,7 +172,8 @@ checkValueRange(const T &V) { } // namespace detail -namespace intel { +namespace ext { +namespace oneapi { namespace detail { template @@ -196,7 +197,8 @@ enable_if_t reduAuxCGFunc(handler &CGH, const nd_range &Range, size_t NWorkItems, Reduction &Redu); } // namespace detail -} // namespace intel +} // namespace oneapi +} // namespace ext /// Command group handler class. /// @@ -339,7 +341,7 @@ class __SYCL_EXPORT handler { // Recursively calls itself until arguments pack is fully processed. // The version for regular(standard layout) argument. template - void setArgsHelper(int ArgIndex, T &&Arg, Ts &&... 
Args) { + void setArgsHelper(int ArgIndex, T &&Arg, Ts &&...Args) { set_arg(ArgIndex, std::move(Arg)); setArgsHelper(++ArgIndex, std::move(Args)...); } @@ -806,7 +808,7 @@ class __SYCL_EXPORT handler { /// Registers pack of arguments(Args) with indexes starting from 0. /// /// \param Args are argument values to be set. - template void set_args(Ts &&... Args) { + template void set_args(Ts &&...Args) { setArgsHelper(0, std::move(Args)...); } @@ -968,8 +970,8 @@ class __SYCL_EXPORT handler { detail::enable_if_t parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - Redu.getUserAccessor()); + ext::oneapi::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + Redu.getUserAccessor()); } /// Implements parallel_for() accepting nd_range and 1 reduction variable @@ -981,8 +983,8 @@ class __SYCL_EXPORT handler { detail::enable_if_t parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - Redu.getUSMPointer()); + ext::oneapi::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + Redu.getUSMPointer()); } /// Implements parallel_for() accepting nd_range and 1 reduction variable @@ -1002,8 +1004,8 @@ class __SYCL_EXPORT handler { parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { shared_ptr_class QueueCopy = MQueue; auto RWAcc = Redu.getReadWriteScalarAcc(*this); - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - RWAcc); + ext::oneapi::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + RWAcc); this->finalize(); // Copy from RWAcc to user's reduction accessor. @@ -1030,7 +1032,7 @@ class __SYCL_EXPORT handler { /// TODO: Need to handle more than 1 reduction in parallel_for(). /// TODO: Support HOST. 
The kernels called by this parallel_for() may use /// some functionality that is not yet supported on HOST such as: - /// barrier(), and intel::reduce() that also may be used in more + /// barrier(), and ext::oneapi::reduce() that also may be used in more /// optimized implementations waiting for their turn of code-review. template @@ -1052,7 +1054,7 @@ class __SYCL_EXPORT handler { // necessary to reduce all partial sums into one final sum. // 1. Call the kernel that includes user's lambda function. - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu); + ext::oneapi::detail::reduCGFunc(*this, KernelFunc, Range, Redu); shared_ptr_class QueueCopy = MQueue; this->finalize(); @@ -1082,8 +1084,8 @@ class __SYCL_EXPORT handler { // Associate it with handler manually. if (NWorkGroups == 1 && !Reduction::is_usm) Redu.associateWithHandler(AuxHandler); - intel::detail::reduAuxCGFunc(AuxHandler, Range, - NWorkItems, Redu); + ext::oneapi::detail::reduAuxCGFunc( + AuxHandler, Range, NWorkItems, Redu); MLastEvent = AuxHandler.finalize(); NWorkItems = NWorkGroups; @@ -1812,7 +1814,7 @@ class __SYCL_EXPORT handler { // in handler from reduction_impl methods. 
template - friend class intel::detail::reduction_impl; + friend class ext::oneapi::detail::reduction_impl; friend void detail::associateWithHandler(handler &, detail::AccessorBaseHost *, diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp index 62abba368dc7f..902003505f6e9 100644 --- a/sycl/include/CL/sycl/nd_item.hpp +++ b/sycl/include/CL/sycl/nd_item.hpp @@ -12,9 +12,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -67,7 +67,9 @@ template class nd_item { group get_group() const { return Group; } - intel::sub_group get_sub_group() const { return intel::sub_group(); } + ext::oneapi::sub_group get_sub_group() const { + return ext::oneapi::sub_group(); + } size_t ALWAYS_INLINE get_group(int dimension) const { size_t Size = Group[dimension]; diff --git a/sycl/include/CL/sycl/pipes.hpp b/sycl/include/CL/sycl/pipes.hpp index e02fa1155592f..647c9a2a8aee8 100644 --- a/sycl/include/CL/sycl/pipes.hpp +++ b/sycl/include/CL/sycl/pipes.hpp @@ -8,11 +8,11 @@ #pragma once -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { template -using pipe = intel::pipe; +using pipe = ext::oneapi::pipe; } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp index c6dbebf3f45bf..61137756c2b2a 100644 --- a/sycl/include/CL/sycl/program.hpp +++ b/sycl/include/CL/sycl/program.hpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -307,7 +307,7 @@ class __SYCL_EXPORT program { /// \return a specialization constant instance corresponding to given type ID /// passed as a template parameter template - experimental::spec_constant set_spec_constant(T Cst) { + ext::oneapi::spec_constant set_spec_constant(T Cst) { constexpr const char *Name = detail::SpecConstantInfo::getName(); static_assert(std::is_integral::value || std::is_floating_point::value, @@ -315,10 +315,10 @@ class __SYCL_EXPORT 
program { #ifdef __SYCL_DEVICE_ONLY__ (void)Cst; (void)Name; - return experimental::spec_constant(); + return ext::oneapi::spec_constant(); #else set_spec_constant_impl(Name, &Cst, sizeof(T)); - return experimental::spec_constant(Cst); + return ext::oneapi::spec_constant(Cst); #endif // __SYCL_DEVICE_ONLY__ } diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index ff90729b80367..4177c4d1cd8c0 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -479,8 +479,8 @@ vector_class program_impl::get_info() const { void program_impl::set_spec_constant_impl(const char *Name, const void *ValAddr, size_t ValSize) { if (MState != program_state::none) - throw cl::sycl::experimental::spec_const_error("Invalid program state", - PI_INVALID_PROGRAM); + throw cl::sycl::ext::oneapi::spec_const_error("Invalid program state", + PI_INVALID_PROGRAM); // Reuse cached programs lock as opposed to introducing a new lock. auto LockGuard = MContext->getKernelProgramCache().acquireCachedPrograms(); spec_constant_impl &SC = SpecConstRegistry[Name]; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 78838d0178ee6..22f1fbfa86a75 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -981,7 +981,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg, auto LockGuard = Ctx->getKernelProgramCache().acquireCachedPrograms(); auto It = NativePrograms.find(NativePrg); if (It == NativePrograms.end()) - throw sycl::experimental::spec_const_error( + throw sycl::ext::oneapi::spec_const_error( "spec constant is set in a program w/o a binary image", PI_INVALID_OPERATION); Img = It->second; diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp index 
c273ae817c8bf..b46fd533fadd8 100644 --- a/sycl/source/function_pointer.cpp +++ b/sycl/source/function_pointer.cpp @@ -6,16 +6,16 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -intel::device_func_ptr_holder_t +ext::oneapi::device_func_ptr_holder_t getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName) { - intel::device_func_ptr_holder_t FPtr = 0; + ext::oneapi::device_func_ptr_holder_t FPtr = 0; // FIXME: return value must be checked here, but since we cannot yet check // if corresponding extension is supported, let's silently ignore it here. const detail::plugin &Plugin = detail::getSyclObjImpl(P)->getPlugin(); diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index b152166e4f966..6face7c749bf5 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template void add_fetch_test(queue q, size_t N) { @@ -26,7 +26,7 @@ void add_fetch_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm.fetch_add(T(1)); }); }); @@ -59,7 +59,7 @@ void add_plus_equal_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm += T(1); }); }); @@ -92,7 +92,7 @@ void add_pre_inc_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = ++atm; }); }); @@ -125,7 +125,7 
@@ void add_post_inc_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm++; }); }); diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index 8f563fccb65fd..31290418a144b 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class compare_exchange_kernel; @@ -29,7 +29,7 @@ void compare_exchange_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = atomic_ref(exc[0]); T result = initial; bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index 2ce1292cfdd55..bba5dae8e29b3 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class exchange_kernel; @@ -29,7 +29,7 @@ void exchange_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = atomic_ref(exc[0]); out[gid] = atm.exchange(gid); }); }); diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index 274191b9a5ac3..4d95c4a5f1858 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class load_kernel; @@ -29,7 
+29,7 @@ void load_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(ld[0]); + auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); }); diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index c8bccf1c28067..7be6b9ac392b5 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template void max_test(queue q, size_t N) { @@ -27,7 +27,7 @@ void max_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); // +1 accounts for lowest() returning 0 for unsigned types out[gid] = atm.fetch_max(T(gid) + 1); diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 8313c4931136c..47787a52b2eea 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template void min_test(queue q, size_t N) { @@ -27,7 +27,7 @@ void min_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_min(T(gid)); }); }); diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index eebdba5ced095..69abba8e0580d 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class store_kernel; @@ -24,7 +24,7 @@ void store_test(queue q, size_t N) { auto st = 
store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(st[0]); + auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); }); diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index 52e338048e7be..13ed2c5bdafbe 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template void sub_fetch_test(queue q, size_t N) { @@ -26,7 +26,7 @@ void sub_fetch_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_sub(T(1)); }); }); @@ -59,7 +59,7 @@ void sub_plus_equal_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm -= T(1); }); }); @@ -92,7 +92,7 @@ void sub_pre_dec_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = --atm; }); }); @@ -125,7 +125,7 @@ void sub_post_dec_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm--; }); }); diff --git a/sycl/test/basic_tests/esimd/block_load_store.cpp b/sycl/test/basic_tests/esimd/block_load_store.cpp index 268df2c7991c6..b83f4f0378a02 100644 --- a/sycl/test/basic_tests/esimd/block_load_store.cpp +++ b/sycl/test/basic_tests/esimd/block_load_store.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include 
-#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel(accessor &buf) __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/esimd_math.cpp b/sycl/test/basic_tests/esimd/esimd_math.cpp index bada49639b366..e037ebb1c3c84 100644 --- a/sycl/test/basic_tests/esimd/esimd_math.cpp +++ b/sycl/test/basic_tests/esimd/esimd_math.cpp @@ -1,11 +1,11 @@ // RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -fsyntax-only -Xclang -verify %s // expected-no-diagnostics -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; bool test_esimd_mask() __attribute__((sycl_device)) { simd a(0); diff --git a/sycl/test/basic_tests/esimd/flat_atomic.cpp b/sycl/test/basic_tests/esimd/flat_atomic.cpp index 03bf1742015ed..eb8bbc1293a28 100644 --- a/sycl/test/basic_tests/esimd/flat_atomic.cpp +++ b/sycl/test/basic_tests/esimd/flat_atomic.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel0(accessor &buf) __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/gather4_scatter4.cpp b/sycl/test/basic_tests/esimd/gather4_scatter4.cpp index 0e912f393afd3..5627a8757545d 100644 --- a/sycl/test/basic_tests/esimd/gather4_scatter4.cpp +++ b/sycl/test/basic_tests/esimd/gather4_scatter4.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel(accessor -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel(accessor &buf) __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/global_var.cpp 
b/sycl/test/basic_tests/esimd/global_var.cpp index e2b52fc51241a..87694cc844da7 100644 --- a/sycl/test/basic_tests/esimd/global_var.cpp +++ b/sycl/test/basic_tests/esimd/global_var.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -fsyntax-only -Xclang -verify %s // expected-no-diagnostics -#include +#include // This test checks that DPC++ compiler in ESIMD mode understands // the ESIMD_PRIVATE and ESIMD_REGISTER macros diff --git a/sycl/test/basic_tests/esimd/simd.cpp b/sycl/test/basic_tests/esimd/simd.cpp index 5eb82677e0691..b152fd55b1b6a 100644 --- a/sycl/test/basic_tests/esimd/simd.cpp +++ b/sycl/test/basic_tests/esimd/simd.cpp @@ -1,11 +1,11 @@ // RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -fsyntax-only -Xclang -verify %s // expected-no-diagnostics -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; bool test_simd_ctors() __attribute__((sycl_device)) { simd v0 = 1; diff --git a/sycl/test/basic_tests/esimd/simd_merge.cpp b/sycl/test/basic_tests/esimd/simd_merge.cpp index 20c357188be79..b6b0ccea3841e 100644 --- a/sycl/test/basic_tests/esimd/simd_merge.cpp +++ b/sycl/test/basic_tests/esimd/simd_merge.cpp @@ -1,11 +1,11 @@ // RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -fsyntax-only -Xclang -verify %s // expected-no-diagnostics -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; bool test_simd_merge1() __attribute__((sycl_device)) { simd v0 = 1; diff --git a/sycl/test/basic_tests/esimd/simd_view.cpp b/sycl/test/basic_tests/esimd/simd_view.cpp index 0910dace1c98a..e30829ff170e2 100644 --- a/sycl/test/basic_tests/esimd/simd_view.cpp +++ b/sycl/test/basic_tests/esimd/simd_view.cpp @@ -1,11 +1,11 @@ // RUN: %clangxx -fsycl -fsycl-explicit-simd -fsycl-device-only -fsyntax-only -Xclang -verify %s // expected-no-diagnostics -#include +#include #include #include -using namespace 
sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; bool test_simd_view_ctors() __attribute__((sycl_device)) { simd v0(0, 1); diff --git a/sycl/test/basic_tests/esimd/slm_atomic.cpp b/sycl/test/basic_tests/esimd/slm_atomic.cpp index b7f094075e147..b8b93cdaaf148 100644 --- a/sycl/test/basic_tests/esimd/slm_atomic.cpp +++ b/sycl/test/basic_tests/esimd/slm_atomic.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel0() __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/slm_block.cpp b/sycl/test/basic_tests/esimd/slm_block.cpp index a49083f5c6524..eb3c23ab3cd62 100644 --- a/sycl/test/basic_tests/esimd/slm_block.cpp +++ b/sycl/test/basic_tests/esimd/slm_block.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel() __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/slm_load.cpp b/sycl/test/basic_tests/esimd/slm_load.cpp index a84dce7b25f7c..a28d4f1d3411a 100644 --- a/sycl/test/basic_tests/esimd/slm_load.cpp +++ b/sycl/test/basic_tests/esimd/slm_load.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void kernel() __attribute__((sycl_device)) { diff --git a/sycl/test/basic_tests/esimd/slm_load4.cpp b/sycl/test/basic_tests/esimd/slm_load4.cpp index f1f483e72c214..c6ea7074772c6 100644 --- a/sycl/test/basic_tests/esimd/slm_load4.cpp +++ b/sycl/test/basic_tests/esimd/slm_load4.cpp @@ -2,11 +2,11 @@ // expected-no-diagnostics #include -#include +#include #include #include -using namespace sycl::intel::gpu; +using namespace sycl::ext::intel::gpu; using namespace cl::sycl; void 
kernel() __attribute__((sycl_device)) { diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp index 6536498587729..fe1a416b9550e 100644 --- a/sycl/test/built-ins/printf.cpp +++ b/sycl/test/built-ins/printf.cpp @@ -41,7 +41,7 @@ int main() { Queue.submit([&](handler &CGH) { CGH.single_task([=]() { // String - intel::experimental::printf(format_hello_world); + ext::oneapi::printf(format_hello_world); // Due to a bug in Intel CPU Runtime for OpenCL on Windows, information // printed using such format strings (without %-specifiers) might // appear in different order if output is redirected to a file or @@ -50,8 +50,8 @@ int main() { // CHECK: {{(Hello, World!)?}} // Integral types - intel::experimental::printf(format_int, (int32_t)123); - intel::experimental::printf(format_int, (int32_t)-123); + ext::oneapi::printf(format_int, (int32_t)123); + ext::oneapi::printf(format_int, (int32_t)-123); // CHECK: 123 // CHECK-NEXT: -123 @@ -60,8 +60,8 @@ int main() { // You can declare format string in non-global scope, but in this case // static keyword is required static const CONSTANT char format[] = "%f\n"; - intel::experimental::printf(format, 33.4f); - intel::experimental::printf(format, -33.4f); + ext::oneapi::printf(format, 33.4f); + ext::oneapi::printf(format, -33.4f); } // CHECK-NEXT: 33.4 // CHECK-NEXT: -33.4 @@ -73,21 +73,21 @@ int main() { using ocl_int4 = cl::sycl::vec::vector_t; { static const CONSTANT char format[] = "%v4d\n"; - intel::experimental::printf(format, (ocl_int4)v4); + ext::oneapi::printf(format, (ocl_int4)v4); } // However, you are still able to print them by-element: { - intel::experimental::printf(format_vec, (int32_t)v4.w(), + ext::oneapi::printf(format_vec, (int32_t)v4.w(), (int32_t)v4.z(), (int32_t)v4.y(), (int32_t)v4.x()); } #else // On host side you always have to print them by-element: - intel::experimental::printf(format_vec, (int32_t)v4.x(), + ext::oneapi::printf(format_vec, (int32_t)v4.x(), (int32_t)v4.y(), 
(int32_t)v4.z(), (int32_t)v4.w()); - intel::experimental::printf(format_vec, (int32_t)v4.w(), + ext::oneapi::printf(format_vec, (int32_t)v4.w(), (int32_t)v4.z(), (int32_t)v4.y(), (int32_t)v4.x()); #endif // __SYCL_DEVICE_ONLY__ @@ -100,7 +100,7 @@ int main() { // According to OpenCL spec, argument should be a void pointer { static const CONSTANT char format[] = "%p\n"; - intel::experimental::printf(format, (void *)Ptr); + ext::oneapi::printf(format, (void *)Ptr); } // CHECK-NEXT: {{(0x)?[0-9a-fA-F]+$}} }); @@ -111,7 +111,7 @@ int main() { Queue.submit([&](handler &CGH) { CGH.parallel_for(range<1>(10), [=](id<1> i) { // cast to uint64_t to be sure that we pass 64-bit unsigned value - intel::experimental::printf(format_hello_world_2, (uint64_t)i.get(0)); + ext::oneapi::printf(format_hello_world_2, (uint64_t)i.get(0)); }); }); Queue.wait(); diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 18dd76294022a..85a0eae294d5f 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -245,7 +245,7 @@ int main() { myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::intel::ctz(s::cl_int{ 0x7FFFFFF0 }); + AccR[0] = s::ext::oneapi::ctz(s::cl_int{ 0x7FFFFFF0 }); }); }); } diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index 5a3a3dd0c80cf..74db909d278bb 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -387,7 +387,7 @@ int main() { myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::intel::ctz(s::cl_int2{ 0x7FFFFFF0, 0x7FFFFFF0 }); + AccR[0] = s::ext::oneapi::ctz(s::cl_int2{ 0x7FFFFFF0, 0x7FFFFFF0 }); }); }); } diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp index c68a891dcf94c..ba76fdf5dbda8 100644 --- 
a/sycl/test/function-pointers/fp-as-kernel-arg.cpp +++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp @@ -31,7 +31,7 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - auto FptrStorage = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D); + auto FptrStorage = cl::sycl::ext::oneapi::get_device_func_ptr(&add, "add", P, D); if (!D.is_host()) { // FIXME: update this check with query to supported extension // For now, we don't have runtimes that report required OpenCL extension and @@ -55,7 +55,7 @@ int main() { CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { auto Fptr = - cl::sycl::intel::to_device_func_ptr(FptrStorage); + cl::sycl::ext::oneapi::to_device_func_ptr(FptrStorage); AccA[Index] = Fptr(AccA[Index], AccB[Index]); }); }); diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp index aa2bc85ec9874..255bbb0212eeb 100644 --- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp +++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp @@ -33,12 +33,12 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - cl::sycl::buffer DispatchTable(2); + cl::sycl::buffer DispatchTable(2); { auto DTAcc = DispatchTable.get_access(); - DTAcc[0] = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D); - DTAcc[1] = cl::sycl::intel::get_device_func_ptr(&sub, "sub", P, D); + DTAcc[0] = cl::sycl::ext::oneapi::get_device_func_ptr(&add, "add", P, D); + DTAcc[1] = cl::sycl::ext::oneapi::get_device_func_ptr(&sub, "sub", P, D); if (!D.is_host()) { // FIXME: update this check with query to supported extension // For now, we don't have runtimes that report required OpenCL extension @@ -69,7 +69,7 @@ int main() { CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { auto FP = - cl::sycl::intel::to_device_func_ptr(AccDT[Mode]); + cl::sycl::ext::oneapi::to_device_func_ptr(AccDT[Mode]); AccA[Index] 
= FP(AccA[Index], AccB[Index]); }); diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index 2a175d000bb6f..385b8f581a424 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class all_of_kernel; diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index 6ce61afaffdec..db7acd83ce624 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class any_of_kernel; diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index df0887a40d4a0..c3f26be96b630 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; class broadcast_kernel; diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 47dc1f6122720..537d2117b5bc7 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -14,7 +14,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class exclusive_scan_kernel; diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 54311a162ed9e..8e718d82ae22f 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -14,7 +14,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class inclusive_scan_kernel; diff --git 
a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp index ff02cf7e77f9e..b41c4b3ab7c4d 100644 --- a/sycl/test/group-algorithm/leader.cpp +++ b/sycl/test/group-algorithm/leader.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; class leader_kernel; diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index c8b56158d20b7..e3ef07030ceab 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class none_of_kernel; diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 64ed0bd82fcc2..251f3fbb1225a 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -13,7 +13,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; template class reduce_kernel; diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp index 2b3f75ae2182e..b354dd186a5d2 100644 --- a/sycl/test/linear_id/linear-sub_group.cpp +++ b/sycl/test/linear_id/linear-sub_group.cpp @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { nd_range<2>(range<2>(outer, inner), range<2>(outer, inner)), [=](nd_item<2> it) { id<2> idx = it.get_global_id(); - intel::sub_group sg = it.get_sub_group(); + ext::oneapi::sub_group sg = it.get_sub_group(); output[idx] = sg.get_group_id()[0] * sg.get_local_range()[0] + sg.get_local_id()[0]; }); diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp index 7f8e8e9726e59..35ac1266cb11c 100644 --- a/sycl/test/reduction/reduction_ctor.cpp +++ b/sycl/test/reduction/reduction_ctor.cpp @@ -77,7 +77,7 @@ void testKnown(T Identity, T A, T B) { // This accessor is not really used 
in this test. accessor ReduAcc(ReduBuf, CGH); - auto Redu = intel::reduction(ReduAcc, BOp); + auto Redu = ext::oneapi::reduction(ReduAcc, BOp); assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, A, B); @@ -99,7 +99,7 @@ void testUnknown(T Identity, T A, T B) { // This accessor is not really used in this test. accessor ReduAcc(ReduBuf, CGH); - auto Redu = intel::reduction(ReduAcc, Identity, BOp); + auto Redu = ext::oneapi::reduction(ReduAcc, Identity, BOp); assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, Identity, A, B); @@ -119,18 +119,18 @@ void testBoth(T Identity, T A, T B) { int main() { // testKnown does not pass identity to reduction ctor. - testBoth>(0, 1, 7); + testBoth>(0, 1, 7); testBoth>(1, 1, 7); - testBoth>(0, 1, 8); - testBoth>(0, 7, 3); - testBoth>(~0, 7, 3); - testBoth>((std::numeric_limits::max)(), 7, 3); - testBoth>((std::numeric_limits::min)(), 7, 3); + testBoth>(0, 1, 8); + testBoth>(0, 7, 3); + testBoth>(~0, 7, 3); + testBoth>((std::numeric_limits::max)(), 7, 3); + testBoth>((std::numeric_limits::min)(), 7, 3); - testBoth>(0, 1, 7); + testBoth>(0, 1, 7); testBoth>(1, 1, 7); - testBoth>(getMaximumFPValue(), 7, 3); - testBoth>(getMinimumFPValue(), 7, 3); + testBoth>(getMaximumFPValue(), 7, 3); + testBoth>(getMinimumFPValue(), 7, 3); testUnknown, 0, PointPlus>(Point(0), Point(1), Point(7)); testUnknown, 1, PointPlus>(Point(0), Point(1), Point(7)); diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp index c700097993079..49a5e3d799262 100644 --- a/sycl/test/reduction/reduction_nd_conditional.cpp +++ b/sycl/test/reduction/reduction_nd_conditional.cpp @@ -85,7 +85,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); 
range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -114,10 +114,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) { } int main() { - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 2, 64); - test>(0, 16, 256); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 2, 64); + test>(0, 16, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp index a80aefc09cd45..9cdfbb8dce2fc 100644 --- a/sycl/test/reduction/reduction_nd_ext_type.hpp +++ b/sycl/test/reduction/reduction_nd_ext_type.hpp @@ -30,7 +30,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -64,15 +64,15 @@ int runTests(const string_class &ExtensionName) { // Check some less standards WG sizes and corner cases first. 
test>(0, 4, 4); - test>(0, 4, 64); + test>(0, 4, 64); - test>(getMaximumFPValue(), 7, 7); - test>(getMinimumFPValue(), 7, 7 * 5); + test>(getMaximumFPValue(), 7, 7); + test>(getMinimumFPValue(), 7, 7 * 5); #if __cplusplus >= 201402L - test>(1, 3, 3 * 5); - test>(getMaximumFPValue(), 3, 3); - test>(getMinimumFPValue(), 3, 3); + test>(1, 3, 3 * 5); + test>(getMaximumFPValue(), 3, 3); + test>(getMinimumFPValue(), 3, 3); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp index 834ccf4407649..356038d9f38b6 100644 --- a/sycl/test/reduction/reduction_nd_s0_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp @@ -35,7 +35,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -59,33 +59,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. 
test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 0, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp index 2040b632e07fb..88f408a1c4f8d 100644 --- a/sycl/test/reduction/reduction_nd_s0_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp @@ -37,7 +37,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -61,33 +61,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. 
test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 0, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp index 9fe36d69daa8c..68b8e7cafb811 100644 --- a/sycl/test/reduction/reduction_nd_s1_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp @@ -36,7 +36,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -60,33 +60,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. 
test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 1, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp index e59ed8c4785a5..64ddc1371b070 100644 --- a/sycl/test/reduction/reduction_nd_s1_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp @@ -38,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -62,33 +62,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. 
test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 1, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp index e972105bbab50..42d19f9d8025a 100644 --- a/sycl/test/reduction/reduction_placeholder.cpp +++ b/sycl/test/reduction/reduction_placeholder.cpp @@ -41,7 +41,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); CGH.require(Out); - auto Redu = intel::reduction(Out, Identity, BinaryOperation()); + auto Redu = ext::oneapi::reduction(Out, Identity, BinaryOperation()); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); @@ -64,16 +64,16 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - test>(0, 49, 49 * 5); - test>(0, 8, 8); + test>(0, 49, 49 * 5); + test>(0, 8, 8); // fast atomics - test>(0, 7, 7 * 3); - test>(0, 4, 128); + test>(0, 7, 7 * 3); + test>(0, 4, 128); // fast reduce - test>(getMaximumFPValue(), 5, 5 * 7); - test>(getMinimumFPValue(), 4, 128); + test>(getMaximumFPValue(), 5, 5 * 7); + test>(getMinimumFPValue(), 4, 128); // generic algorithm test>(1, 7, 7 * 5); diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp index ee44ecd37998c..31aa24de49622 100644 --- a/sycl/test/reduction/reduction_transparent.cpp +++ 
b/sycl/test/reduction/reduction_transparent.cpp @@ -46,7 +46,7 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, intel::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ext::oneapi::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); @@ -86,7 +86,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, intel::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ext::oneapi::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); @@ -110,8 +110,8 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { #if __cplusplus >= 201402L - test>(getMinimumFPValue(), 7, 7 * 5); - test>(0, 7, 49); + test>(getMinimumFPValue(), 7, 7 * 5); + test>(0, 7, 49); test>(1, 4, 16); #endif // __cplusplus >= 201402L diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp index 592a36904a8e8..6915f86af876e 100644 --- a/sycl/test/reduction/reduction_usm.cpp +++ b/sycl/test/reduction/reduction_usm.cpp @@ -7,7 +7,7 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out -// TODO: Enable the test for HOST when it supports intel::reduce() and barrier() +// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with USM var. @@ -62,7 +62,7 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { // Compute. 
Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - auto Redu = intel::reduction(ReduVarPtr, Identity, BOp); + auto Redu = ext::oneapi::reduction(ReduVarPtr, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); @@ -105,16 +105,16 @@ void testUSM(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - testUSM>(0, 49, 49 * 5); - testUSM>(0, 8, 128); + testUSM>(0, 49, 49 * 5); + testUSM>(0, 8, 128); // fast atomics - testUSM>(0, 7, 7 * 3); - testUSM>(0, 4, 128); + testUSM>(0, 7, 7 * 3); + testUSM>(0, 4, 128); // fast reduce - testUSM>(getMaximumFPValue(), 5, 5 * 7); - testUSM>(getMinimumFPValue(), 4, 128); + testUSM>(getMaximumFPValue(), 5, 5 * 7); + testUSM>(getMinimumFPValue(), 4, 128); // generic algorithm testUSM>(1, 7, 7 * 5); diff --git a/sycl/test/regression/sub-group-store-const-ref.cpp b/sycl/test/regression/sub-group-store-const-ref.cpp index dd10e1d57f12f..b5763f292ea83 100644 --- a/sycl/test/regression/sub-group-store-const-ref.cpp +++ b/sycl/test/regression/sub-group-store-const-ref.cpp @@ -13,4 +13,4 @@ #include using namespace sycl; -void test(intel::sub_group sg, global_ptr ptr) { sg.store(ptr, 1); } +void test(ext::oneapi::sub_group sg, global_ptr ptr) { sg.store(ptr, 1); } diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp index 6e051910ce680..950d4b39dc0bd 100644 --- a/sycl/test/spec_const/spec_const_hw.cpp +++ b/sycl/test/spec_const/spec_const_hw.cpp @@ -38,7 +38,7 @@ int val = 10; int get_value() { return val; } float foo( - const cl::sycl::experimental::spec_constant &f32) { + const cl::sycl::ext::oneapi::spec_constant &f32) { return f32; } @@ -69,10 +69,10 @@ int main(int argc, char **argv) { // TODO make this floating point once supported by the compiler float goldf = (float)get_value(); - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ext::oneapi::spec_constant i32 = 
program1.set_spec_constant(goldi); - cl::sycl::experimental::spec_constant f32 = + cl::sycl::ext::oneapi::spec_constant f32 = program2.set_spec_constant(goldf); program1.build_with_kernel_type(); diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp index 18fb8ed5d9d0c..2942146d0b7a3 100644 --- a/sycl/test/spec_const/spec_const_neg.cpp +++ b/sycl/test/spec_const/spec_const_neg.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) { << "\n"; cl::sycl::program program1(q.get_context()); - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ext::oneapi::spec_constant i32 = program1.set_spec_constant(10); std::vector veci(1); @@ -56,7 +56,7 @@ int main(int argc, char **argv) { try { // This is an attempt to set a spec constant after the program has been // built - spec_const_error should be thrown - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ext::oneapi::spec_constant i32 = program1.set_spec_constant(10); cl::sycl::buffer bufi(veci.data(), veci.size()); @@ -69,7 +69,7 @@ int main(int argc, char **argv) { acci[0] = i32.get(); }); }); - } catch (cl::sycl::experimental::spec_const_error &sc_err) { + } catch (cl::sycl::ext::oneapi::spec_const_error &sc_err) { passed = true; } catch (cl::sycl::exception &e) { std::cout << "*** Exception caught: " << e.what() << "\n"; diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp index ac65587905bf0..9724ea86d43c8 100644 --- a/sycl/test/spec_const/spec_const_redefine.cpp +++ b/sycl/test/spec_const/spec_const_redefine.cpp @@ -71,9 +71,9 @@ int main(int argc, char **argv) { for (int i = 0; i < n_sc_sets; i++) { cl::sycl::program program(q.get_context()); const int *sc_set = &sc_vals[i][0]; - cl::sycl::experimental::spec_constant sc0 = + cl::sycl::ext::oneapi::spec_constant sc0 = program.set_spec_constant(sc_set[0]); - cl::sycl::experimental::spec_constant sc1 = + cl::sycl::ext::oneapi::spec_constant sc1 = 
program.set_spec_constant(sc_set[1]); program.build_with_kernel_type(); diff --git a/sycl/test/spec_const/spec_const_types.cpp b/sycl/test/spec_const/spec_const_types.cpp index c7017b2b69726..5abf49eb3c00c 100644 --- a/sycl/test/spec_const/spec_const_types.cpp +++ b/sycl/test/spec_const/spec_const_types.cpp @@ -42,49 +42,49 @@ int main() { cl::sycl::program program(queue.get_context()); // Create specialization constants. - cl::sycl::experimental::spec_constant i1 = + cl::sycl::ext::oneapi::spec_constant i1 = program.set_spec_constant((bool)get_value()); // CHECK-DAG: _ZTS11MyBoolConst=1|0 - cl::sycl::experimental::spec_constant i8 = + cl::sycl::ext::oneapi::spec_constant i8 = program.set_spec_constant((int8_t)get_value()); // CHECK-DAG: _ZTS11MyInt8Const=1|1 - cl::sycl::experimental::spec_constant ui8 = + cl::sycl::ext::oneapi::spec_constant ui8 = program.set_spec_constant((uint8_t)get_value()); // CHECK-DAG: _ZTS12MyUInt8Const=1|2 - cl::sycl::experimental::spec_constant i16 = + cl::sycl::ext::oneapi::spec_constant i16 = program.set_spec_constant((int16_t)get_value()); // CHECK-DAG: _ZTS12MyInt16Const=1|3 - cl::sycl::experimental::spec_constant ui16 = + cl::sycl::ext::oneapi::spec_constant ui16 = program.set_spec_constant((uint16_t)get_value()); // CHECK-DAG: _ZTS13MyUInt16Const=1|4 - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ext::oneapi::spec_constant i32 = program.set_spec_constant((int32_t)get_value()); // CHECK-DAG: _ZTS12MyInt32Const=1|5 - cl::sycl::experimental::spec_constant ui32 = + cl::sycl::ext::oneapi::spec_constant ui32 = program.set_spec_constant((uint32_t)get_value()); // CHECK-DAG: _ZTS13MyUInt32Const=1|6 - cl::sycl::experimental::spec_constant i64 = + cl::sycl::ext::oneapi::spec_constant i64 = program.set_spec_constant((int64_t)get_value()); // CHECK-DAG: _ZTS12MyInt64Const=1|7 - cl::sycl::experimental::spec_constant ui64 = + cl::sycl::ext::oneapi::spec_constant ui64 = program.set_spec_constant((uint64_t)get_value()); // CHECK-DAG: 
_ZTS13MyUInt64Const=1|8 #define HALF 0 // TODO not yet supported #if HALF - cl::sycl::experimental::spec_constant f16 = + cl::sycl::ext::oneapi::spec_constant f16 = program.set_spec_constant((cl::sycl::half)get_value()); #endif - cl::sycl::experimental::spec_constant f32 = + cl::sycl::ext::oneapi::spec_constant f32 = program.set_spec_constant((float)get_value()); // CHECK-DAG: _ZTS12MyFloatConst=1|9 - cl::sycl::experimental::spec_constant f64 = + cl::sycl::ext::oneapi::spec_constant f64 = program.set_spec_constant((double)get_value()); // CHECK-DAG: _ZTS13MyDoubleConst=1|10 diff --git a/sycl/test/sub_group/attributes.cpp b/sycl/test/sub_group/attributes.cpp index d8173d2d1cf72..28c5a99a7fa8b 100644 --- a/sycl/test/sub_group/attributes.cpp +++ b/sycl/test/sub_group/attributes.cpp @@ -18,13 +18,13 @@ #include -#define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ - class KernelFunctor##SIZE { \ - public: \ - [[cl::intel_reqd_sub_group_size(SIZE)]] void \ - operator()(cl::sycl::nd_item<1> Item) { \ - const auto GID = Item.get_global_id(); \ - } \ +#define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ + class KernelFunctor##SIZE { \ + public: \ + [[cl::intel_reqd_sub_group_size(SIZE)]] void \ + operator()(cl::sycl::nd_item<1> Item) { \ + const auto GID = Item.get_global_id(); \ + } \ }; KERNEL_FUNCTOR_WITH_SIZE(1); @@ -44,7 +44,8 @@ inline uint32_t flp2(uint32_t X) { return X - (X >> 1); } -template inline void submit(cl::sycl::queue &Q) { +template +inline void submit(cl::sycl::queue &Q) { Q.submit([](cl::sycl::handler &cgh) { Fn F; cgh.parallel_for(cl::sycl::nd_range<1>{64, 16}, F); diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp index 25e31cbeb521c..c8306c0cc18df 100644 --- a/sycl/test/sub_group/barrier.cpp +++ b/sycl/test/sub_group/barrier.cpp @@ -19,9 +19,11 @@ #include #include #include -template class sycl_subgr; +template +class sycl_subgr; using namespace cl::sycl; -template void check(queue &Queue, size_t G = 240, size_t L = 60) { +template +void 
check(queue &Queue, size_t G = 240, size_t L = 60) { try { nd_range<1> NdRange(G, L); std::vector data(G); @@ -33,7 +35,7 @@ template void check(queue &Queue, size_t G = 240, size_t L = 60) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); size_t lid = SG.get_local_id().get(0); size_t gid = NdItem.get_global_id(0); size_t SGoff = gid - lid; diff --git a/sycl/test/sub_group/broadcast.hpp b/sycl/test/sub_group/broadcast.hpp index b7c6128cde0c2..9f722dccc3ce7 100644 --- a/sycl/test/sub_group/broadcast.hpp +++ b/sycl/test/sub_group/broadcast.hpp @@ -22,7 +22,7 @@ void check(queue &Queue) { auto syclacc = syclbuf.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); /*Broadcast GID of element with SGLID == SGID */ syclacc[NdItem.get_global_id()] = broadcast(SG, T(NdItem.get_global_id(0)), SG.get_group_id()); diff --git a/sycl/test/sub_group/common.cpp b/sycl/test/sub_group/common.cpp index 17b1a9d8166d8..4863f25a96aab 100644 --- a/sycl/test/sub_group/common.cpp +++ b/sycl/test/sub_group/common.cpp @@ -36,7 +36,7 @@ void check(queue &Queue, unsigned int G, unsigned int L) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto syclacc = syclbuf.get_access(cgh); cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0); syclacc[NdItem.get_global_id()].local_range = SG.get_local_range().get(0); diff --git a/sycl/test/sub_group/common_ocl.cpp b/sycl/test/sub_group/common_ocl.cpp index 232e6c6c11acc..4b4085afb2b24 100644 --- a/sycl/test/sub_group/common_ocl.cpp +++ b/sycl/test/sub_group/common_ocl.cpp @@ 
-64,7 +64,7 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) { Queue.submit([&](handler &cgh) { auto syclacc = syclbuf.get_access(cgh); cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0); syclacc[NdItem.get_global_id()].local_range = SG.get_local_range().get(0); diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp index d2d7e191dfa32..2f6554f61f82d 100644 --- a/sycl/test/sub_group/generic-shuffle.cpp +++ b/sycl/test/sub_group/generic-shuffle.cpp @@ -41,7 +41,7 @@ void check_pointer(queue &Queue, size_t G = 240, size_t L = 60) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) @@ -127,7 +127,7 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 240, size_t L = 60) { auto in = buf_in.template get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) diff --git a/sycl/test/sub_group/helper.hpp b/sycl/test/sub_group/helper.hpp index a171743b9fe79..bc88372c870ae 100644 --- a/sycl/test/sub_group/helper.hpp +++ b/sycl/test/sub_group/helper.hpp @@ -12,12 +12,14 @@ using namespace cl::sycl; -template struct utils { +template +struct utils { static T1 add_vec(const vec &v); static bool cmp_vec(const vec &v, const vec &r); static std::string stringify_vec(const vec &v); }; -template struct utils { +template +struct utils { static T2 add_vec(const vec &v) { return v.s0(); } static 
bool cmp_vec(const vec &v, const vec &r) { return v.s0() == r.s0(); @@ -26,7 +28,8 @@ template struct utils { return std::to_string((T2)v.s0()); } }; -template struct utils { +template +struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1(); } static bool cmp_vec(const vec &v, const vec &r) { return v.s0() == r.s0() && v.s1() == r.s1(); @@ -36,7 +39,8 @@ template struct utils { std::to_string((T2)v.s1()) + " )"; } }; -template struct utils { +template +struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3(); } @@ -50,7 +54,8 @@ template struct utils { std::to_string((T2)v.s3()) + " )"; } }; -template struct utils { +template +struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7(); @@ -69,7 +74,8 @@ template struct utils { } }; -template struct utils { +template +struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7() + v.s8() + v.s9() + v.sA() + v.sB() + v.sC() + v.sD() + @@ -96,7 +102,8 @@ template struct utils { } }; -template void exit_if_not_equal(T val, T ref, const char *name) { +template +void exit_if_not_equal(T val, T ref, const char *name) { if (std::is_floating_point::value) { if (std::fabs(val - ref) > 0.01) { std::cout << "Unexpected result for " << name << ": " << (double)val @@ -130,9 +137,10 @@ void exit_if_not_equal(T *val, T *ref, const char *name) { } } -template <> void exit_if_not_equal(half val, half ref, const char *name) { - int16_t cmp_val = reinterpret_cast(val); - int16_t cmp_ref = reinterpret_cast(ref); +template <> +void exit_if_not_equal(half val, half ref, const char *name) { + int16_t cmp_val = reinterpret_cast(val); + int16_t cmp_ref = reinterpret_cast(ref); if (std::abs(cmp_val - cmp_ref) > 1) { std::cout << "Unexpected result for " << name << ": " << (float)val << " expected value: " << (float)ref << std::endl; diff --git 
a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp index 8366e8baca7d9..507b65a8261be 100644 --- a/sycl/test/sub_group/load_store.cpp +++ b/sycl/test/sub_group/load_store.cpp @@ -17,11 +17,13 @@ #include "helper.hpp" #include -template class sycl_subgr; +template +class sycl_subgr; using namespace cl::sycl; -template void check(queue &Queue) { +template +void check(queue &Queue) { const int G = 1024, L = 128; try { nd_range<1> NdRange(G, L); @@ -40,7 +42,7 @@ template void check(queue &Queue) { accessor LocalMem( {L}, cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); if (SG.get_group_id().get(0) % N == 0) { size_t SGOffset = SG.get_group_id().get(0) * SG.get_max_local_range().get(0); @@ -94,7 +96,8 @@ template void check(queue &Queue) { exit(1); } } -template void check(queue &Queue) { +template +void check(queue &Queue) { const int G = 128, L = 64; try { nd_range<1> NdRange(G, L); @@ -114,7 +117,7 @@ template void check(queue &Queue) { accessor LocalMem( {L}, cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); if (NdItem.get_global_id(0) == 0) sgsizeacc[0] = SG.get_max_local_range()[0]; size_t SGOffset = diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp index 2fd29e30a3081..31fd2c5a459b6 100644 --- a/sycl/test/sub_group/reduce.hpp +++ b/sycl/test/sub_group/reduce.hpp @@ -26,7 +26,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto acc = buf.template get_access(cgh); cgh.parallel_for>( NdRange, [=](nd_item<1> NdItem) { - intel::sub_group sg = NdItem.get_sub_group(); + ext::oneapi::sub_group sg = NdItem.get_sub_group(); if (skip_init) { acc[NdItem.get_global_id(0)] = reduce(sg, T(NdItem.get_global_id(0)), op); @@ -73,23 +73,23 @@ void check(queue &Queue, size_t G = 
240, size_t L = 60) { L = 32; } - check_op(Queue, T(L), intel::plus(), false, G, L); - check_op(Queue, T(0), intel::plus(), true, G, L); + check_op(Queue, T(L), ext::oneapi::plus(), false, G, L); + check_op(Queue, T(0), ext::oneapi::plus(), true, G, L); - check_op(Queue, T(0), intel::minimum(), false, G, L); - check_op(Queue, T(G), intel::minimum(), true, G, L); + check_op(Queue, T(0), ext::oneapi::minimum(), false, G, L); + check_op(Queue, T(G), ext::oneapi::minimum(), true, G, L); - check_op(Queue, T(G), intel::maximum(), false, G, L); - check_op(Queue, T(0), intel::maximum(), true, G, L); + check_op(Queue, T(G), ext::oneapi::maximum(), false, G, L); + check_op(Queue, T(0), ext::oneapi::maximum(), true, G, L); #if __cplusplus >= 201402L - check_op(Queue, T(L), intel::plus<>(), false, G, L); - check_op(Queue, T(0), intel::plus<>(), true, G, L); + check_op(Queue, T(L), ext::oneapi::plus<>(), false, G, L); + check_op(Queue, T(0), ext::oneapi::plus<>(), true, G, L); - check_op(Queue, T(0), intel::minimum<>(), false, G, L); - check_op(Queue, T(G), intel::minimum<>(), true, G, L); + check_op(Queue, T(0), ext::oneapi::minimum<>(), false, G, L); + check_op(Queue, T(G), ext::oneapi::minimum<>(), true, G, L); - check_op(Queue, T(G), intel::maximum<>(), false, G, L); - check_op(Queue, T(0), intel::maximum<>(), true, G, L); + check_op(Queue, T(G), ext::oneapi::maximum<>(), false, G, L); + check_op(Queue, T(0), ext::oneapi::maximum<>(), true, G, L); #endif } diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp index ebb6abda3984d..42c8c373044f9 100644 --- a/sycl/test/sub_group/scan.hpp +++ b/sycl/test/sub_group/scan.hpp @@ -28,7 +28,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto inacc = inbuf.template get_access(cgh); cgh.parallel_for>( NdRange, [=](nd_item<1> NdItem) { - intel::sub_group sg = NdItem.get_sub_group(); + ext::oneapi::sub_group sg = NdItem.get_sub_group(); if (skip_init) { 
exacc[NdItem.get_global_id(0)] = exclusive_scan(sg, T(NdItem.get_global_id(0)), op); @@ -81,50 +81,50 @@ void check(queue &Queue, size_t G = 120, size_t L = 60) { L = 32; } - check_op(Queue, T(L), intel::plus(), false, G, L); - check_op(Queue, T(0), intel::plus(), true, G, L); + check_op(Queue, T(L), ext::oneapi::plus(), false, G, L); + check_op(Queue, T(0), ext::oneapi::plus(), true, G, L); - check_op(Queue, T(0), intel::minimum(), false, G, L); + check_op(Queue, T(0), ext::oneapi::minimum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), intel::minimum(), + check_op(Queue, std::numeric_limits::infinity(), ext::oneapi::minimum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), intel::minimum(), true, + check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum(), true, G, L); } - check_op(Queue, T(G), intel::maximum(), false, G, L); + check_op(Queue, T(G), ext::oneapi::maximum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), intel::maximum(), + check_op(Queue, -std::numeric_limits::infinity(), ext::oneapi::maximum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), intel::maximum(), true, + check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum(), true, G, L); } #if __cplusplus >= 201402L - check_op(Queue, T(L), intel::plus<>(), false, G, L); - check_op(Queue, T(0), intel::plus<>(), true, G, L); + check_op(Queue, T(L), ext::oneapi::plus<>(), false, G, L); + check_op(Queue, T(0), ext::oneapi::plus<>(), true, G, L); - check_op(Queue, T(0), intel::minimum<>(), false, G, L); + check_op(Queue, T(0), ext::oneapi::minimum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), intel::minimum<>(), + check_op(Queue, std::numeric_limits::infinity(), ext::oneapi::minimum<>(), true, G, L); } 
else { - check_op(Queue, std::numeric_limits::max(), intel::minimum<>(), true, + check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum<>(), true, G, L); } - check_op(Queue, T(G), intel::maximum<>(), false, G, L); + check_op(Queue, T(G), ext::oneapi::maximum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), intel::maximum<>(), + check_op(Queue, -std::numeric_limits::infinity(), ext::oneapi::maximum<>(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), intel::maximum<>(), true, + check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum<>(), true, G, L); } #endif diff --git a/sycl/test/sub_group/shuffle.hpp b/sycl/test/sub_group/shuffle.hpp index 94c82ab99c2d1..fde4a03e5fde0 100644 --- a/sycl/test/sub_group/shuffle.hpp +++ b/sycl/test/sub_group/shuffle.hpp @@ -43,7 +43,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); vec vwggid(wggid), vsgid(sgid); @@ -150,7 +150,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp index 382266fa412c0..80283f67cdbbe 100644 --- a/sycl/test/sub_group/vote.cpp +++ b/sycl/test/sub_group/vote.cpp @@ -49,7 +49,7 @@ void check(queue Queue, const int G, const int L, const int D, const int R) { auto sganyacc = sganybuf.get_access(cgh); auto sgallacc = 
sgallbuf.get_access(cgh); cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ext::oneapi::sub_group SG = NdItem.get_sub_group(); /* Set to 1 if any local ID in subgroup devided by D has remainder R */ if (any_of(SG, SG.get_local_id().get(0) % D == R)) { sganyacc[NdItem.get_global_id()] = 1; From 2444feda7c545399a46f6a99b5915f4e78963fb6 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Tue, 21 Jul 2020 17:08:40 -0400 Subject: [PATCH 03/13] clang-format Signed-off-by: James Brodman --- sycl/test/atomic_ref/store.cpp | 12 +- sycl/test/built-ins/vector_integer.cpp | 192 ++++++++++++------------- 2 files changed, 101 insertions(+), 103 deletions(-) diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index 69abba8e0580d..c923fd2997ac7 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -11,11 +11,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -class store_kernel; +template class store_kernel; -template -void store_test(queue q, size_t N) { +template void store_test(queue q, size_t N) { T initial = std::numeric_limits::max(); T store = initial; { @@ -24,7 +22,9 @@ void store_test(queue q, size_t N) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(st[0]); + auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); }); @@ -55,7 +55,7 @@ int main() { store_test(q, N); store_test(q, N); store_test(q, N); - //store_test(q, N); + // store_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/built-ins/vector_integer.cpp b/sycl/test/built-ins/vector_integer.cpp index 74db909d278bb..fea10e39e44e4 100644 --- a/sycl/test/built-ins/vector_integer.cpp +++ b/sycl/test/built-ins/vector_integer.cpp @@ -14,14 +14,14 @@ namespace s = cl::sycl; int main() { // max { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); + AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int2{2, 7}); }); }); } @@ -33,14 +33,14 @@ int main() { // max { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); + AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); }); }); } @@ -52,14 +52,14 @@ int main() { // max { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); + AccR[0] = s::max(s::cl_int2{5, 3}, s::cl_int{2}); }); }); } @@ -71,14 +71,14 @@ int main() { // max (longlong2) { - s::longlong2 r{ 0 }; + s::longlong2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::longlong2{ 5, 3 }, s::longlong{ 2 }); + AccR[0] = s::max(s::longlong2{5, 3}, s::longlong{2}); }); }); } @@ -90,14 +90,14 @@ int main() { // max { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::cl_uint2{ 5, 3 }, 
s::cl_uint{ 2 }); + AccR[0] = s::max(s::cl_uint2{5, 3}, s::cl_uint{2}); }); }); } @@ -109,14 +109,14 @@ int main() { // max (ulonglong2) { - s::ulonglong2 r{ 0 }; + s::ulonglong2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::max(s::ulonglong2{ 5, 3 }, s::ulonglong{ 2 }); + AccR[0] = s::max(s::ulonglong2{5, 3}, s::ulonglong{2}); }); }); } @@ -125,17 +125,17 @@ int main() { assert(r1 == 5); assert(r2 == 3); } - + // min { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int2{ 2, 7 }); + AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int2{2, 7}); }); }); } @@ -147,14 +147,14 @@ int main() { // min { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint2{ 2, 7 }); + AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint2{2, 7}); }); }); } @@ -166,14 +166,14 @@ int main() { // min { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_int2{ 5, 3 }, s::cl_int{ 2 }); + AccR[0] = s::min(s::cl_int2{5, 3}, s::cl_int{2}); }); }); } @@ -185,14 +185,14 @@ int main() { // min { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::min(s::cl_uint2{ 5, 3 }, s::cl_uint{ 2 }); + AccR[0] = s::min(s::cl_uint2{5, 3}, s::cl_uint{2}); }); }); } @@ -204,14 +204,14 @@ int main() { // abs { - 
s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::abs(s::cl_int2{ -5, -2 }); + AccR[0] = s::abs(s::cl_int2{-5, -2}); }); }); } @@ -223,14 +223,14 @@ int main() { // abs (longlong) { - s::ulonglong2 r{ 0 }; + s::ulonglong2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::abs(s::longlong2{ -5, -2 }); + AccR[0] = s::abs(s::longlong2{-5, -2}); }); }); } @@ -242,14 +242,14 @@ int main() { // abs_diff { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::abs_diff(s::cl_int2{ -5, -2 }, s::cl_int2{ -1, -1 }); + AccR[0] = s::abs_diff(s::cl_int2{-5, -2}, s::cl_int2{-1, -1}); }); }); } @@ -261,15 +261,15 @@ int main() { // add_sat { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::add_sat(s::cl_int2{ 0x7FFFFFFF, 0x7FFFFFFF }, - s::cl_int2{ 100, 90 }); + AccR[0] = s::add_sat(s::cl_int2{0x7FFFFFFF, 0x7FFFFFFF}, + s::cl_int2{100, 90}); }); }); } @@ -281,15 +281,15 @@ int main() { // hadd { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::hadd(s::cl_int2{ 0x0000007F, 0x0000007F }, - s::cl_int2{ 0x00000020, 0x00000020 }); + AccR[0] = s::hadd(s::cl_int2{0x0000007F, 0x0000007F}, + s::cl_int2{0x00000020, 0x00000020}); }); }); } @@ -301,15 +301,15 @@ int main() { // rhadd { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, 
s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rhadd(s::cl_int2{ 0x0000007F, 0x0000007F }, - s::cl_int2{ 0x00000020, 0x00000020 }); + AccR[0] = s::rhadd(s::cl_int2{0x0000007F, 0x0000007F}, + s::cl_int2{0x00000020, 0x00000020}); }); }); } @@ -321,15 +321,15 @@ int main() { // clamp - 1 { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clamp(s::cl_int2{ 5, 5 }, s::cl_int2{ 10, 10 }, - s::cl_int2{ 30, 30 }); + AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int2{10, 10}, + s::cl_int2{30, 30}); }); }); } @@ -341,15 +341,14 @@ int main() { // clamp - 2 { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::clamp(s::cl_int2{ 5, 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); + AccR[0] = s::clamp(s::cl_int2{5, 5}, s::cl_int{10}, s::cl_int{30}); }); }); } @@ -361,14 +360,14 @@ int main() { // clz { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clz(s::cl_int2{ 0x0FFFFFFF, 0x0FFFFFFF }); + AccR[0] = s::clz(s::cl_int2{0x0FFFFFFF, 0x0FFFFFFF}); }); }); } @@ -380,14 +379,14 @@ int main() { // ctz { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::ext::oneapi::ctz(s::cl_int2{ 0x7FFFFFF0, 0x7FFFFFF0 }); + AccR[0] = s::ext::oneapi::ctz(s::cl_int2{0x7FFFFFF0, 0x7FFFFFF0}); }); }); } @@ -399,16 +398,16 @@ int main() { // mad_hi { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { 
s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad_hi(s::cl_int2{ 0x10000000, 0x10000000 }, - s::cl_int2{ 0x00000100, 0x00000100 }, - s::cl_int2{ 1, 1 }); + AccR[0] = + s::mad_hi(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}, s::cl_int2{1, 1}); }); }); } @@ -420,16 +419,16 @@ int main() { // mad_sat { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad_sat(s::cl_int2{ 0x10000000, 0x10000000 }, - s::cl_int2{ 0x00000100, 0x00000100 }, - s::cl_int2{ 1, 1 }); + AccR[0] = + s::mad_sat(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}, s::cl_int2{1, 1}); }); }); } @@ -441,15 +440,15 @@ int main() { // mul_hi { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul_hi(s::cl_int2{ 0x10000000, 0x10000000 }, - s::cl_int2{ 0x00000100, 0x00000100 }); + AccR[0] = s::mul_hi(s::cl_int2{0x10000000, 0x10000000}, + s::cl_int2{0x00000100, 0x00000100}); }); }); } @@ -461,15 +460,15 @@ int main() { // rotate { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rotate(s::cl_int2{ 0x11100000, 0x11100000 }, - s::cl_int2{ 12, 12 }); + AccR[0] = + s::rotate(s::cl_int2{0x11100000, 0x11100000}, s::cl_int2{12, 12}); }); }); } @@ -482,50 +481,49 @@ int main() { // sub_sat { auto TestSubSat = [](s::cl_int2 x, s::cl_int2 y) { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = 
BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::sub_sat(x, y); - }); + cgh.single_task( + [=]() { AccR[0] = s::sub_sat(x, y); }); }); } return r; }; - s::cl_int2 r1 = TestSubSat(s::cl_int2{ 10, 10 }, - s::cl_int2{ 0x80000000, 0x80000000 }); + s::cl_int2 r1 = + TestSubSat(s::cl_int2{10, 10}, s::cl_int2{0x80000000, 0x80000000}); s::cl_int r1x = r1.x(); s::cl_int r1y = r1.y(); assert(r1x == 0x7FFFFFFF); assert(r1y == 0x7FFFFFFF); - s::cl_int2 r2 = TestSubSat(s::cl_int2{ 0x7FFFFFFF, 0x80000000 }, - s::cl_int2{ 0xFFFFFFFF, 0x00000001 }); + s::cl_int2 r2 = TestSubSat(s::cl_int2{0x7FFFFFFF, 0x80000000}, + s::cl_int2{0xFFFFFFFF, 0x00000001}); s::cl_int r2x = r2.x(); s::cl_int r2y = r2.y(); assert(r2x == 0x7FFFFFFF); assert(r2y == 0x80000000); - s::cl_int2 r3 = TestSubSat(s::cl_int2{ 10499, 30678 }, - s::cl_int2{ 30678, 10499 }); + s::cl_int2 r3 = + TestSubSat(s::cl_int2{10499, 30678}, s::cl_int2{30678, 10499}); s::cl_int r3x = r3.x(); s::cl_int r3y = r3.y(); assert(r3x == -20179); - assert(r3y == 20179); + assert(r3y == 20179); } // upsample - 1 { - s::cl_ushort2 r{ 0 }; + s::cl_ushort2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uchar2{ 0x10, 0x10 }, - s::cl_uchar2{ 0x10, 0x10 }); + AccR[0] = + s::upsample(s::cl_uchar2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); }); }); } @@ -537,15 +535,15 @@ int main() { // upsample - 2 { - s::cl_short2 r{ 0 }; + s::cl_short2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_char2{ 0x10, 0x10 }, - s::cl_uchar2{ 0x10, 0x10 }); + AccR[0] = + s::upsample(s::cl_char2{0x10, 0x10}, s::cl_uchar2{0x10, 0x10}); }); }); } @@ -557,15 +555,15 @@ int main() { // upsample - 3 { - s::cl_uint2 r{ 0 }; + s::cl_uint2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); 
s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_ushort2{ 0x0010, 0x0010 }, - s::cl_ushort2{ 0x0010, 0x0010 }); + AccR[0] = s::upsample(s::cl_ushort2{0x0010, 0x0010}, + s::cl_ushort2{0x0010, 0x0010}); }); }); } @@ -577,15 +575,15 @@ int main() { // upsample - 4 { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_short2{ 0x0010, 0x0010 }, - s::cl_ushort2{ 0x0010, 0x0010 }); + AccR[0] = s::upsample(s::cl_short2{0x0010, 0x0010}, + s::cl_ushort2{0x0010, 0x0010}); }); }); } @@ -597,15 +595,15 @@ int main() { // upsample - 5 { - s::cl_ulong2 r{ 0 }; + s::cl_ulong2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uint2{ 0x00000010, 0x00000010 }, - s::cl_uint2{ 0x00000010, 0x00000010 }); + AccR[0] = s::upsample(s::cl_uint2{0x00000010, 0x00000010}, + s::cl_uint2{0x00000010, 0x00000010}); }); }); } @@ -617,15 +615,15 @@ int main() { // upsample - 6 { - s::cl_long2 r{ 0 }; + s::cl_long2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_int2{ 0x00000010, 0x00000010 }, - s::cl_uint2{ 0x00000010, 0x00000010 }); + AccR[0] = s::upsample(s::cl_int2{0x00000010, 0x00000010}, + s::cl_uint2{0x00000010, 0x00000010}); }); }); } @@ -637,14 +635,14 @@ int main() { // popcount { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::popcount(s::cl_int2{ 0x000000FF, 0x000000FF }); + AccR[0] = 
s::popcount(s::cl_int2{0x000000FF, 0x000000FF}); }); }); } @@ -656,15 +654,15 @@ int main() { // mad24 { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, - s::cl_int2{ 20, 20 }, s::cl_int2{ 20, 20 }); + AccR[0] = s::mad24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, + s::cl_int2{20, 20}, s::cl_int2{20, 20}); }); }); } @@ -676,15 +674,15 @@ int main() { // mul24 { - s::cl_int2 r{ 0 }; + s::cl_int2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul24(s::cl_int2{ 0xFFFFFFFF, 0xFFFFFFFF }, - s::cl_int2{ 20, 20 }); + AccR[0] = + s::mul24(s::cl_int2{0xFFFFFFFF, 0xFFFFFFFF}, s::cl_int2{20, 20}); }); }); } From 5e5305a447dc57ca014292cce2af727575927da5 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Wed, 22 Jul 2020 15:41:53 -0400 Subject: [PATCH 04/13] more clang-format Signed-off-by: James Brodman --- sycl/include/CL/sycl/ext/oneapi/reduction.hpp | 2 +- sycl/include/CL/sycl/handler.hpp | 4 +- sycl/test/atomic_ref/add.cpp | 51 +++--- sycl/test/atomic_ref/compare_exchange.cpp | 40 +++-- sycl/test/atomic_ref/exchange.cpp | 21 ++- sycl/test/atomic_ref/load.cpp | 18 +- sycl/test/atomic_ref/max.cpp | 12 +- sycl/test/atomic_ref/min.cpp | 12 +- sycl/test/atomic_ref/sub.cpp | 51 +++--- sycl/test/built-ins/printf.cpp | 19 +- sycl/test/built-ins/scalar_integer.cpp | 168 ++++++++---------- sycl/test/enqueue_barrier/enqueue_barrier.cpp | 2 +- sycl/test/fpga_tests/fpga_io_pipes.cpp | 2 +- sycl/test/fpga_tests/fpga_pipes.cpp | 44 ++--- .../function-pointers/fp-as-kernel-arg.cpp | 16 +- .../pass-fp-through-buffer.cpp | 11 +- sycl/test/reduction/reduction_ctor.cpp | 50 +++--- sycl/test/reduction/reduction_nd_ext_type.hpp | 28 +-- sycl/test/reduction/reduction_nd_s0_dw.cpp 
| 18 +- sycl/test/reduction/reduction_nd_s0_rw.cpp | 18 +- sycl/test/reduction/reduction_nd_s1_dw.cpp | 18 +- sycl/test/reduction/reduction_nd_s1_rw.cpp | 18 +- sycl/test/reduction/reduction_placeholder.cpp | 18 +- sycl/test/reduction/reduction_transparent.cpp | 12 +- sycl/test/reduction/reduction_usm.cpp | 19 +- sycl/test/sub_group/attributes.cpp | 17 +- sycl/test/sub_group/barrier.cpp | 6 +- sycl/test/sub_group/helper.hpp | 33 ++-- sycl/test/sub_group/load_store.cpp | 9 +- sycl/test/sub_group/scan.hpp | 38 ++-- 30 files changed, 387 insertions(+), 388 deletions(-) diff --git a/sycl/include/CL/sycl/ext/oneapi/reduction.hpp b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp index 67ced2af7bac3..0da27927f1efb 100644 --- a/sycl/include/CL/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/reduction.hpp @@ -9,8 +9,8 @@ #pragma once #include -#include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 9011ba24d4c00..1a09b34825969 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -346,7 +346,7 @@ class __SYCL_EXPORT handler { // Recursively calls itself until arguments pack is fully processed. // The version for regular(standard layout) argument. template - void setArgsHelper(int ArgIndex, T &&Arg, Ts &&...Args) { + void setArgsHelper(int ArgIndex, T &&Arg, Ts &&... Args) { set_arg(ArgIndex, std::move(Arg)); setArgsHelper(++ArgIndex, std::move(Args)...); } @@ -813,7 +813,7 @@ class __SYCL_EXPORT handler { /// Registers pack of arguments(Args) with indexes starting from 0. /// /// \param Args are argument values to be set. - template void set_args(Ts &&...Args) { + template void set_args(Ts &&... 
Args) { setArgsHelper(0, std::move(Args)...); } diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index 6face7c749bf5..6f4b86640fc53 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -12,8 +12,7 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -void add_fetch_test(queue q, size_t N) { +template void add_fetch_test(queue q, size_t N) { T sum = 0; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -23,10 +22,13 @@ void add_fetch_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm.fetch_add(T(1)); }); }); @@ -45,8 +47,7 @@ void add_fetch_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void add_plus_equal_test(queue q, size_t N) { +template void add_plus_equal_test(queue q, size_t N) { T sum = 0; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -56,10 +57,13 @@ void add_plus_equal_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm += T(1); }); }); @@ -78,8 +82,7 @@ void add_plus_equal_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void add_pre_inc_test(queue q, size_t N) { +template void add_pre_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -89,10 +92,13 @@ void 
add_pre_inc_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = ++atm; }); }); @@ -111,8 +117,7 @@ void add_pre_inc_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void add_post_inc_test(queue q, size_t N) { +template void add_post_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -122,10 +127,13 @@ void add_post_inc_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm++; }); }); @@ -144,8 +152,7 @@ void add_post_inc_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void add_test(queue q, size_t N) { +template void add_test(queue q, size_t N) { add_fetch_test(q, N); add_plus_equal_test(q, N); add_pre_inc_test(q, N); @@ -153,13 +160,11 @@ void add_test(queue q, size_t N) { } // Floating-point types do not support pre- or post-increment -template <> -void add_test(queue q, size_t N) { +template <> void add_test(queue q, size_t N) { add_fetch_test(q, N); add_plus_equal_test(q, N); } -template <> -void add_test(queue q, size_t N) { +template <> void add_test(queue q, size_t N) { add_fetch_test(q, N); add_plus_equal_test(q, N); } @@ -183,7 +188,7 @@ int main() { add_test(q, N); add_test(q, N); add_test(q, N); - //add_test(q, N); + // add_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index 31290418a144b..5660fa64882cf 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -11,11 +11,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -class compare_exchange_kernel; +template class compare_exchange_kernel; -template -void compare_exchange_test(queue q, size_t N) { +template void compare_exchange_test(queue q, size_t N) { const T initial = std::numeric_limits::max(); T compare_exchange = initial; std::vector output(N); @@ -25,19 +23,25 @@ void compare_exchange_test(queue q, size_t N) { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { - auto exc = compare_exchange_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); - cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); - T result = initial; - bool success = atm.compare_exchange_strong(result, (T)gid); - if (success) { - out[gid] = result; - } else { - out[gid] = gid; - } - }); + auto exc = + compare_exchange_buf.template get_access( + cgh); + auto out = + output_buf.template get_access(cgh); + cgh.parallel_for>( + range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(exc[0]); + T result = initial; + bool success = atm.compare_exchange_strong(result, (T)gid); + if (success) { + out[gid] = result; + } else { + out[gid] = gid; + } + }); }); } @@ -69,7 +73,7 @@ int main() { compare_exchange_test(q, N); compare_exchange_test(q, N); compare_exchange_test(q, N); - //compare_exchange_test(q, N); + // compare_exchange_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index bba5dae8e29b3..3600bb36c3700 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -11,11 +11,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -class exchange_kernel; +template class exchange_kernel; -template -void exchange_test(queue q, size_t N) { +template void exchange_test(queue q, size_t N) { const T initial = std::numeric_limits::max(); T exchange = initial; std::vector output(N); @@ -25,11 +23,15 @@ void exchange_test(queue q, size_t N) { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { - auto exc = exchange_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto exc = + exchange_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = atomic_ref(exc[0]); out[gid] = atm.exchange(gid); }); }); @@ -38,7 +40,8 @@ void exchange_test(queue q, size_t N) { // Only one work-item should have received the initial sentinel value assert(std::count(output.begin(), output.end(), initial) == 1); - // All other values should be unique; each work-item replaces the value it reads with its own ID + // All other values should be unique; each work-item replaces the value it + // reads with its own ID std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } @@ -62,7 +65,7 @@ int main() { exchange_test(q, N); exchange_test(q, N); exchange_test(q, N); - //exchange_test(q, N); + // exchange_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index 4d95c4a5f1858..2bb3cf45d749c 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -11,11 +11,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -class load_kernel; +template class load_kernel; -template -void load_test(queue q, size_t N) { +template void load_test(queue q, size_t N) { T initial = 42; T load = initial; std::vector output(N); @@ -26,10 +24,13 @@ void load_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto ld = load_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(ld[0]); + auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); }); @@ -37,7 +38,8 @@ void load_test(queue q, size_t N) { // All work-items should read the same value // Atomicity isn't tested here, but support for load() is - assert(std::all_of(output.begin(), output.end(), [&](T x) { return (x == initial); })); + assert(std::all_of(output.begin(), output.end(), + [&](T x) { return (x == initial); })); } int main() { @@ -59,7 +61,7 @@ int main() { load_test(q, N); load_test(q, N); load_test(q, N); - //load_test(q, N); + // load_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index 7be6b9ac392b5..d1c326237b009 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -12,8 +12,7 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -void max_test(queue q, size_t N) { +template void max_test(queue q, size_t N) { T initial = std::numeric_limits::lowest(); T val = initial; std::vector output(N); @@ -24,10 +23,13 @@ void max_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); // +1 accounts for lowest() returning 0 for unsigned types out[gid] = atm.fetch_max(T(gid) + 1); @@ -67,7 +69,7 @@ int main() { max_test(q, N); max_test(q, N); max_test(q, N); - //max_test(q, N); + // max_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 47787a52b2eea..1066370fcf4e3 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -12,8 +12,7 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -void min_test(queue q, size_t N) { +template void min_test(queue q, size_t N) { T initial = std::numeric_limits::max(); T val = initial; std::vector output(N); @@ -24,10 +23,13 @@ void min_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_min(T(gid)); }); }); @@ -65,7 +67,7 @@ int main() { min_test(q, N); min_test(q, N); min_test(q, N); - //min_test(q, N); + // min_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index 13ed2c5bdafbe..5296b41ddc5e5 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -12,8 +12,7 @@ using namespace sycl; using namespace sycl::ext::oneapi; -template -void sub_fetch_test(queue q, size_t N) { +template void sub_fetch_test(queue q, size_t N) { T val = N; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -23,10 +22,13 @@ void sub_fetch_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_sub(T(1)); }); }); @@ -45,8 +47,7 @@ void sub_fetch_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void sub_plus_equal_test(queue q, size_t N) { +template void sub_plus_equal_test(queue q, size_t N) { T val = N; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -56,10 +57,13 @@ void sub_plus_equal_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm -= T(1); }); }); @@ -78,8 +82,7 @@ void sub_plus_equal_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void sub_pre_dec_test(queue q, size_t N) { +template void sub_pre_dec_test(queue q, size_t N) { T val = N; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -89,10 +92,13 @@ void sub_pre_dec_test(queue q, size_t N) { 
q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = --atm; }); }); @@ -111,8 +117,7 @@ void sub_pre_dec_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void sub_post_dec_test(queue q, size_t N) { +template void sub_post_dec_test(queue q, size_t N) { T val = N; std::vector output(N); std::fill(output.begin(), output.end(), 0); @@ -122,10 +127,13 @@ void sub_post_dec_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm--; }); }); @@ -144,8 +152,7 @@ void sub_post_dec_test(queue q, size_t N) { assert(std::unique(output.begin(), output.end()) == output.end()); } -template -void sub_test(queue q, size_t N) { +template void sub_test(queue q, size_t N) { sub_fetch_test(q, N); sub_plus_equal_test(q, N); sub_pre_dec_test(q, N); @@ -153,13 +160,11 @@ void sub_test(queue q, size_t N) { } // Floating-point types do not support pre- or post-decrement -template <> -void sub_test(queue q, size_t N) { +template <> void sub_test(queue q, size_t N) { sub_fetch_test(q, N); sub_plus_equal_test(q, N); } -template <> -void sub_test(queue q, size_t N) { +template <> void sub_test(queue q, size_t N) { sub_fetch_test(q, N); sub_plus_equal_test(q, N); } @@ -183,7 +188,7 @@ int main() { sub_test(q, N); sub_test(q, N); sub_test(q, N); - //sub_test(q, N); + // sub_test(q, N); std::cout << "Test passed." 
<< std::endl; } diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp index fe1a416b9550e..26a7bde2451b3 100644 --- a/sycl/test/built-ins/printf.cpp +++ b/sycl/test/built-ins/printf.cpp @@ -78,18 +78,15 @@ int main() { // However, you are still able to print them by-element: { - ext::oneapi::printf(format_vec, (int32_t)v4.w(), - (int32_t)v4.z(), (int32_t)v4.y(), - (int32_t)v4.x()); + ext::oneapi::printf(format_vec, (int32_t)v4.w(), (int32_t)v4.z(), + (int32_t)v4.y(), (int32_t)v4.x()); } #else // On host side you always have to print them by-element: - ext::oneapi::printf(format_vec, (int32_t)v4.x(), - (int32_t)v4.y(), (int32_t)v4.z(), - (int32_t)v4.w()); - ext::oneapi::printf(format_vec, (int32_t)v4.w(), - (int32_t)v4.z(), (int32_t)v4.y(), - (int32_t)v4.x()); + ext::oneapi::printf(format_vec, (int32_t)v4.x(), (int32_t)v4.y(), + (int32_t)v4.z(), (int32_t)v4.w()); + ext::oneapi::printf(format_vec, (int32_t)v4.w(), (int32_t)v4.z(), + (int32_t)v4.y(), (int32_t)v4.x()); #endif // __SYCL_DEVICE_ONLY__ // CHECK-NEXT: 5,6,7,8 // CHECK-NEXT: 8,7,6,5 @@ -127,8 +124,8 @@ int main() { // CHECK-NEXT: {{[0-9]+}}: Hello, World! 
} -// FIXME: strictly check output order once the bug mentioned above is fixed -// CHECK: {{(Hello, World!)?}} + // FIXME: strictly check output order once the bug mentioned above is fixed + // CHECK: {{(Hello, World!)?}} return 0; } diff --git a/sycl/test/built-ins/scalar_integer.cpp b/sycl/test/built-ins/scalar_integer.cpp index 85a0eae294d5f..3151fb3652955 100644 --- a/sycl/test/built-ins/scalar_integer.cpp +++ b/sycl/test/built-ins/scalar_integer.cpp @@ -14,15 +14,14 @@ namespace s = cl::sycl; int main() { // max { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::max(s::cl_int{ 5 }, s::cl_int{ 2 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::max(s::cl_int{5}, s::cl_int{2}); }); }); } assert(r == 5); @@ -30,15 +29,14 @@ int main() { // max { - s::cl_uint r{ 0 }; + s::cl_uint r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::max(s::cl_uint{ 5 }, s::cl_uint{ 2 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::max(s::cl_uint{5}, s::cl_uint{2}); }); }); } assert(r == 5); @@ -46,15 +44,14 @@ int main() { // min { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::min(s::cl_int{ 5 }, s::cl_int{ 2 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::min(s::cl_int{5}, s::cl_int{2}); }); }); } assert(r == 2); @@ -62,15 +59,14 @@ int main() { // min (longlong) { - s::longlong r{ 0 }; + s::longlong r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::min(s::longlong{ 5 }, s::longlong{ 2 }); - }); + cgh.single_task( + 
[=]() { AccR[0] = s::min(s::longlong{5}, s::longlong{2}); }); }); } assert(r == 2); @@ -78,15 +74,14 @@ int main() { // min { - s::cl_uint r{ 0 }; + s::cl_uint r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::min(s::cl_uint{ 5 }, s::cl_uint{ 2 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::min(s::cl_uint{5}, s::cl_uint{2}); }); }); } assert(r == 2); @@ -94,15 +89,14 @@ int main() { // min (ulonglong) { - s::ulonglong r{ 0 }; + s::ulonglong r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::min(s::ulonglong{ 5 }, s::ulonglong{ 2 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::min(s::ulonglong{5}, s::ulonglong{2}); }); }); } assert(r == 2); @@ -110,15 +104,14 @@ int main() { // abs { - s::cl_uint r{ 0 }; + s::cl_uint r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::abs(s::cl_int{ -5 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::abs(s::cl_int{-5}); }); }); } assert(r == 5); @@ -126,15 +119,14 @@ int main() { // abs_diff { - s::cl_uint r{ 0 }; + s::cl_uint r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::abs_diff(s::cl_int{ -5 }, s::cl_int{ -1 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::abs_diff(s::cl_int{-5}, s::cl_int{-1}); }); }); } assert(r == 4); @@ -142,15 +134,14 @@ int main() { // abs_diff(uchar) { - s::cl_uchar r{ 0 }; + s::cl_uchar r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::abs_diff(s::uchar{ 3 }, s::uchar{ 250 }); 
- }); + cgh.single_task( + [=]() { AccR[0] = s::abs_diff(s::uchar{3}, s::uchar{250}); }); }); } assert(r == 247); @@ -158,14 +149,14 @@ int main() { // add_sat { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::add_sat(s::cl_int{ 0x7FFFFFFF }, s::cl_int{ 100 }); + AccR[0] = s::add_sat(s::cl_int{0x7FFFFFFF}, s::cl_int{100}); }); }); } @@ -174,14 +165,14 @@ int main() { // hadd { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::hadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); + AccR[0] = s::hadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); }); }); } @@ -190,14 +181,14 @@ int main() { // rhadd { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rhadd(s::cl_int{ 0x0000007F }, s::cl_int{ 0x00000020 }); + AccR[0] = s::rhadd(s::cl_int{0x0000007F}, s::cl_int{0x00000020}); }); }); } @@ -206,14 +197,14 @@ int main() { // clamp { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::clamp(s::cl_int{ 5 }, s::cl_int{ 10 }, s::cl_int{ 30 }); + AccR[0] = s::clamp(s::cl_int{5}, s::cl_int{10}, s::cl_int{30}); }); }); } @@ -222,15 +213,14 @@ int main() { // clz { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::clz(s::cl_int{ 0x0FFFFFFF }); - }); + cgh.single_task( + [=]() { AccR[0] = 
s::clz(s::cl_int{0x0FFFFFFF}); }); }); } assert(r == 4); @@ -238,15 +228,14 @@ int main() { // ctz { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::ext::oneapi::ctz(s::cl_int{ 0x7FFFFFF0 }); - }); + cgh.single_task( + [=]() { AccR[0] = s::ext::oneapi::ctz(s::cl_int{0x7FFFFFF0}); }); }); } assert(r == 4); @@ -254,15 +243,15 @@ int main() { // mad_hi { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, - s::cl_int{ 0x00000001 }); + AccR[0] = s::mad_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}, + s::cl_int{0x00000001}); }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. }); } @@ -271,15 +260,15 @@ int main() { // mad_sat { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mad_sat(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }, - s::cl_int{ 0x00000001 }); + AccR[0] = s::mad_sat(s::cl_int{0x10000000}, s::cl_int{0x00000100}, + s::cl_int{0x00000001}); }); // 2^31 * 2^8 = 2^39 -> 0x80 00000000 -> reuslt is saturated in the // product. }); @@ -305,20 +294,19 @@ int main() { }); } assert(r == exp); // Should return the real number of i0*i1+i2 in CPU - // Only fails in vector, but passes in scalar. - + // Only fails in vector, but passes in scalar. 
} // mul_hi { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul_hi(s::cl_int{ 0x10000000 }, s::cl_int{ 0x00000100 }); + AccR[0] = s::mul_hi(s::cl_int{0x10000000}, s::cl_int{0x00000100}); }); // 2^28 * 2^8 = 2^36 -> 0x10 00000000. }); } @@ -360,14 +348,14 @@ int main() { // rotate { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::rotate(s::cl_int{ 0x11100000 }, s::cl_int{ 12 }); + AccR[0] = s::rotate(s::cl_int{0x11100000}, s::cl_int{12}); }); }); } @@ -376,7 +364,7 @@ int main() { // rotate (with large rotate size) { - s::cl_char r{ 0 }; + s::cl_char r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; @@ -384,7 +372,7 @@ int main() { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = s::rotate(static_cast((unsigned char)0xe0), - s::cl_char{ 50 }); + s::cl_char{50}); }); }); } @@ -393,15 +381,14 @@ int main() { // sub_sat { auto TestSubSat = [](s::cl_int x, s::cl_int y) { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::sub_sat(x, y); - }); + cgh.single_task( + [=]() { AccR[0] = s::sub_sat(x, y); }); }); } return r; @@ -419,14 +406,14 @@ int main() { // upsample - 1 { - s::cl_ushort r{ 0 }; + s::cl_ushort r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_uchar{ 0x10 }, s::cl_uchar{ 0x10 }); + AccR[0] = s::upsample(s::cl_uchar{0x10}, s::cl_uchar{0x10}); }); }); } @@ -435,14 +422,14 @@ int main() { // upsample - 2 { - s::cl_short r{ 0 }; + 
s::cl_short r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_char{ 0x10 }, s::cl_uchar{ 0x10 }); + AccR[0] = s::upsample(s::cl_char{0x10}, s::cl_uchar{0x10}); }); }); } @@ -451,14 +438,14 @@ int main() { // upsample - 3 { - s::cl_uint r{ 0 }; + s::cl_uint r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_ushort{ 0x0010 }, s::cl_ushort{ 0x0010 }); + AccR[0] = s::upsample(s::cl_ushort{0x0010}, s::cl_ushort{0x0010}); }); }); } @@ -467,14 +454,14 @@ int main() { // upsample - 4 { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::upsample(s::cl_short{ 0x0010 }, s::cl_ushort{ 0x0010 }); + AccR[0] = s::upsample(s::cl_short{0x0010}, s::cl_ushort{0x0010}); }); }); } @@ -483,15 +470,14 @@ int main() { // upsample - 5 { - s::cl_ulong r{ 0 }; + s::cl_ulong r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::upsample(s::cl_uint{ 0x00000010 }, s::cl_uint{ 0x00000010 }); + AccR[0] = s::upsample(s::cl_uint{0x00000010}, s::cl_uint{0x00000010}); }); }); } @@ -500,15 +486,14 @@ int main() { // upsample - 6 { - s::cl_long r{ 0 }; + s::cl_long r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = - s::upsample(s::cl_int{ 0x00000010 }, s::cl_uint{ 0x00000010 }); + AccR[0] = s::upsample(s::cl_int{0x00000010}, s::cl_uint{0x00000010}); }); }); } @@ -517,15 +502,14 @@ int main() { // popcount { - s::cl_int r{ 0 }; + s::cl_int r{0}; 
{ s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); - cgh.single_task([=]() { - AccR[0] = s::popcount(s::cl_int{ 0x000000FF }); - }); + cgh.single_task( + [=]() { AccR[0] = s::popcount(s::cl_int{0x000000FF}); }); }); } assert(r == 8); @@ -533,7 +517,7 @@ int main() { // mad24 { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; @@ -541,7 +525,7 @@ int main() { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { AccR[0] = - s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }, s::cl_int{ 20 }); + s::mad24(s::cl_int(0xFFFFFFFF), s::cl_int{20}, s::cl_int{20}); }); }); } @@ -550,14 +534,14 @@ int main() { // mul24 { - s::cl_int r{ 0 }; + s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{ 20 }); + AccR[0] = s::mul24(s::cl_int(0xFFFFFFFF), s::cl_int{20}); }); }); } diff --git a/sycl/test/enqueue_barrier/enqueue_barrier.cpp b/sycl/test/enqueue_barrier/enqueue_barrier.cpp index b84660d58b467..f8bd5a9e8523c 100644 --- a/sycl/test/enqueue_barrier/enqueue_barrier.cpp +++ b/sycl/test/enqueue_barrier/enqueue_barrier.cpp @@ -6,7 +6,7 @@ // UNSUPPORTED: cuda #include -#include +#include int main() { sycl::context Context; diff --git a/sycl/test/fpga_tests/fpga_io_pipes.cpp b/sycl/test/fpga_tests/fpga_io_pipes.cpp index 989e390389418..20f3c489bded0 100644 --- a/sycl/test/fpga_tests/fpga_io_pipes.cpp +++ b/sycl/test/fpga_tests/fpga_io_pipes.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include #include #include diff --git a/sycl/test/fpga_tests/fpga_pipes.cpp b/sycl/test/fpga_tests/fpga_pipes.cpp index 1eba903d9972e..e3496e0168878 100644 --- a/sycl/test/fpga_tests/fpga_pipes.cpp +++ 
b/sycl/test/fpga_tests/fpga_pipes.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include #include // Size of an array passing through a pipe @@ -27,11 +27,10 @@ class nb_pipe; } // For non-blocking template pipes -template -class templ_nb_pipe; +template class templ_nb_pipe; // For non-blocking multiple pipes -template +template using PipeMulNb = cl::sycl::intel::pipe, int>; // For simple blocking pipes with explicit type @@ -43,25 +42,22 @@ class bl_pipe; } // For blocking template pipes -template -class templ_bl_pipe; +template class templ_bl_pipe; // For blocking multiple pipes -template -using PipeMulBl = cl::sycl::intel::pipe, int>; +template +using PipeMulBl = cl::sycl::ext::intel::pipe, int>; // Kernel names -template -class writer; -template -class reader; +template class writer; +template class reader; // Test for simple non-blocking pipes -template +template int test_simple_nb_pipe(cl::sycl::queue Queue) { int data[] = {0}; - using Pipe = cl::sycl::intel::pipe; + using Pipe = cl::sycl::ext::intel::pipe; cl::sycl::buffer readBuf(data, 1); Queue.submit([&](cl::sycl::handler &cgh) { @@ -97,8 +93,7 @@ int test_simple_nb_pipe(cl::sycl::queue Queue) { } // Test for multiple non-blocking pipes -template -int test_multiple_nb_pipe(cl::sycl::queue Queue) { +template int test_multiple_nb_pipe(cl::sycl::queue Queue) { int data[] = {0}; Queue.submit([&](cl::sycl::handler &cgh) { @@ -149,10 +144,9 @@ int test_multiple_nb_pipe(cl::sycl::queue Queue) { } // Test for array passing through a non-blocking pipe -template -int test_array_th_nb_pipe(cl::sycl::queue Queue) { +template int test_array_th_nb_pipe(cl::sycl::queue Queue) { int data[N] = {0}; - using AnotherNbPipe = cl::sycl::intel::pipe; + using AnotherNbPipe = cl::sycl::ext::intel::pipe; Queue.submit([&](cl::sycl::handler &cgh) { cgh.single_task>([=]() { @@ -190,11 +184,11 @@ int test_array_th_nb_pipe(cl::sycl::queue Queue) { } // Test 
for simple blocking pipes -template +template int test_simple_bl_pipe(cl::sycl::queue Queue) { int data[] = {0}; - using Pipe = cl::sycl::intel::pipe; + using Pipe = cl::sycl::ext::intel::pipe; cl::sycl::buffer readBuf(data, 1); Queue.submit([&](cl::sycl::handler &cgh) { @@ -224,8 +218,7 @@ int test_simple_bl_pipe(cl::sycl::queue Queue) { } // Test for multiple blocking pipes -template -int test_multiple_bl_pipe(cl::sycl::queue Queue) { +template int test_multiple_bl_pipe(cl::sycl::queue Queue) { int data[] = {0}; Queue.submit([&](cl::sycl::handler &cgh) { @@ -261,10 +254,9 @@ int test_multiple_bl_pipe(cl::sycl::queue Queue) { } // Test for array passing through a blocking pipe -template -int test_array_th_bl_pipe(cl::sycl::queue Queue) { +template int test_array_th_bl_pipe(cl::sycl::queue Queue) { int data[N] = {0}; - using AnotherBlPipe = cl::sycl::intel::pipe; + using AnotherBlPipe = cl::sycl::ext::intel::pipe; Queue.submit([&](cl::sycl::handler &cgh) { cgh.single_task>([=]() { diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp index ba76fdf5dbda8..a21e3d8a7d3f1 100644 --- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp +++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp @@ -15,8 +15,9 @@ #include #include -[[intel::device_indirectly_callable]] -extern "C" int add(int A, int B) { return A + B; } +[[intel::device_indirectly_callable]] extern "C" int add(int A, int B) { + return A + B; +} int main() { const int Size = 10; @@ -31,7 +32,8 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - auto FptrStorage = cl::sycl::ext::oneapi::get_device_func_ptr(&add, "add", P, D); + auto FptrStorage = + cl::sycl::ext::oneapi::get_device_func_ptr(&add, "add", P, D); if (!D.is_host()) { // FIXME: update this check with query to supported extension // For now, we don't have runtimes that report required OpenCL extension and @@ -54,10 +56,10 @@ int main() { auto AccB = BufB.template 
get_access(CGH); CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { - auto Fptr = - cl::sycl::ext::oneapi::to_device_func_ptr(FptrStorage); - AccA[Index] = Fptr(AccA[Index], AccB[Index]); - }); + auto Fptr = cl::sycl::ext::oneapi::to_device_func_ptr( + FptrStorage); + AccA[Index] = Fptr(AccA[Index], AccB[Index]); + }); }); auto HostAcc = BufA.get_access(); diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp index 255bbb0212eeb..10132dcc4c1fa 100644 --- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp +++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp @@ -33,7 +33,8 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - cl::sycl::buffer DispatchTable(2); + cl::sycl::buffer + DispatchTable(2); { auto DTAcc = DispatchTable.get_access(); @@ -68,11 +69,11 @@ int main() { DispatchTable.template get_access(CGH); CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { - auto FP = - cl::sycl::ext::oneapi::to_device_func_ptr(AccDT[Mode]); + auto FP = cl::sycl::ext::oneapi::to_device_func_ptr( + AccDT[Mode]); - AccA[Index] = FP(AccA[Index], AccB[Index]); - }); + AccA[Index] = FP(AccA[Index], AccB[Index]); + }); }); auto HostAcc = bufA.get_access(); diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp index 35ac1266cb11c..258dec907e161 100644 --- a/sycl/test/reduction/reduction_ctor.cpp +++ b/sycl/test/reduction/reduction_ctor.cpp @@ -10,7 +10,6 @@ using namespace cl::sycl; - template void test_reducer(Reduction &Redu, T A, T B) { typename Reduction::reducer_type Reducer; @@ -35,34 +34,25 @@ void test_reducer(Reduction &Redu, T Identity, T A, T B) { "Wrong result of binary operation."); } -template -class Known; -template -class Unknown; +template class Known; +template class Unknown; -template -struct Point { +template struct Point { Point() : X(0), Y(0) {} Point(T X, T Y) 
: X(X), Y(Y) {} Point(T V) : X(V), Y(V) {} - bool operator==(const Point &P) const { - return P.X == X && P.Y == Y; - } + bool operator==(const Point &P) const { return P.X == X && P.Y == Y; } T X; T Y; }; -template -bool operator==(const Point &A, const Point &B) { +template bool operator==(const Point &A, const Point &B) { return A.X == B.X && A.Y == B.Y; } -template -struct PointPlus { +template struct PointPlus { using P = Point; - P operator()(const P &A, const P &B) const { - return P(A.X + B.X, A.Y + B.Y); - } + P operator()(const P &A, const P &B) const { return P(A.X + B.X, A.Y + B.Y); } }; template @@ -78,8 +68,7 @@ void testKnown(T Identity, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ext::oneapi::reduction(ReduAcc, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, A, B); test_reducer(Redu, Identity, A, B); @@ -100,8 +89,7 @@ void testUnknown(T Identity, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ext::oneapi::reduction(ReduAcc, Identity, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, Identity, A, B); // Command group must have at least one task in it. Use an empty one. 
@@ -124,16 +112,22 @@ int main() { testBoth>(0, 1, 8); testBoth>(0, 7, 3); testBoth>(~0, 7, 3); - testBoth>((std::numeric_limits::max)(), 7, 3); - testBoth>((std::numeric_limits::min)(), 7, 3); + testBoth>((std::numeric_limits::max)(), 7, + 3); + testBoth>((std::numeric_limits::min)(), 7, + 3); testBoth>(0, 1, 7); testBoth>(1, 1, 7); - testBoth>(getMaximumFPValue(), 7, 3); - testBoth>(getMinimumFPValue(), 7, 3); - - testUnknown, 0, PointPlus>(Point(0), Point(1), Point(7)); - testUnknown, 1, PointPlus>(Point(0), Point(1), Point(7)); + testBoth>(getMaximumFPValue(), 7, + 3); + testBoth>(getMinimumFPValue(), 7, + 3); + + testUnknown, 0, PointPlus>( + Point(0), Point(1), Point(7)); + testUnknown, 1, PointPlus>( + Point(0), Point(1), Point(7)); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp index 9cdfbb8dce2fc..84e21291cc5da 100644 --- a/sycl/test/reduction/reduction_nd_ext_type.hpp +++ b/sycl/test/reduction/reduction_nd_ext_type.hpp @@ -8,8 +8,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -28,8 +27,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { queue Q; Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); + accessor Out(OutBuf, CGH); auto Redu = ext::oneapi::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); @@ -44,18 +42,18 @@ void test(T Identity, size_t WGSize, size_t NWItems) { // Check correctness. 
auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - T MaxDiff = 3 * std::numeric_limits::epsilon() * std::fabs(ComputedOut + CorrectOut); + T MaxDiff = 3 * std::numeric_limits::epsilon() * + std::fabs(ComputedOut + CorrectOut); if (std::fabs(static_cast(ComputedOut - CorrectOut)) > MaxDiff) { std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut - << ", MaxDiff = " << MaxDiff << "\n"; + << ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff + << "\n"; assert(0 && "Wrong value."); } } -template -int runTests(const string_class &ExtensionName) { +template int runTests(const string_class &ExtensionName) { device D = default_selector().select_device(); if (!D.is_host() && !D.has_extension(ExtensionName)) { std::cout << "Test skipped\n"; @@ -66,13 +64,17 @@ int runTests(const string_class &ExtensionName) { test>(0, 4, 4); test>(0, 4, 64); - test>(getMaximumFPValue(), 7, 7); - test>(getMinimumFPValue(), 7, 7 * 5); + test>( + getMaximumFPValue(), 7, 7); + test>( + getMinimumFPValue(), 7, 7 * 5); #if __cplusplus >= 201402L test>(1, 3, 3 * 5); - test>(getMaximumFPValue(), 3, 3); - test>(getMinimumFPValue(), 3, 3); + test>( + getMaximumFPValue(), 3, 3); + test>( + getMinimumFPValue(), 3, 3); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp index 356038d9f38b6..912101a25c5e0 100644 --- a/sycl/test/reduction/reduction_nd_s0_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp @@ -16,8 +16,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -79,16 +78,21 @@ int main() { test>(0, 8, 256); test>(0, 8, 256); test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + 
test>((std::numeric_limits::max)(), 8, + 256); + test>((std::numeric_limits::min)(), 8, + 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, + 256); + test>(getMinimumFPValue(), 8, + 256); // Check with CUSTOM type. - test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, CustomVecPlus>( + CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp index 88f408a1c4f8d..1bb3059f97f88 100644 --- a/sycl/test/reduction/reduction_nd_s0_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp @@ -16,8 +16,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -81,16 +80,21 @@ int main() { test>(0, 8, 256); test>(0, 8, 256); test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>((std::numeric_limits::max)(), 8, + 256); + test>((std::numeric_limits::min)(), 8, + 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, + 256); + test>(getMinimumFPValue(), 8, + 256); // Check with CUSTOM type. 
- test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, CustomVecPlus>( + CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp index 68b8e7cafb811..43e04aa2b1e03 100644 --- a/sycl/test/reduction/reduction_nd_s1_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp @@ -17,8 +17,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -80,16 +79,21 @@ int main() { test>(0, 8, 256); test>(0, 8, 256); test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>((std::numeric_limits::max)(), 8, + 256); + test>((std::numeric_limits::min)(), 8, + 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, + 256); + test>(getMinimumFPValue(), 8, + 256); // Check with CUSTOM type. - test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, CustomVecPlus>( + CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp index 68721b3f21544..1261c21a187ef 100644 --- a/sycl/test/reduction/reduction_nd_s1_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp @@ -17,8 +17,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -82,16 +81,21 @@ int main() { test>(0, 8, 256); test>(0, 8, 256); test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>((std::numeric_limits::max)(), 8, + 256); + test>((std::numeric_limits::min)(), 8, + 256); // Check with various types. 
test>(1, 8, 256); - test>(getMaximumFPValue(), 1, 16); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 1, + 16); + test>(getMinimumFPValue(), 8, + 256); // Check with CUSTOM type. - test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, CustomVecPlus>( + CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp index 42d19f9d8025a..bc670f9bf7fa9 100644 --- a/sycl/test/reduction/reduction_placeholder.cpp +++ b/sycl/test/reduction/reduction_placeholder.cpp @@ -18,8 +18,7 @@ using namespace cl::sycl; -template -class SomeClass; +template class SomeClass; template void test(T Identity, size_t WGSize, size_t NWItems) { @@ -33,9 +32,9 @@ void test(T Identity, size_t WGSize, size_t NWItems) { (OutBuf.template get_access())[0] = Identity; - auto Out = accessor(OutBuf); + auto Out = + accessor(OutBuf); // Compute. queue Q; Q.submit([&](handler &CGH) { @@ -72,13 +71,16 @@ int main() { test>(0, 4, 128); // fast reduce - test>(getMaximumFPValue(), 5, 5 * 7); - test>(getMinimumFPValue(), 4, 128); + test>(getMaximumFPValue(), 5, + 5 * 7); + test>(getMinimumFPValue(), 4, + 128); // generic algorithm test>(1, 7, 7 * 5); test>(1, 8, 16); - test, 0, CustomVecPlus>(CustomVec(0), 8, 8 * 3); + test, 0, CustomVecPlus>(CustomVec(0), 8, + 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp index 6619d85366c86..febb5f9615e84 100644 --- a/sycl/test/reduction/reduction_transparent.cpp +++ b/sycl/test/reduction/reduction_transparent.cpp @@ -18,10 +18,8 @@ using namespace cl::sycl; -template -class SomeIdClass; -template -class SomeNoIdClass; +template class SomeIdClass; +template class SomeNoIdClass; // Checks reductions initialized with transparent functor and explicitly set // identity value. 
@@ -46,7 +44,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, ext::oneapi::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ext::oneapi::reduction(Out, Identity, BOp), + [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); @@ -86,7 +85,8 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, ext::oneapi::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ext::oneapi::reduction(Out, BOp), + [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp index 6915f86af876e..2c568523c579f 100644 --- a/sycl/test/reduction/reduction_usm.cpp +++ b/sycl/test/reduction/reduction_usm.cpp @@ -7,7 +7,8 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out -// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and barrier() +// TODO: Enable the test for HOST when it supports ext::oneapi::reduce() and +// barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with USM var. 
@@ -18,10 +19,8 @@ using namespace cl::sycl; -template -class SomeClass; -template -class Copy1; +template class SomeClass; +template class Copy1; template void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { @@ -113,14 +112,16 @@ int main() { testUSM>(0, 4, 128); // fast reduce - testUSM>(getMaximumFPValue(), 5, 5 * 7); - testUSM>(getMinimumFPValue(), 4, 128); + testUSM>(getMaximumFPValue(), 5, + 5 * 7); + testUSM>(getMinimumFPValue(), 4, + 128); // generic algorithm testUSM>(1, 7, 7 * 5); testUSM>(1, 8, 16); - testUSM, 0, CustomVecPlus>( - CustomVec(0), 8, 8 * 3); + testUSM, 0, CustomVecPlus>(CustomVec(0), 8, + 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/sub_group/attributes.cpp b/sycl/test/sub_group/attributes.cpp index 28c5a99a7fa8b..d8173d2d1cf72 100644 --- a/sycl/test/sub_group/attributes.cpp +++ b/sycl/test/sub_group/attributes.cpp @@ -18,13 +18,13 @@ #include -#define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ - class KernelFunctor##SIZE { \ - public: \ - [[cl::intel_reqd_sub_group_size(SIZE)]] void \ - operator()(cl::sycl::nd_item<1> Item) { \ - const auto GID = Item.get_global_id(); \ - } \ +#define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ + class KernelFunctor##SIZE { \ + public: \ + [[cl::intel_reqd_sub_group_size(SIZE)]] void \ + operator()(cl::sycl::nd_item<1> Item) { \ + const auto GID = Item.get_global_id(); \ + } \ }; KERNEL_FUNCTOR_WITH_SIZE(1); @@ -44,8 +44,7 @@ inline uint32_t flp2(uint32_t X) { return X - (X >> 1); } -template -inline void submit(cl::sycl::queue &Q) { +template inline void submit(cl::sycl::queue &Q) { Q.submit([](cl::sycl::handler &cgh) { Fn F; cgh.parallel_for(cl::sycl::nd_range<1>{64, 16}, F); diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp index c8306c0cc18df..fe7d1cce9432a 100644 --- a/sycl/test/sub_group/barrier.cpp +++ b/sycl/test/sub_group/barrier.cpp @@ -19,11 +19,9 @@ #include #include #include -template -class sycl_subgr; +template class sycl_subgr; using 
namespace cl::sycl; -template -void check(queue &Queue, size_t G = 240, size_t L = 60) { +template void check(queue &Queue, size_t G = 240, size_t L = 60) { try { nd_range<1> NdRange(G, L); std::vector data(G); diff --git a/sycl/test/sub_group/helper.hpp b/sycl/test/sub_group/helper.hpp index bc88372c870ae..9f4d29ad5e558 100644 --- a/sycl/test/sub_group/helper.hpp +++ b/sycl/test/sub_group/helper.hpp @@ -12,14 +12,12 @@ using namespace cl::sycl; -template -struct utils { +template struct utils { static T1 add_vec(const vec &v); static bool cmp_vec(const vec &v, const vec &r); static std::string stringify_vec(const vec &v); }; -template -struct utils { +template struct utils { static T2 add_vec(const vec &v) { return v.s0(); } static bool cmp_vec(const vec &v, const vec &r) { return v.s0() == r.s0(); @@ -28,8 +26,7 @@ struct utils { return std::to_string((T2)v.s0()); } }; -template -struct utils { +template struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1(); } static bool cmp_vec(const vec &v, const vec &r) { return v.s0() == r.s0() && v.s1() == r.s1(); @@ -39,8 +36,7 @@ struct utils { std::to_string((T2)v.s1()) + " )"; } }; -template -struct utils { +template struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3(); } @@ -54,8 +50,7 @@ struct utils { std::to_string((T2)v.s3()) + " )"; } }; -template -struct utils { +template struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7(); @@ -74,8 +69,7 @@ struct utils { } }; -template -struct utils { +template struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + v.s7() + v.s8() + v.s9() + v.sA() + v.sB() + v.sC() + v.sD() + @@ -102,8 +96,7 @@ struct utils { } }; -template -void exit_if_not_equal(T val, T ref, const char *name) { +template void exit_if_not_equal(T val, T ref, const char *name) { if (std::is_floating_point::value) { if 
(std::fabs(val - ref) > 0.01) { std::cout << "Unexpected result for " << name << ": " << (double)val @@ -120,16 +113,17 @@ void exit_if_not_equal(T val, T ref, const char *name) { } template -void exit_if_not_equal(std::complex val, std::complex ref, const char *name) { - if (std::fabs(val.real() - ref.real()) > 0.01 || std::fabs(val.imag() - ref.imag()) > 0.01) { +void exit_if_not_equal(std::complex val, std::complex ref, + const char *name) { + if (std::fabs(val.real() - ref.real()) > 0.01 || + std::fabs(val.imag() - ref.imag()) > 0.01) { std::cout << "Unexpected result for " << name << ": " << val << " expected value: " << ref << std::endl; exit(1); } } -template -void exit_if_not_equal(T *val, T *ref, const char *name) { +template void exit_if_not_equal(T *val, T *ref, const char *name) { if ((val - ref) != 0) { std::cout << "Unexpected result for " << name << ": " << val << " expected value: " << ref << std::endl; @@ -137,8 +131,7 @@ void exit_if_not_equal(T *val, T *ref, const char *name) { } } -template <> -void exit_if_not_equal(half val, half ref, const char *name) { +template <> void exit_if_not_equal(half val, half ref, const char *name) { int16_t cmp_val = reinterpret_cast(val); int16_t cmp_ref = reinterpret_cast(ref); if (std::abs(cmp_val - cmp_ref) > 1) { diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp index 507b65a8261be..5ba28e904ada8 100644 --- a/sycl/test/sub_group/load_store.cpp +++ b/sycl/test/sub_group/load_store.cpp @@ -17,13 +17,11 @@ #include "helper.hpp" #include -template -class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; -template -void check(queue &Queue) { +template void check(queue &Queue) { const int G = 1024, L = 128; try { nd_range<1> NdRange(G, L); @@ -96,8 +94,7 @@ void check(queue &Queue) { exit(1); } } -template -void check(queue &Queue) { +template void check(queue &Queue) { const int G = 128, L = 64; try { nd_range<1> NdRange(G, L); diff --git 
a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp index 42c8c373044f9..408d0c8cff827 100644 --- a/sycl/test/sub_group/scan.hpp +++ b/sycl/test/sub_group/scan.hpp @@ -10,8 +10,7 @@ #include #include -template -class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; @@ -73,8 +72,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template -void check(queue &Queue, size_t G = 120, size_t L = 60) { +template void check(queue &Queue, size_t G = 120, size_t L = 60) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; @@ -87,21 +85,21 @@ void check(queue &Queue, size_t G = 120, size_t L = 60) { check_op(Queue, T(0), ext::oneapi::minimum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ext::oneapi::minimum(), - true, G, L); + check_op(Queue, std::numeric_limits::infinity(), + ext::oneapi::minimum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum(), true, - G, L); + check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum(), + true, G, L); } check_op(Queue, T(G), ext::oneapi::maximum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), ext::oneapi::maximum(), - true, G, L); + check_op(Queue, -std::numeric_limits::infinity(), + ext::oneapi::maximum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum(), true, - G, L); + check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum(), + true, G, L); } #if __cplusplus >= 201402L @@ -111,21 +109,21 @@ void check(queue &Queue, size_t G = 120, size_t L = 60) { check_op(Queue, T(0), ext::oneapi::minimum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ext::oneapi::minimum<>(), - true, G, L); + 
check_op(Queue, std::numeric_limits::infinity(), + ext::oneapi::minimum<>(), true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum<>(), true, - G, L); + check_op(Queue, std::numeric_limits::max(), ext::oneapi::minimum<>(), + true, G, L); } check_op(Queue, T(G), ext::oneapi::maximum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), ext::oneapi::maximum<>(), - true, G, L); + check_op(Queue, -std::numeric_limits::infinity(), + ext::oneapi::maximum<>(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum<>(), true, - G, L); + check_op(Queue, std::numeric_limits::min(), ext::oneapi::maximum<>(), + true, G, L); } #endif } From a32fea12c56bd71f795df01c75c5247b147ae8a6 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Wed, 22 Jul 2020 15:55:42 -0400 Subject: [PATCH 05/13] Resolve paths after merge Signed-off-by: James Brodman --- sycl/include/CL/sycl/ext/{oneapi => intel}/fpga_lsu.hpp | 0 sycl/include/CL/sycl/ext/{oneapi => intel}/fpga_utils.hpp | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sycl/include/CL/sycl/ext/{oneapi => intel}/fpga_lsu.hpp (100%) rename sycl/include/CL/sycl/ext/{oneapi => intel}/fpga_utils.hpp (100%) diff --git a/sycl/include/CL/sycl/ext/oneapi/fpga_lsu.hpp b/sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp similarity index 100% rename from sycl/include/CL/sycl/ext/oneapi/fpga_lsu.hpp rename to sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp diff --git a/sycl/include/CL/sycl/ext/oneapi/fpga_utils.hpp b/sycl/include/CL/sycl/ext/intel/fpga_utils.hpp similarity index 100% rename from sycl/include/CL/sycl/ext/oneapi/fpga_utils.hpp rename to sycl/include/CL/sycl/ext/intel/fpga_utils.hpp From a671d917ac956e7fbd12d2662382968a44e39994 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Thu, 23 Jul 2020 15:56:22 -0400 Subject: [PATCH 06/13] Update FPGA tests Signed-off-by: James Brodman --- 
sycl/test/fpga_tests/fpga_io_pipes.cpp | 2 +- sycl/test/fpga_tests/fpga_lsu.cpp | 26 +++++++++++++------------- sycl/test/fpga_tests/fpga_pipes.cpp | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sycl/test/fpga_tests/fpga_io_pipes.cpp b/sycl/test/fpga_tests/fpga_io_pipes.cpp index 20f3c489bded0..c21704f3478f5 100644 --- a/sycl/test/fpga_tests/fpga_io_pipes.cpp +++ b/sycl/test/fpga_tests/fpga_io_pipes.cpp @@ -113,7 +113,7 @@ int test_io_bl_pipe(cl::sycl::queue Queue) { } int main() { - cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}}; + cl::sycl::queue Queue{cl::sycl::ext::intel::fpga_emulator_selector{}}; if (!Queue.get_device() .get_info()) { diff --git a/sycl/test/fpga_tests/fpga_lsu.cpp b/sycl/test/fpga_tests/fpga_lsu.cpp index 65b35e09dbd69..f63343edd9db5 100644 --- a/sycl/test/fpga_tests/fpga_lsu.cpp +++ b/sycl/test/fpga_tests/fpga_lsu.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include // TODO: run is disabled, since no support added in FPGA backend yet. Check // implementation correctness from CXX and SYCL languages perspective. 
@@ -38,20 +38,20 @@ int test_lsu(cl::sycl::queue Queue) { auto input_ptr = input_accessor.get_pointer(); auto output_ptr = output_accessor.get_pointer(); - using PrefetchingLSU = - cl::sycl::intel::lsu, - cl::sycl::intel::statically_coalesce>; + using PrefetchingLSU = cl::sycl::ext::intel::lsu< + cl::sycl::ext::intel::prefetch, + cl::sycl::ext::intel::statically_coalesce>; - using BurstCoalescedLSU = - cl::sycl::intel::lsu, - cl::sycl::intel::statically_coalesce>; + using BurstCoalescedLSU = cl::sycl::ext::intel::lsu< + cl::sycl::ext::intel::burst_coalesce, + cl::sycl::ext::intel::statically_coalesce>; - using CachingLSU = - cl::sycl::intel::lsu, - cl::sycl::intel::cache<1024>, - cl::sycl::intel::statically_coalesce>; + using CachingLSU = cl::sycl::ext::intel::lsu< + cl::sycl::ext::intel::burst_coalesce, + cl::sycl::ext::intel::cache<1024>, + cl::sycl::ext::intel::statically_coalesce>; - using PipelinedLSU = cl::sycl::intel::lsu<>; + using PipelinedLSU = cl::sycl::ext::intel::lsu<>; int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0] int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1] @@ -74,7 +74,7 @@ int test_lsu(cl::sycl::queue Queue) { } int main() { - cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}}; + cl::sycl::queue Queue{cl::sycl::ext::intel::fpga_emulator_selector{}}; return test_lsu(Queue); } diff --git a/sycl/test/fpga_tests/fpga_pipes.cpp b/sycl/test/fpga_tests/fpga_pipes.cpp index e3496e0168878..6539cf75f926b 100644 --- a/sycl/test/fpga_tests/fpga_pipes.cpp +++ b/sycl/test/fpga_tests/fpga_pipes.cpp @@ -31,7 +31,7 @@ template class templ_nb_pipe; // For non-blocking multiple pipes template -using PipeMulNb = cl::sycl::intel::pipe, int>; +using PipeMulNb = cl::sycl::ext::oneapi::pipe, int>; // For simple blocking pipes with explicit type class some_bl_pipe; @@ -46,7 +46,7 @@ template class templ_bl_pipe; // For blocking multiple pipes template -using PipeMulBl = cl::sycl::ext::intel::pipe, int>; +using PipeMulBl 
= cl::sycl::ext::oneapi::pipe, int>; // Kernel names template class writer; @@ -57,7 +57,7 @@ template int test_simple_nb_pipe(cl::sycl::queue Queue) { int data[] = {0}; - using Pipe = cl::sycl::ext::intel::pipe; + using Pipe = cl::sycl::ext::oneapi::pipe; cl::sycl::buffer readBuf(data, 1); Queue.submit([&](cl::sycl::handler &cgh) { @@ -146,7 +146,7 @@ template int test_multiple_nb_pipe(cl::sycl::queue Queue) { // Test for array passing through a non-blocking pipe template int test_array_th_nb_pipe(cl::sycl::queue Queue) { int data[N] = {0}; - using AnotherNbPipe = cl::sycl::ext::intel::pipe; + using AnotherNbPipe = cl::sycl::ext::oneapi::pipe; Queue.submit([&](cl::sycl::handler &cgh) { cgh.single_task>([=]() { @@ -188,7 +188,7 @@ template int test_simple_bl_pipe(cl::sycl::queue Queue) { int data[] = {0}; - using Pipe = cl::sycl::ext::intel::pipe; + using Pipe = cl::sycl::ext::oneapi::pipe; cl::sycl::buffer readBuf(data, 1); Queue.submit([&](cl::sycl::handler &cgh) { @@ -256,7 +256,7 @@ template int test_multiple_bl_pipe(cl::sycl::queue Queue) { // Test for array passing through a blocking pipe template int test_array_th_bl_pipe(cl::sycl::queue Queue) { int data[N] = {0}; - using AnotherBlPipe = cl::sycl::ext::intel::pipe; + using AnotherBlPipe = cl::sycl::ext::oneapi::pipe; Queue.submit([&](cl::sycl::handler &cgh) { cgh.single_task>([=]() { From c25fa07f9c971de39f840ccb614d8040f338b684 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Thu, 23 Jul 2020 16:16:51 -0400 Subject: [PATCH 07/13] Update header file paths Signed-off-by: James Brodman --- sycl/include/CL/sycl/ext/oneapi/atomic_accessor.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/ext/oneapi/atomic_accessor.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_accessor.hpp index faa8ef745365e..40286132566d7 100644 --- a/sycl/include/CL/sycl/ext/oneapi/atomic_accessor.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_accessor.hpp @@ -9,8 +9,8 @@ #pragma once #include 
-#include -#include +#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { From 3e101b549ad64c3199adbe62d9ac6e0a6bfb1516 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Thu, 23 Jul 2020 17:19:17 -0400 Subject: [PATCH 08/13] fix tests. Signed-off-by: James Brodman --- .../include/CL/sycl/ext/oneapi/atomic_ref.hpp | 1 + sycl/test/atomic_ref/accessor.cpp | 31 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp index 19bfbefefb3a2..91c9472b8e82c 100644 --- a/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp +++ b/sycl/include/CL/sycl/ext/oneapi/atomic_ref.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/test/atomic_ref/accessor.cpp b/sycl/test/atomic_ref/accessor.cpp index 86067e2c74906..115add88c2a4a 100644 --- a/sycl/test/atomic_ref/accessor.cpp +++ b/sycl/test/atomic_ref/accessor.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ext::oneapi; // Equivalent to add_test from add.cpp // Uses atomic_accessor instead of atomic_ref @@ -24,14 +24,16 @@ template void accessor_test(queue q, size_t N) { q.submit([&](handler &cgh) { #if __cplusplus > 201402L static_assert( - std::is_same>::value, + std::is_same< + decltype( + atomic_accessor(sum_buf, cgh, relaxed_order, device_scope)), + atomic_accessor>::value, "atomic_accessor type incorrectly deduced"); #endif - auto sum = atomic_accessor(sum_buf, cgh); + auto sum = + atomic_accessor( + sum_buf, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { @@ -39,8 +41,8 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same< decltype(sum[0]), - atomic_ref>::value, "atomic_accessor returns incorrect atomic_ref"); out[gid] = sum[0].fetch_add(T(1)); @@ -69,10 +71,9 @@ void 
local_accessor_test(queue q, size_t N, size_t L = 8) { { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { - auto sum = - atomic_accessor( - 1, cgh); + auto sum = atomic_accessor(1, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) { int grp = it.get_group(0); @@ -80,8 +81,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) { it.barrier(); static_assert( std::is_same>::value, "local atomic_accessor returns incorrect atomic_ref"); T result = sum[0].fetch_add(T(1)); From 6a6a31dddf207ee1c932b92aebd32bc0990420a9 Mon Sep 17 00:00:00 2001 From: James Brodman Date: Fri, 24 Jul 2020 14:21:03 -0400 Subject: [PATCH 09/13] Revert pipes to ext::intel for consistency. Fix tests. Signed-off-by: James Brodman --- sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp | 2 +- sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp | 2 ++ sycl/include/CL/sycl/ext/intel/fpga_utils.hpp | 2 ++ sycl/include/CL/sycl/ext/{oneapi => intel}/pipes.hpp | 4 ++-- sycl/include/CL/sycl/pipes.hpp | 4 ++-- sycl/test/fpga_tests/fpga_pipes.cpp | 12 ++++++------ sycl/test/fpga_tests/io_pipe_def.h | 6 +++--- sycl/test/regression/esimd-util-compiler-eval.cpp | 2 +- 8 files changed, 19 insertions(+), 15 deletions(-) rename sycl/include/CL/sycl/ext/{oneapi => intel}/pipes.hpp (99%) diff --git a/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp b/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp index 3d3dac6fae2aa..9b019db1c1d41 100644 --- a/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_extensions.hpp @@ -10,4 +10,4 @@ #include #include #include -#include +#include diff --git a/sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp b/sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp index 5f8d37f802e76..2b8324970658a 100644 --- a/sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_lsu.hpp @@ -13,6 +13,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { 
+namespace ext { namespace intel { constexpr uint8_t BURST_COALESCE = 0x1; constexpr uint8_t CACHE = 0x2; @@ -109,5 +110,6 @@ template class lsu final { } }; } // namespace intel +} // namespace ext } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/ext/intel/fpga_utils.hpp b/sycl/include/CL/sycl/ext/intel/fpga_utils.hpp index be9bf1a6fc5af..60324b4149c12 100644 --- a/sycl/include/CL/sycl/ext/intel/fpga_utils.hpp +++ b/sycl/include/CL/sycl/ext/intel/fpga_utils.hpp @@ -13,6 +13,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { namespace intel { template