diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 6d62f035a9674..40306c90bf8c8 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -3194,9 +3194,10 @@ bool Util::isSyclHalfType(const QualType &Ty) { bool Util::isSyclSpecConstantType(const QualType &Ty) { const StringRef &Name = "spec_constant"; - std::array Scopes = { + std::array Scopes = { Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}, + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "ONEAPI"}, Util::DeclContextDesc{clang::Decl::Kind::Namespace, "experimental"}, Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}}; return matchQualifiedTypeName(Ty, Scopes); diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 60d5d2bc2307c..0d77fd81dc7da 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -253,6 +253,7 @@ struct get_kernel_name_t { using name = Type; }; +namespace ONEAPI { namespace experimental { template class spec_constant { @@ -268,6 +269,7 @@ class spec_constant { } }; } // namespace experimental +} // namespace ONEAPI #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) template diff --git a/clang/test/CodeGenSYCL/int_header_spec_const.cpp b/clang/test/CodeGenSYCL/int_header_spec_const.cpp index e6743c4ea2e91..a32389ae4c492 100644 --- a/clang/test/CodeGenSYCL/int_header_spec_const.cpp +++ b/clang/test/CodeGenSYCL/int_header_spec_const.cpp @@ -20,18 +20,18 @@ class MyDoubleConst; int main() { // Create specialization constants. - cl::sycl::experimental::spec_constant i1(false); - cl::sycl::experimental::spec_constant i8(0); - cl::sycl::experimental::spec_constant ui8(0); - cl::sycl::experimental::spec_constant i16(0); - cl::sycl::experimental::spec_constant ui16(0); - cl::sycl::experimental::spec_constant i32(0); + cl::sycl::ONEAPI::experimental::spec_constant i1(false); + cl::sycl::ONEAPI::experimental::spec_constant i8(0); + cl::sycl::ONEAPI::experimental::spec_constant ui8(0); + cl::sycl::ONEAPI::experimental::spec_constant i16(0); + cl::sycl::ONEAPI::experimental::spec_constant ui16(0); + cl::sycl::ONEAPI::experimental::spec_constant i32(0); // Constant used twice, but there must be single entry in the int header, // otherwise compilation error would be issued. - cl::sycl::experimental::spec_constant i32_1(0); - cl::sycl::experimental::spec_constant ui32(0); - cl::sycl::experimental::spec_constant f32(0); - cl::sycl::experimental::spec_constant f64(0); + cl::sycl::ONEAPI::experimental::spec_constant i32_1(0); + cl::sycl::ONEAPI::experimental::spec_constant ui32(0); + cl::sycl::ONEAPI::experimental::spec_constant f32(0); + cl::sycl::ONEAPI::experimental::spec_constant f64(0); double val; double *ptr = &val; // to avoid "unused" warnings diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 5e3478abf5176..2e01b5235a63a 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -218,12 +218,12 @@ class handler { } }; +namespace ONEAPI { namespace experimental { - template class spec_constant {}; } // namespace experimental - +} // namespace ONEAPI } // namespace sycl } // namespace cl diff --git a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp index d40e5296949a2..ba2489b93fe6f 100644 --- a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp +++ b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp @@ -6,12 +6,12 @@ #include struct SpecConstantsWrapper { - cl::sycl::experimental::spec_constant SC1; - cl::sycl::experimental::spec_constant SC2; + cl::sycl::ONEAPI::experimental::spec_constant SC1; + cl::sycl::ONEAPI::experimental::spec_constant SC2; }; int main() { - cl::sycl::experimental::spec_constant SC; + cl::sycl::ONEAPI::experimental::spec_constant SC; SpecConstantsWrapper W; cl::sycl::kernel_single_task( [=]() { @@ -23,7 +23,7 @@ int main() { // CHECK: FunctionDecl {{.*}}kernel_sc{{.*}} 'void ()' // CHECK: VarDecl {{.*}}'(lambda at {{.*}}' // CHECK-NEXT: InitListExpr {{.*}}'(lambda at {{.*}}' -// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::experimental::spec_constant':'cl::sycl::experimental::spec_constant' +// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::ONEAPI::experimental::spec_constant':'cl::sycl::ONEAPI::experimental::spec_constant' // CHECK-NEXT: InitListExpr {{.*}} 'SpecConstantsWrapper' -// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant':'cl::sycl::experimental::spec_constant' -// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant':'cl::sycl::experimental::spec_constant' +// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant':'cl::sycl::ONEAPI::experimental::spec_constant' +// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant':'cl::sycl::ONEAPI::experimental::spec_constant' diff --git a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp index 54a6f46290268..bd1375b49f6ec 100644 --- a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp +++ b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp @@ -10,9 +10,9 @@ __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { } int main() { - cl::sycl::experimental::spec_constant spec_const; + cl::sycl::ONEAPI::experimental::spec_constant spec_const; cl::sycl::accessor accessor; - // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::experimental::spec_constant' + // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::ONEAPI::experimental::spec_constant' // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::accessor' kernel([spec_const, accessor]() {}); return 0; diff --git a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll index 5eadf6d40bc28..122e4eea3fce5 100644 --- a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll +++ b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll @@ -7,7 +7,7 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-unknown-sycldevice" -%"sycl::experimental::spec_constant" = type { i8 } +%"spec_constant" = type { i8 } @SCSymID = private unnamed_addr constant [10 x i8] c"SpecConst\00", align 1 ; CHECK-NOT: @SCSymID @@ -21,7 +21,7 @@ define weak_odr dso_local spir_kernel void @Kernel() { } ; Function Attrs: norecurse -define dso_local spir_func float @foo_float(%"sycl::experimental::spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 { +define dso_local spir_func float @foo_float(%"spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 { %2 = tail call spir_func float @_Z27__sycl_getSpecConstantValueIfET_PKc(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([10 x i8], [10 x i8]* @SCSymID, i64 0, i64 0) to i8 addrspace(4)*)) ret float %2 } diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 7dde4b4515322..0c949a0611d8c 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -15,7 +15,7 @@ include(AddSYCLExecutable) set(SYCL_MAJOR_VERSION 3) set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 0) +set(SYCL_DEV_ABI_VERSION 1) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 21b7056d277cb..86461727fe4ff 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -8,6 +8,12 @@ #pragma once +#include +#include +#include +#include +#include +#include #include #include #include @@ -23,12 +29,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include #include #include diff --git a/sycl/include/CL/sycl/intel/atomic.hpp b/sycl/include/CL/sycl/ONEAPI/atomic.hpp similarity index 57% rename from sycl/include/CL/sycl/intel/atomic.hpp rename to sycl/include/CL/sycl/ONEAPI/atomic.hpp index ecb32b76a6976..a7b07d80d2fd3 100644 --- a/sycl/include/CL/sycl/intel/atomic.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic.hpp @@ -1,4 +1,4 @@ -//==---------------- atomic.hpp - SYCL_INTEL_extended_atomics --------------==// +//==--------------- atomic.hpp - SYCL_ONEAPI_extended_atomics --------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,7 +8,7 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include diff --git a/sycl/include/CL/sycl/intel/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp similarity index 95% rename from sycl/include/CL/sycl/intel/atomic_accessor.hpp rename to sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp index 524e2d4a3ab80..ed415a90891c7 100644 --- a/sycl/include/CL/sycl/intel/atomic_accessor.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp @@ -1,4 +1,4 @@ -//==--- atomic_accessor.hpp - SYCL_INTEL_extended_atomics atomic_accessor --==// +//==-- atomic_accessor.hpp - SYCL_ONEAPI_extended_atomics atomic_accessor --==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,13 +8,14 @@ #pragma once +#include +#include #include -#include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { #if __cplusplus > 201402L @@ -123,6 +124,6 @@ atomic_accessor(buffer, handler, #endif -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_enums.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp similarity index 93% rename from sycl/include/CL/sycl/intel/atomic_enums.hpp rename to sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp index a85c9902cd524..ad1d94ca2d815 100644 --- a/sycl/include/CL/sycl/intel/atomic_enums.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp @@ -1,4 +1,4 @@ -//==---------------- atomic_enums.hpp - SYCL_INTEL_extended_atomics enums --==// +//==--------------- atomic_enums.hpp - SYCL_ONEAPI_extended_atomics enums --==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -20,7 +20,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { enum class memory_order : int { relaxed, @@ -63,7 +63,7 @@ namespace detail { // Nested ternary conditions in else branch required for C++11 #if __cplusplus >= 201402L static inline constexpr std::memory_order -getStdMemoryOrder(::cl::sycl::intel::memory_order order) { +getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) { switch (order) { case memory_order::relaxed: return std::memory_order_relaxed; @@ -81,7 +81,7 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) { } #else static inline constexpr std::memory_order -getStdMemoryOrder(::cl::sycl::intel::memory_order order) { +getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) { return (order == memory_order::relaxed) ? std::memory_order_relaxed : (order == memory_order::__consume_unsupported) @@ -98,6 +98,6 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) { } // namespace detail #endif // __SYCL_DEVICE_ONLY__ -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_fence.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp similarity index 87% rename from sycl/include/CL/sycl/intel/atomic_fence.hpp rename to sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp index aba95c060b878..76352df6faed9 100644 --- a/sycl/include/CL/sycl/intel/atomic_fence.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp @@ -1,4 +1,4 @@ -//==----- atomic_fence.hpp - SYCL_INTEL_extended_atomics atomic_fence ------==// +//==---- atomic_fence.hpp - SYCL_ONEAPI_extended_atomics atomic_fence ------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,8 +9,8 @@ #pragma once #include +#include #include -#include #ifndef __SYCL_DEVICE_ONLY__ #include @@ -18,7 +18,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { namespace detail { using namespace cl::sycl::detail; } @@ -35,6 +35,6 @@ static inline void atomic_fence(memory_order order, memory_scope scope) { #endif } -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp similarity index 97% rename from sycl/include/CL/sycl/intel/atomic_ref.hpp rename to sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp index 1616727f919b8..625a8902d9c86 100644 --- a/sycl/include/CL/sycl/intel/atomic_ref.hpp +++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp @@ -1,4 +1,4 @@ -//==----- atomic_ref.hpp - SYCL_INTEL_extended_atomics atomic_ref ----------==// +//==----- atomic_ref.hpp - SYCL_ONEAPI_extended_atomics atomic_ref ---------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,11 +9,11 @@ #pragma once #include +#include #include #include #include -#include -#include +#include #ifndef __SYCL_DEVICE_ONLY__ #include @@ -27,14 +27,14 @@ namespace sycl { template class multi_ptr; -namespace intel { +namespace ONEAPI { namespace detail { -// Import from detail:: into intel::detail:: to improve readability later +// Import from detail:: into ONEAPI::detail:: to improve readability later using namespace ::cl::sycl::detail; -using memory_order = cl::sycl::intel::memory_order; -using memory_scope = cl::sycl::intel::memory_scope; +using memory_order = cl::sycl::ONEAPI::memory_order; +using memory_scope = cl::sycl::ONEAPI::memory_scope; template using IsValidAtomicType = @@ -127,14 +127,14 @@ class atomic_ref_base { detail::IsValidAtomicType::value, "Invalid atomic type. Valid types are arithmetic and pointer types"); static_assert(!std::is_same::value, - "intel::atomic_ref does not support bool type"); + "ONEAPI::atomic_ref does not support bool type"); static_assert(!(std::is_same::value || std::is_same::value || std::is_same::value), - "intel::atomic_ref does not support char type"); + "ONEAPI::atomic_ref does not support char type"); static_assert(!(std::is_same::value || std::is_same::value), - "intel::atomic_ref does not support short type"); + "ONEAPI::atomic_ref does not support short type"); static_assert(detail::IsValidAtomicAddressSpace::value, "Invalid atomic address_space. Valid address spaces are: " "global_space, local_space, global_device_space"); @@ -651,6 +651,6 @@ class atomic_ref : public detail::atomic_ref_impl::atomic_ref_impl; }; -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/builtins.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/builtins.hpp rename to sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp index a59258a2290ba..e5b047e29c7a0 100644 --- a/sycl/include/CL/sycl/intel/builtins.hpp +++ b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp @@ -18,7 +18,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { namespace experimental { // Provides functionality to print data from kernels in a C way: @@ -68,7 +68,7 @@ int printf(const CONSTANT_AS char *__format, Args... args) { } } // namespace experimental -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp similarity index 93% rename from sycl/include/CL/sycl/experimental/spec_constant.hpp rename to sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp index 7952d98bd481f..03698008e056d 100644 --- a/sycl/include/CL/sycl/experimental/spec_constant.hpp +++ b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp @@ -1,4 +1,4 @@ -//==----- spec_constant.hpp - SYCL public experimental API header file -----==// +//==----------- spec_constant.hpp - SYCL public ONEAPI API header file -----==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,6 +22,9 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +class program; + +namespace ONEAPI { namespace experimental { class spec_const_error : public compile_program_error { @@ -60,5 +63,6 @@ template class spec_constant { }; } // namespace experimental +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/function_pointer.hpp b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp similarity index 98% rename from sycl/include/CL/sycl/intel/function_pointer.hpp rename to sycl/include/CL/sycl/ONEAPI/function_pointer.hpp index f812be911b788..3ccbdf636ec99 100644 --- a/sycl/include/CL/sycl/intel/function_pointer.hpp +++ b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp @@ -21,7 +21,7 @@ namespace detail { __SYCL_EXPORT cl_ulong getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName); } -namespace intel { +namespace ONEAPI { // This is a preview extension implementation, intended to provide early // access to a feature for review and community feedback. @@ -83,6 +83,6 @@ device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName, return sycl::detail::getDeviceFunctionPointerImpl(D, P, FuncName); } -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/ONEAPI/functional.hpp similarity index 85% rename from sycl/include/CL/sycl/intel/functional.hpp rename to sycl/include/CL/sycl/ONEAPI/functional.hpp index ee4ed21b33ffd..ab16a694b00ed 100644 --- a/sycl/include/CL/sycl/intel/functional.hpp +++ b/sycl/include/CL/sycl/ONEAPI/functional.hpp @@ -11,7 +11,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { template struct minimum { T operator()(const T &lhs, const T &rhs) const { @@ -57,7 +57,7 @@ template using bit_or = std::bit_or; template using bit_xor = std::bit_xor; template using bit_and = std::bit_and; -} // namespace intel +} // namespace ONEAPI #ifdef __SYCL_DEVICE_ONLY__ namespace detail { @@ -93,15 +93,15 @@ struct GroupOpTag::value>> { return Ret; \ } -__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum) -__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum) -__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus) -__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus) -__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus) +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, ONEAPI::minimum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, ONEAPI::minimum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, ONEAPI::minimum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, ONEAPI::maximum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, ONEAPI::maximum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, ONEAPI::maximum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, ONEAPI::plus) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, ONEAPI::plus) +__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, ONEAPI::plus) #undef __SYCL_CALC_OVERLOAD diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp similarity index 93% rename from sycl/include/CL/sycl/intel/group_algorithm.hpp rename to sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp index c8f6faa2a08a6..e5681bcadcb0d 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp @@ -10,13 +10,15 @@ #include #include #include +#include +#include +#include #include #include #include -#include -#include +#include -#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ +#ifndef __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -32,7 +34,7 @@ template <> inline size_t get_local_linear_range>(group<3> g) { return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); } template <> -inline size_t get_local_linear_range(intel::sub_group g) { +inline size_t get_local_linear_range(ONEAPI::sub_group g) { return g.get_local_range()[0]; } @@ -53,8 +55,8 @@ __SYCL_GROUP_GET_LOCAL_LINEAR_ID(3); #endif // __SYCL_DEVICE_ONLY__ template <> -inline intel::sub_group::linear_id_type -get_local_linear_id(intel::sub_group g) { +inline ONEAPI::sub_group::linear_id_type +get_local_linear_id(ONEAPI::sub_group g) { return g.get_local_id()[0]; } @@ -79,22 +81,22 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) { template struct identity {}; -template struct identity> { +template struct identity> { static constexpr T value = 0; }; -template struct identity> { +template struct identity> { static constexpr T value = (std::numeric_limits::max)(); }; -template struct identity> { +template struct identity> { static constexpr T value = std::numeric_limits::lowest(); }; template using native_op_list = - type_list, intel::bit_or, intel::bit_xor, - intel::bit_and, intel::maximum, intel::minimum>; + type_list, ONEAPI::bit_or, ONEAPI::bit_xor, + ONEAPI::bit_and, ONEAPI::maximum, ONEAPI::minimum>; template struct is_native_op { static constexpr bool value = @@ -123,7 +125,7 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { } // namespace detail -namespace intel { +namespace ONEAPI { // EnableIf shorthands for algorithms that depend only on type template @@ -169,7 +171,7 @@ using EnableIfIsNonNativeOp = cl::sycl::detail::enable_if_t< template bool all_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAll(pred); #else @@ -183,7 +185,7 @@ template bool all_of(Group g, T x, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); return all_of(g, pred(x)); } @@ -192,7 +194,7 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ bool partial = true; sycl::detail::for_each( @@ -212,7 +214,7 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, template bool any_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAny(pred); #else @@ -226,7 +228,7 @@ template bool any_of(Group g, T x, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); return any_of(g, pred(x)); } @@ -236,7 +238,7 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, #ifdef __SYCL_DEVICE_ONLY__ static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); bool partial = false; sycl::detail::for_each( g, first, last, @@ -255,7 +257,7 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, template bool none_of(Group, bool pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupAll(!pred); #else @@ -269,7 +271,7 @@ template bool none_of(Group g, T x, Predicate pred) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); return none_of(g, pred(x)); } @@ -279,7 +281,7 @@ EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, #ifdef __SYCL_DEVICE_ONLY__ static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); return !any_of(g, first, last, pred); #else (void)g; @@ -296,7 +298,7 @@ EnableIfIsTriviallyCopyable broadcast(Group, T x, typename Group::id_type local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupBroadcast(x, local_id); #else @@ -312,7 +314,7 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::id_type local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -333,7 +335,7 @@ EnableIfIsTriviallyCopyable broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast( g, x, @@ -352,7 +354,7 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -372,7 +374,7 @@ template EnableIfIsTriviallyCopyable broadcast(Group g, T x) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ return broadcast(g, x, 0); #else @@ -387,7 +389,7 @@ template EnableIfIsVectorArithmetic broadcast(Group g, T x) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ T result; for (int s = 0; s < x.get_size(); ++s) { @@ -407,7 +409,7 @@ EnableIfIsScalarArithmeticNativeOp reduce(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -429,7 +431,7 @@ EnableIfIsVectorArithmeticNativeOp reduce(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same reduce(Group g, T x, BinaryOperation op) { static_assert(sycl::detail::is_sub_group::value, "reduce algorithm with user-defined types and operators" - "only supports intel::sub_group class."); + "only supports ONEAPI::sub_group class."); T result = x; for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) { T tmp = g.shuffle_xor(result, id<1>(mask)); @@ -465,7 +467,7 @@ EnableIfIsScalarArithmeticNativeOp reduce(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -486,7 +488,7 @@ EnableIfIsVectorArithmeticNativeOp reduce(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same reduce(Group g, V x, T init, BinaryOperation op) { static_assert(sycl::detail::is_sub_group::value, "reduce algorithm with user-defined types and operators" - "only supports intel::sub_group class."); + "only supports ONEAPI::sub_group class."); T result = x; for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) { T tmp = g.shuffle_xor(result, id<1>(mask)); @@ -528,7 +530,7 @@ EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same reduce(Group g, Ptr first, Ptr last, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -587,7 +589,7 @@ EnableIfIsScalarArithmeticNativeOp exclusive_scan(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -608,7 +610,7 @@ EnableIfIsVectorArithmeticNativeOp exclusive_scan(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -679,7 +681,7 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -741,7 +743,7 @@ EnableIfIsVectorArithmeticNativeOp inclusive_scan(Group g, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same inclusive_scan(Group, T x, BinaryOperation binary_op) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -782,7 +784,7 @@ EnableIfIsScalarArithmeticNativeOp inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert(std::is_same::value || (std::is_same::value && @@ -805,7 +807,7 @@ EnableIfIsVectorArithmeticNativeOp inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -826,7 +828,7 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, BinaryOperation binary_op, T init) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); // FIXME: Do not special-case for half precision static_assert( std::is_same::value || @@ -884,7 +886,7 @@ EnableIfIsPointer inclusive_scan(Group g, InPtr first, template bool leader(Group g) { static_assert(sycl::detail::is_generic_group::value, "Group algorithms only support the sycl::group and " - "intel::sub_group class."); + "ONEAPI::sub_group class."); #ifdef __SYCL_DEVICE_ONLY__ typename Group::linear_id_type linear_id = sycl::detail::get_local_linear_id(g); @@ -896,7 +898,7 @@ template bool leader(Group g) { #endif } -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ +#endif // __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__ diff --git a/sycl/include/CL/sycl/intel/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp similarity index 96% rename from sycl/include/CL/sycl/intel/reduction.hpp rename to sycl/include/CL/sycl/ONEAPI/reduction.hpp index 26adff47778e9..e33db4783156b 100644 --- a/sycl/include/CL/sycl/intel/reduction.hpp +++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp @@ -8,16 +8,18 @@ #pragma once +#include #include #include -#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { namespace detail { +using cl::sycl::detail::queue_impl; + __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class Queue, size_t LocalMemBytesPerWorkItem); __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize, @@ -33,8 +35,8 @@ using cl::sycl::detail::remove_AS; template using IsReduPlus = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduMultiplies = detail::bool_constant< @@ -43,28 +45,28 @@ using IsReduMultiplies = detail::bool_constant< template using IsReduMinimum = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduMaximum = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitOR = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitXOR = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduBitAND = detail::bool_constant< - std::is_same>::value || - std::is_same>::value>; + std::is_same>::value || + std::is_same>::value>; template using IsReduOptForFastAtomicFetch = @@ -171,7 +173,7 @@ class reducer { /// using those operations, which are based on functionality provided by /// sycl::atomic class. /// -/// For example, it is known that 0 is identity for intel::plus operations +/// For example, it is known that 0 is identity for ONEAPI::plus operations /// accepting native scalar types to which scalar 0 is convertible. /// Also, for int32/64 types the atomic_combine() is lowered to /// sycl::atomic::fetch_add(). @@ -313,7 +315,7 @@ class reducer enable_if_t::type, T>::value && (is_geninteger32bit::value || is_geninteger64bit::value) && @@ -323,7 +325,7 @@ class reducer enable_if_t::type, T>::value && (is_geninteger32bit::value || is_geninteger64bit::value) && @@ -609,11 +611,11 @@ struct get_reduction_aux_kernel_name_t { /// Implements a command group function that enqueues a kernel that calls /// user's lambda function KernelFunc and also does one iteration of reduction /// of elements computed in user's lambda function. -/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ONEAPI::reduce() algorithm to reduce elements in each /// of work-groups, then it calls fast sycl atomic operations to update /// user's reduction variable. /// -/// Briefly: calls user's lambda, intel::reduce() + atomic, INT + ADD/MIN/MAX. +/// Briefly: calls user's lambda, ONEAPI::reduce() + atomic, INT + ADD/MIN/MAX. template enable_if_t @@ -632,7 +634,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range &Range, (UniformWG || NDIt.get_global_linear_id() < NWorkItems) ? Reducer.MValue : Reducer.getIdentity(); - Reducer.MValue = intel::reduce(NDIt.get_group(), Val, BOp); + Reducer.MValue = ONEAPI::reduce(NDIt.get_group(), Val, BOp); if (NDIt.get_local_linear_id() == 0) Reducer.atomic_combine(Reduction::getOutPointer(Out)); }); @@ -726,11 +728,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range &Range, /// Implements a command group function that enqueues a kernel that /// calls user's lambda function and does one iteration of reduction /// of elements in each of work-groups. -/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ONEAPI::reduce() algorithm to reduce elements in each /// of work-groups. At the end of each work-groups the partial sum is written /// to a global buffer. /// -/// Briefly: user's lambda, intel:reduce(), FP + ADD/MIN/MAX. +/// Briefly: user's lambda, ONEAPI:reduce(), FP + ADD/MIN/MAX. template enable_if_t @@ -760,7 +762,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range &Range, ? Reducer.MValue : Reducer.getIdentity(); typename Reduction::binary_operation BOp; - PSum = intel::reduce(NDIt.get_group(), PSum, BOp); + PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(*(Reduction::getOutPointer(Out)), PSum); @@ -873,11 +875,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range &Range, /// Implements a command group function that enqueues a kernel that does one /// iteration of reduction of elements in each of work-groups. -/// This version uses intel::reduce() algorithm to reduce elements in each +/// This version uses ONEAPI::reduce() algorithm to reduce elements in each /// of work-groups. At the end of each work-groups the partial sum is written /// to a global buffer. /// -/// Briefly: aux kernel, intel:reduce(), reproducible results, FP + ADD/MIN/MAX +/// Briefly: aux kernel, ONEAPI:reduce(), reproducible results, FP + ADD/MIN/MAX template enable_if_t @@ -897,7 +899,7 @@ reduAuxCGFuncImpl(handler &CGH, size_t NWorkItems, size_t NWorkGroups, (UniformWG || (GID < NWorkItems)) ? In[GID] : Reduction::reducer_type::getIdentity(); - PSum = intel::reduce(NDIt.get_group(), PSum, BOp); + PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp); if (NDIt.get_local_linear_id() == 0) { if (IsUpdateOfUserVar) PSum = BOp(*(Reduction::getOutPointer(Out)), PSum); @@ -1074,6 +1076,6 @@ reduction(T *VarPtr, BinaryOperation) { access::mode::read_write>(VarPtr); } -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp similarity index 97% rename from sycl/include/CL/sycl/intel/sub_group.hpp rename to sycl/include/CL/sycl/ONEAPI/sub_group.hpp index 2c65f08218990..6a46a031e17a2 100644 --- a/sycl/include/CL/sycl/intel/sub_group.hpp +++ b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include #include @@ -96,7 +96,7 @@ void store(multi_ptr dst, const vec &x) { } // namespace detail -namespace intel { +namespace ONEAPI { struct sub_group { @@ -451,7 +451,7 @@ struct sub_group { /* --- deprecated collective functions --- */ template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::broadcast instead.") + "sycl::ONEAPI::broadcast instead.") EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::spirv::GroupBroadcast(x, local_id); @@ -465,7 +465,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::reduce instead.") + "sycl::ONEAPI::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::reduce instead.") + "sycl::ONEAPI::reduce instead.") EnableIfIsScalarArithmetic reduce(T x, T init, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return op(init, reduce(x, op)); @@ -496,7 +496,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::exclusive_scan instead.") + "sycl::ONEAPI::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::exclusive_scan instead.") + "sycl::ONEAPI::exclusive_scan instead.") EnableIfIsScalarArithmetic exclusive_scan(T x, T init, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ @@ -535,7 +535,7 @@ struct sub_group { template __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::inclusive_scan instead.") + "sycl::ONEAPI::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { #ifdef __SYCL_DEVICE_ONLY__ return sycl::detail::calc __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::intel::inclusive_scan instead.") + "sycl::ONEAPI::inclusive_scan instead.") EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, T init) const { #ifdef __SYCL_DEVICE_ONLY__ @@ -572,6 +572,6 @@ struct sub_group { template friend class cl::sycl::nd_item; sub_group() = default; }; -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index 3ba8ff09b8828..b1f86c0c7ec42 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -10,16 +10,18 @@ #include #include #include +#include #include +#include #include -#include +#include #ifdef __SYCL_DEVICE_ONLY__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { struct sub_group; -} // namespace intel +} // namespace ONEAPI namespace detail { namespace spirv { @@ -29,7 +31,7 @@ template struct group_scope> { static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup; }; -template <> struct group_scope<::cl::sycl::intel::sub_group> { +template <> struct group_scope<::cl::sycl::ONEAPI::sub_group> { static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup; }; @@ -107,7 +109,7 @@ using EnableIfGenericBroadcast = detail::enable_if_t< // Work-group supports any integral type // Sub-group currently supports only uint32_t template struct GroupId { using type = size_t; }; -template <> struct GroupId<::cl::sycl::intel::sub_group> { +template <> struct GroupId<::cl::sycl::ONEAPI::sub_group> { using type = uint32_t; }; template @@ -203,23 +205,23 @@ EnableIfGenericBroadcast GroupBroadcast(T x, id local_id) { // Single happens-before means semantics should always apply to all spaces // Although consume is unsupported, forwarding to acquire is valid static inline constexpr __spv::MemorySemanticsMask::Flag -getMemorySemanticsMask(intel::memory_order Order) { +getMemorySemanticsMask(ONEAPI::memory_order Order) { __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None; switch (Order) { - case intel::memory_order::relaxed: + case ONEAPI::memory_order::relaxed: SpvOrder = __spv::MemorySemanticsMask::None; break; - case intel::memory_order::__consume_unsupported: - case intel::memory_order::acquire: + case ONEAPI::memory_order::__consume_unsupported: + case ONEAPI::memory_order::acquire: SpvOrder = __spv::MemorySemanticsMask::Acquire; break; - case intel::memory_order::release: + case ONEAPI::memory_order::release: SpvOrder = __spv::MemorySemanticsMask::Release; break; - case intel::memory_order::acq_rel: + case ONEAPI::memory_order::acq_rel: SpvOrder = __spv::MemorySemanticsMask::AcquireRelease; break; - case intel::memory_order::seq_cst: + case ONEAPI::memory_order::seq_cst: SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent; break; } @@ -229,17 +231,18 @@ getMemorySemanticsMask(intel::memory_order Order) { __spv::MemorySemanticsMask::CrossWorkgroupMemory); } -static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) { +static inline constexpr __spv::Scope::Flag +getScope(ONEAPI::memory_scope Scope) { switch (Scope) { - case intel::memory_scope::work_item: + case ONEAPI::memory_scope::work_item: return __spv::Scope::Invocation; - case intel::memory_scope::sub_group: + case ONEAPI::memory_scope::sub_group: return __spv::Scope::Subgroup; - case intel::memory_scope::work_group: + case ONEAPI::memory_scope::work_group: return __spv::Scope::Workgroup; - case intel::memory_scope::device: + case ONEAPI::memory_scope::device: return __spv::Scope::Device; - case intel::memory_scope::system: + case ONEAPI::memory_scope::system: return __spv::Scope::CrossDevice; } } @@ -247,8 +250,8 @@ static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) { template inline typename detail::enable_if_t::value, T> AtomicCompareExchange(multi_ptr MPtr, - intel::memory_scope Scope, intel::memory_order Success, - intel::memory_order Failure, T Desired, T Expected) { + ONEAPI::memory_scope Scope, ONEAPI::memory_order Success, + ONEAPI::memory_order Failure, T Desired, T Expected) { auto SPIRVSuccess = getMemorySemanticsMask(Success); auto SPIRVFailure = getMemorySemanticsMask(Failure); auto SPIRVScope = getScope(Scope); @@ -260,8 +263,8 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> AtomicCompareExchange(multi_ptr MPtr, - intel::memory_scope Scope, intel::memory_order Success, - intel::memory_order Failure, T Desired, T Expected) { + ONEAPI::memory_scope Scope, ONEAPI::memory_order Success, + ONEAPI::memory_order Failure, T Desired, T Expected) { using I = detail::make_unsinged_integer_t; auto SPIRVSuccess = getMemorySemanticsMask(Success); auto SPIRVFailure = getMemorySemanticsMask(Failure); @@ -278,8 +281,8 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order) { +AtomicLoad(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -288,8 +291,8 @@ AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order) { +AtomicLoad(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -302,8 +305,8 @@ AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -312,8 +315,8 @@ AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -326,8 +329,8 @@ AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -336,8 +339,8 @@ AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -352,8 +355,8 @@ AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicIAdd(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicIAdd(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -362,8 +365,8 @@ AtomicIAdd(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicISub(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicISub(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -372,8 +375,8 @@ AtomicISub(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicAnd(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicAnd(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -382,8 +385,8 @@ AtomicAnd(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicOr(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicOr(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -392,8 +395,8 @@ AtomicOr(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicXor(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicXor(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -402,8 +405,8 @@ AtomicXor(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMin(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicMin(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -412,8 +415,8 @@ AtomicMin(multi_ptr MPtr, intel::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMax(multi_ptr MPtr, intel::memory_scope Scope, - intel::memory_order Order, T Value) { +AtomicMax(multi_ptr MPtr, ONEAPI::memory_scope Scope, + ONEAPI::memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp index 3f52acc8a2de2..df480f58f99ff 100644 --- a/sycl/include/CL/sycl/detail/type_traits.hpp +++ b/sycl/include/CL/sycl/detail/type_traits.hpp @@ -18,9 +18,9 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { template class group; -namespace intel { +namespace ONEAPI { struct sub_group; -} // namespace intel +} // namespace ONEAPI namespace detail { namespace half_impl { class half; @@ -313,7 +313,7 @@ struct is_group> : std::true_type {}; template struct is_sub_group : std::false_type {}; -template <> struct is_sub_group : std::true_type {}; +template <> struct is_sub_group : std::true_type {}; template struct is_generic_group diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 2d22f155dabbe..215e0203878f6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -176,7 +176,7 @@ checkValueRange(const T &V) { } // namespace detail -namespace intel { +namespace ONEAPI { namespace detail { template @@ -205,7 +205,7 @@ __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class Queue, size_t LocalMemBytesPerWorkItem); } // namespace detail -} // namespace intel +} // namespace ONEAPI /// Command group handler class. /// @@ -1036,8 +1036,8 @@ class __SYCL_EXPORT handler { Reduction::has_fast_atomics && !Reduction::is_usm> parallel_for(nd_range Range, Reduction Redu, _KERNELFUNCPARAM(KernelFunc)) { - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - Redu.getUserAccessor()); + ONEAPI::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + Redu.getUserAccessor()); } /// Implements parallel_for() accepting nd_range and 1 reduction variable @@ -1050,8 +1050,8 @@ class __SYCL_EXPORT handler { Reduction::has_fast_atomics && Reduction::is_usm> parallel_for(nd_range Range, Reduction Redu, _KERNELFUNCPARAM(KernelFunc)) { - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - Redu.getUSMPointer()); + ONEAPI::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + Redu.getUSMPointer()); } /// Implements parallel_for() accepting nd_range and 1 reduction variable @@ -1072,8 +1072,8 @@ class __SYCL_EXPORT handler { _KERNELFUNCPARAM(KernelFunc)) { shared_ptr_class QueueCopy = MQueue; auto RWAcc = Redu.getReadWriteScalarAcc(*this); - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, - RWAcc); + ONEAPI::detail::reduCGFunc(*this, KernelFunc, Range, Redu, + RWAcc); this->finalize(); // Copy from RWAcc to user's reduction accessor. @@ -1100,7 +1100,7 @@ class __SYCL_EXPORT handler { /// TODO: Need to handle more than 1 reduction in parallel_for(). /// TODO: Support HOST. The kernels called by this parallel_for() may use /// some functionality that is not yet supported on HOST such as: - /// barrier(), and intel::reduce() that also may be used in more + /// barrier(), and ONEAPI::reduce() that also may be used in more /// optimized implementations waiting for their turn of code-review. template @@ -1133,14 +1133,14 @@ class __SYCL_EXPORT handler { // TODO: currently the maximal work group size is determined for the given // queue/device, while it may be safer to use queries to the kernel compiled // for the device. - size_t MaxWGSize = intel::detail::reduGetMaxWGSize(MQueue, OneElemSize); + size_t MaxWGSize = ONEAPI::detail::reduGetMaxWGSize(MQueue, OneElemSize); if (Range.get_local_range().size() > MaxWGSize) throw sycl::runtime_error("The implementation handling parallel_for with" " reduction requires smaller work group size.", PI_INVALID_WORK_GROUP_SIZE); // 1. Call the kernel that includes user's lambda function. - intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu); + ONEAPI::detail::reduCGFunc(*this, KernelFunc, Range, Redu); shared_ptr_class QueueCopy = MQueue; this->finalize(); @@ -1162,7 +1162,7 @@ class __SYCL_EXPORT handler { handler AuxHandler(QueueCopy, MIsHost); AuxHandler.saveCodeLoc(MCodeLoc); - NWorkItems = intel::detail::reduAuxCGFunc( + NWorkItems = ONEAPI::detail::reduAuxCGFunc( AuxHandler, NWorkItems, MaxWGSize, Redu); MLastEvent = AuxHandler.finalize(); } // end while (NWorkItems > 1) @@ -1898,7 +1898,7 @@ class __SYCL_EXPORT handler { // in handler from reduction_impl methods. template - friend class intel::detail::reduction_impl; + friend class ONEAPI::detail::reduction_impl; friend void detail::associateWithHandler(handler &, detail::AccessorBaseHost *, diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp index 62abba368dc7f..6fa2babf2c95f 100644 --- a/sycl/include/CL/sycl/nd_item.hpp +++ b/sycl/include/CL/sycl/nd_item.hpp @@ -9,12 +9,12 @@ #pragma once #include +#include #include #include #include #include #include -#include #include #include #include @@ -67,7 +67,7 @@ template class nd_item { group get_group() const { return Group; } - intel::sub_group get_sub_group() const { return intel::sub_group(); } + ONEAPI::sub_group get_sub_group() const { return ONEAPI::sub_group(); } size_t ALWAYS_INLINE get_group(int dimension) const { size_t Size = Group[dimension]; diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp index c6dbebf3f45bf..78ce15dc7fa64 100644 --- a/sycl/include/CL/sycl/program.hpp +++ b/sycl/include/CL/sycl/program.hpp @@ -8,11 +8,11 @@ #pragma once +#include #include #include #include #include -#include #include #include #include @@ -307,7 +307,7 @@ class __SYCL_EXPORT program { /// \return a specialization constant instance corresponding to given type ID /// passed as a template parameter template - experimental::spec_constant set_spec_constant(T Cst) { + ONEAPI::experimental::spec_constant set_spec_constant(T Cst) { constexpr const char *Name = detail::SpecConstantInfo::getName(); static_assert(std::is_integral::value || std::is_floating_point::value, @@ -315,10 +315,10 @@ class __SYCL_EXPORT program { #ifdef __SYCL_DEVICE_ONLY__ (void)Cst; (void)Name; - return experimental::spec_constant(); + return ONEAPI::experimental::spec_constant(); #else set_spec_constant_impl(Name, &Cst, sizeof(T)); - return experimental::spec_constant(Cst); + return ONEAPI::experimental::spec_constant(Cst); #endif // __SYCL_DEVICE_ONLY__ } diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index a73064e455067..8453584aa067c 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -484,8 +484,8 @@ vector_class program_impl::get_info() const { void program_impl::set_spec_constant_impl(const char *Name, const void *ValAddr, size_t ValSize) { if (MState != program_state::none) - throw cl::sycl::experimental::spec_const_error("Invalid program state", - PI_INVALID_PROGRAM); + throw cl::sycl::ONEAPI::experimental::spec_const_error( + "Invalid program state", PI_INVALID_PROGRAM); // Reuse cached programs lock as opposed to introducing a new lock. auto LockGuard = MContext->getKernelProgramCache().acquireCachedPrograms(); spec_constant_impl &SC = SpecConstRegistry[Name]; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fc45b310c94c6..a4f2f875d424e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include #include @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -1020,7 +1020,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg, std::lock_guard Lock(MNativeProgramsMutex); auto It = NativePrograms.find(NativePrg); if (It == NativePrograms.end()) - throw sycl::experimental::spec_const_error( + throw sycl::ONEAPI::experimental::spec_const_error( "spec constant is set in a program w/o a binary image", PI_INVALID_OPERATION); Img = It->second; diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 71e16724ce3b2..7a0bb02ca103b 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace intel { +namespace ONEAPI { namespace detail { // TODO: The algorithm of choosing the work-group size is definitely @@ -62,6 +62,6 @@ reduGetMaxWGSize(shared_ptr_class Queue, } } // namespace detail -} // namespace intel +} // namespace ONEAPI } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp index c273ae817c8bf..da4712f4abe25 100644 --- a/sycl/source/function_pointer.cpp +++ b/sycl/source/function_pointer.cpp @@ -6,16 +6,16 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -intel::device_func_ptr_holder_t +ONEAPI::device_func_ptr_holder_t getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName) { - intel::device_func_ptr_holder_t FPtr = 0; + ONEAPI::device_func_ptr_holder_t FPtr = 0; // FIXME: return value must be checked here, but since we cannot yet check // if corresponding extension is supported, let's silently ignore it here. const detail::plugin &Plugin = detail::getSyclObjImpl(P)->getPlugin(); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index e63d7647c558a..89c831d9e4962 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3625,8 +3625,6 @@ _ZN2cl4sycl5eventC1Ev _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE _ZN2cl4sycl5eventC2Ev -_ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm -_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE @@ -3647,6 +3645,8 @@ _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKNS0_13property_list _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE +_ZN2cl4sycl6ONEAPI6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm +_ZN2cl4sycl6ONEAPI6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl6detail10image_implILi1EE10getDevicesESt10shared_ptrINS1_12context_implEE _ZN2cl4sycl6detail10image_implILi1EE10setPitchesEv _ZN2cl4sycl6detail10image_implILi1EE11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event diff --git a/sycl/test/aot/spec_const_aot.cpp b/sycl/test/aot/spec_const_aot.cpp index 99b451fe6d7ca..706b618438b40 100644 --- a/sycl/test/aot/spec_const_aot.cpp +++ b/sycl/test/aot/spec_const_aot.cpp @@ -32,10 +32,11 @@ int main(int argc, char **argv) { } }); - std::cout << "Running on " << q.get_device().get_info() << "\n"; + std::cout << "Running on " << q.get_device().get_info() + << "\n"; cl::sycl::program prog(q.get_context()); - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ONEAPI::experimental::spec_constant i32 = prog.set_spec_constant(10); prog.build_with_kernel_type(); @@ -46,11 +47,8 @@ int main(int argc, char **argv) { q.submit([&](cl::sycl::handler &cgh) { auto acc = buf.get_access(cgh); - cgh.single_task( - prog.get_kernel(), - [=]() { - acc[0] = i32.get(); - }); + cgh.single_task(prog.get_kernel(), + [=]() { acc[0] = i32.get(); }); }); } bool passed = true; diff --git a/sycl/test/atomic_ref/accessor.cpp b/sycl/test/atomic_ref/accessor.cpp index 86067e2c74906..6410749cf4877 100644 --- a/sycl/test/atomic_ref/accessor.cpp +++ b/sycl/test/atomic_ref/accessor.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; // Equivalent to add_test from add.cpp // Uses atomic_accessor instead of atomic_ref @@ -26,12 +26,12 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same>::value, + atomic_accessor>::value, "atomic_accessor type incorrectly deduced"); #endif - auto sum = atomic_accessor(sum_buf, cgh); + auto sum = atomic_accessor(sum_buf, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { @@ -39,8 +39,8 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same< decltype(sum[0]), - atomic_ref>::value, "atomic_accessor returns incorrect atomic_ref"); out[gid] = sum[0].fetch_add(T(1)); @@ -70,8 +70,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { auto sum = - atomic_accessor( + atomic_accessor( 1, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) { @@ -80,8 +80,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) { it.barrier(); static_assert( std::is_same>::value, "local atomic_accessor returns incorrect atomic_ref"); T result = sum[0].fetch_add(T(1)); diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index cfe943d176299..565048f1be250 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template void add_fetch_test(queue q, size_t N) { @@ -23,10 +23,13 @@ void add_fetch_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm.fetch_add(Difference(1)); }); }); @@ -56,10 +59,13 @@ void add_plus_equal_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm += Difference(1); }); }); @@ -89,10 +95,13 @@ void add_pre_inc_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = ++atm; }); }); @@ -122,10 +131,13 @@ void add_post_inc_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto sum = sum_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); + auto atm = atomic_ref(sum[0]); out[gid] = atm++; }); }); @@ -153,13 +165,11 @@ void add_test(queue q, size_t N) { } // Floating-point types do not support pre- or post-increment -template <> -void add_test(queue q, size_t N) { +template <> void add_test(queue q, size_t N) { add_fetch_test(q, N); add_plus_equal_test(q, N); } -template <> -void add_test(queue q, size_t N) { +template <> void add_test(queue q, size_t N) { add_fetch_test(q, N); add_plus_equal_test(q, N); } diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index 11c2caa6ef3c4..db8b12a846498 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class compare_exchange_kernel; @@ -30,7 +30,9 @@ void compare_exchange_test(queue q, size_t N) { cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = atomic_ref(exc[0]); T result = T(N); // Avoid copying pointer bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index b4445928ea075..61e1114b0c1b8 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class exchange_kernel; @@ -29,7 +29,9 @@ void exchange_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = atomic_ref(exc[0]); out[gid] = atm.exchange(T(gid)); }); }); diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index 30ae13e16e65e..4e044dc7541f6 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class load_kernel; @@ -29,7 +29,9 @@ void load_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(ld[0]); + auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); }); diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index 0c95653b8219b..2bc7067e01650 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template void max_test(queue q, size_t N) { @@ -27,7 +27,9 @@ void max_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); // +1 accounts for lowest() returning 0 for unsigned types out[gid] = atm.fetch_max(T(gid) + 1); diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 6a0e32ca14bb5..05e41bb36597e 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template void min_test(queue q, size_t N) { @@ -27,7 +27,9 @@ void min_test(queue q, size_t N) { auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_min(T(gid)); }); }); diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index db076ee994a3d..4c29e8947ea7f 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -9,7 +9,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class store_kernel; @@ -24,7 +24,9 @@ void store_test(queue q, size_t N) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(st[0]); + auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); }); diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index 10ed75d21da25..a51ad5a2e9e28 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template void sub_fetch_test(queue q, size_t N) { @@ -23,10 +23,13 @@ void sub_fetch_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_sub(Difference(1)); }); }); @@ -56,10 +59,13 @@ void sub_plus_equal_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm -= Difference(1); }); }); @@ -89,10 +95,13 @@ void sub_pre_dec_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = --atm; }); }); @@ -122,10 +131,13 @@ void sub_post_dec_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto val = val_buf.template get_access(cgh); - auto out = output_buf.template get_access(cgh); + auto out = + output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); + auto atm = atomic_ref(val[0]); out[gid] = atm--; }); }); @@ -153,13 +165,11 @@ void sub_test(queue q, size_t N) { } // Floating-point types do not support pre- or post-decrement -template <> -void sub_test(queue q, size_t N) { +template <> void sub_test(queue q, size_t N) { sub_fetch_test(q, N); sub_plus_equal_test(q, N); } -template <> -void sub_test(queue q, size_t N) { +template <> void sub_test(queue q, size_t N) { sub_fetch_test(q, N); sub_plus_equal_test(q, N); } diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp index 6536498587729..1693ed97395ab 100644 --- a/sycl/test/built-ins/printf.cpp +++ b/sycl/test/built-ins/printf.cpp @@ -41,7 +41,7 @@ int main() { Queue.submit([&](handler &CGH) { CGH.single_task([=]() { // String - intel::experimental::printf(format_hello_world); + ONEAPI::experimental::printf(format_hello_world); // Due to a bug in Intel CPU Runtime for OpenCL on Windows, information // printed using such format strings (without %-specifiers) might // appear in different order if output is redirected to a file or @@ -50,8 +50,8 @@ int main() { // CHECK: {{(Hello, World!)?}} // Integral types - intel::experimental::printf(format_int, (int32_t)123); - intel::experimental::printf(format_int, (int32_t)-123); + ONEAPI::experimental::printf(format_int, (int32_t)123); + ONEAPI::experimental::printf(format_int, (int32_t)-123); // CHECK: 123 // CHECK-NEXT: -123 @@ -60,8 +60,8 @@ int main() { // You can declare format string in non-global scope, but in this case // static keyword is required static const CONSTANT char format[] = "%f\n"; - intel::experimental::printf(format, 33.4f); - intel::experimental::printf(format, -33.4f); + ONEAPI::experimental::printf(format, 33.4f); + ONEAPI::experimental::printf(format, -33.4f); } // CHECK-NEXT: 33.4 // CHECK-NEXT: -33.4 @@ -73,23 +73,23 @@ int main() { using ocl_int4 = cl::sycl::vec::vector_t; { static const CONSTANT char format[] = "%v4d\n"; - intel::experimental::printf(format, (ocl_int4)v4); + ONEAPI::experimental::printf(format, (ocl_int4)v4); } // However, you are still able to print them by-element: { - intel::experimental::printf(format_vec, (int32_t)v4.w(), - (int32_t)v4.z(), (int32_t)v4.y(), - (int32_t)v4.x()); + ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(), + (int32_t)v4.z(), (int32_t)v4.y(), + (int32_t)v4.x()); } #else // On host side you always have to print them by-element: - intel::experimental::printf(format_vec, (int32_t)v4.x(), - (int32_t)v4.y(), (int32_t)v4.z(), - (int32_t)v4.w()); - intel::experimental::printf(format_vec, (int32_t)v4.w(), - (int32_t)v4.z(), (int32_t)v4.y(), - (int32_t)v4.x()); + ONEAPI::experimental::printf(format_vec, (int32_t)v4.x(), + (int32_t)v4.y(), (int32_t)v4.z(), + (int32_t)v4.w()); + ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(), + (int32_t)v4.z(), (int32_t)v4.y(), + (int32_t)v4.x()); #endif // __SYCL_DEVICE_ONLY__ // CHECK-NEXT: 5,6,7,8 // CHECK-NEXT: 8,7,6,5 @@ -100,7 +100,7 @@ int main() { // According to OpenCL spec, argument should be a void pointer { static const CONSTANT char format[] = "%p\n"; - intel::experimental::printf(format, (void *)Ptr); + ONEAPI::experimental::printf(format, (void *)Ptr); } // CHECK-NEXT: {{(0x)?[0-9a-fA-F]+$}} }); @@ -111,7 +111,7 @@ int main() { Queue.submit([&](handler &CGH) { CGH.parallel_for(range<1>(10), [=](id<1> i) { // cast to uint64_t to be sure that we pass 64-bit unsigned value - intel::experimental::printf(format_hello_world_2, (uint64_t)i.get(0)); + ONEAPI::experimental::printf(format_hello_world_2, (uint64_t)i.get(0)); }); }); Queue.wait(); @@ -127,8 +127,8 @@ int main() { // CHECK-NEXT: {{[0-9]+}}: Hello, World! } -// FIXME: strictly check output order once the bug mentioned above is fixed -// CHECK: {{(Hello, World!)?}} + // FIXME: strictly check output order once the bug mentioned above is fixed + // CHECK: {{(Hello, World!)?}} return 0; } diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp index c68a891dcf94c..0c34ccc5cc607 100644 --- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp +++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp @@ -31,7 +31,7 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - auto FptrStorage = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D); + auto FptrStorage = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D); if (!D.is_host()) { // FIXME: update this check with query to supported extension // For now, we don't have runtimes that report required OpenCL extension and @@ -54,10 +54,10 @@ int main() { auto AccB = BufB.template get_access(CGH); CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { - auto Fptr = - cl::sycl::intel::to_device_func_ptr(FptrStorage); - AccA[Index] = Fptr(AccA[Index], AccB[Index]); - }); + auto Fptr = + cl::sycl::ONEAPI::to_device_func_ptr(FptrStorage); + AccA[Index] = Fptr(AccA[Index], AccB[Index]); + }); }); auto HostAcc = BufA.get_access(); diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp index 67ecea5509a5f..26685ec11a794 100644 --- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp +++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp @@ -34,12 +34,12 @@ int main() { P.build_with_kernel_type(); cl::sycl::kernel KE = P.get_kernel(); - cl::sycl::buffer DispatchTable(2); + cl::sycl::buffer DispatchTable(2); { auto DTAcc = DispatchTable.get_access(); - DTAcc[0] = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D); - DTAcc[1] = cl::sycl::intel::get_device_func_ptr(&sub, "sub", P, D); + DTAcc[0] = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D); + DTAcc[1] = cl::sycl::ONEAPI::get_device_func_ptr(&sub, "sub", P, D); if (!D.is_host()) { // FIXME: update this check with query to supported extension // For now, we don't have runtimes that report required OpenCL extension @@ -69,11 +69,11 @@ int main() { DispatchTable.template get_access(CGH); CGH.parallel_for( KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) { - auto FP = - cl::sycl::intel::to_device_func_ptr(AccDT[Mode]); + auto FP = cl::sycl::ONEAPI::to_device_func_ptr( + AccDT[Mode]); - AccA[Index] = FP(AccA[Index], AccB[Index]); - }); + AccA[Index] = FP(AccA[Index], AccB[Index]); + }); }); auto HostAcc = bufA.get_access(); diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index 2a175d000bb6f..cc19772af0c12 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class all_of_kernel; diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index 6ce61afaffdec..45ddc41f6355f 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class any_of_kernel; diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index 04028fade9669..d0320701010f1 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -13,7 +13,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class broadcast_kernel; diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 47dc1f6122720..5aaf0373eea57 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -14,7 +14,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class exclusive_scan_kernel; diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 54311a162ed9e..75cf06c7c371e 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -14,7 +14,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class inclusive_scan_kernel; diff --git a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp index ff02cf7e77f9e..afe5e28648a6e 100644 --- a/sycl/test/group-algorithm/leader.cpp +++ b/sycl/test/group-algorithm/leader.cpp @@ -10,7 +10,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; class leader_kernel; diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index c8b56158d20b7..7a08b6463a682 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -12,7 +12,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class none_of_kernel; diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 64ed0bd82fcc2..ef65439404884 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -13,7 +13,7 @@ #include #include using namespace sycl; -using namespace sycl::intel; +using namespace sycl::ONEAPI; template class reduce_kernel; diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp index 2b3f75ae2182e..67d039700fb8b 100644 --- a/sycl/test/linear_id/linear-sub_group.cpp +++ b/sycl/test/linear_id/linear-sub_group.cpp @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) { nd_range<2>(range<2>(outer, inner), range<2>(outer, inner)), [=](nd_item<2> it) { id<2> idx = it.get_global_id(); - intel::sub_group sg = it.get_sub_group(); + ONEAPI::sub_group sg = it.get_sub_group(); output[idx] = sg.get_group_id()[0] * sg.get_local_range()[0] + sg.get_local_id()[0]; }); diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp index cbf1b5907cfc2..4828b1dc94535 100644 --- a/sycl/test/reduction/reduction_ctor.cpp +++ b/sycl/test/reduction/reduction_ctor.cpp @@ -49,7 +49,7 @@ void testKnown(T Identity, BinaryOperation BOp, T A, T B) { // This accessor is not really used in this test. accessor ReduAcc(ReduBuf, CGH); - auto Redu = intel::reduction(ReduAcc, BOp); + auto Redu = ONEAPI::reduction(ReduAcc, BOp); assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, A, B); @@ -69,7 +69,7 @@ void testUnknown(T Identity, BinaryOperation BOp, T A, T B) { // This accessor is not really used in this test. accessor ReduAcc(ReduBuf, CGH); - auto Redu = intel::reduction(ReduAcc, Identity, BOp); + auto Redu = ONEAPI::reduction(ReduAcc, Identity, BOp); assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, Identity, BOp, A, B); @@ -88,18 +88,20 @@ void testBoth(T Identity, BinaryOperation BOp, T A, T B) { } int main() { - testBoth(0, intel::plus(), 1, 7); + testBoth(0, ONEAPI::plus(), 1, 7); testBoth(1, std::multiplies(), 1, 7); - testBoth(0, intel::bit_or(), 1, 8); - testBoth(0, intel::bit_xor(), 7, 3); - testBoth(~0, intel::bit_and(), 7, 3); - testBoth((std::numeric_limits::max)(), intel::minimum(), 7, 3); - testBoth((std::numeric_limits::min)(), intel::maximum(), 7, 3); - - testBoth(0, intel::plus(), 1, 7); + testBoth(0, ONEAPI::bit_or(), 1, 8); + testBoth(0, ONEAPI::bit_xor(), 7, 3); + testBoth(~0, ONEAPI::bit_and(), 7, 3); + testBoth((std::numeric_limits::max)(), ONEAPI::minimum(), 7, + 3); + testBoth((std::numeric_limits::min)(), ONEAPI::maximum(), 7, + 3); + + testBoth(0, ONEAPI::plus(), 1, 7); testBoth(1, std::multiplies(), 1, 7); - testBoth(getMaximumFPValue(), intel::minimum(), 7, 3); - testBoth(getMinimumFPValue(), intel::maximum(), 7, 3); + testBoth(getMaximumFPValue(), ONEAPI::minimum(), 7, 3); + testBoth(getMinimumFPValue(), ONEAPI::maximum(), 7, 3); testUnknown, 0, Unknown, 0, CustomVecPlus>>( diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp index c700097993079..985f77cbc77f4 100644 --- a/sycl/test/reduction/reduction_nd_conditional.cpp +++ b/sycl/test/reduction/reduction_nd_conditional.cpp @@ -85,7 +85,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -114,10 +114,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) { } int main() { - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 2, 64); - test>(0, 16, 256); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 2, 64); + test>(0, 16, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp index a80aefc09cd45..f81a913837d46 100644 --- a/sycl/test/reduction/reduction_nd_ext_type.hpp +++ b/sycl/test/reduction/reduction_nd_ext_type.hpp @@ -30,7 +30,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -64,15 +64,19 @@ int runTests(const string_class &ExtensionName) { // Check some less standards WG sizes and corner cases first. test>(0, 4, 4); - test>(0, 4, 64); + test>(0, 4, 64); - test>(getMaximumFPValue(), 7, 7); - test>(getMinimumFPValue(), 7, 7 * 5); + test>( + getMaximumFPValue(), 7, 7); + test>( + getMinimumFPValue(), 7, 7 * 5); #if __cplusplus >= 201402L - test>(1, 3, 3 * 5); - test>(getMaximumFPValue(), 3, 3); - test>(getMinimumFPValue(), 3, 3); + test>(1, 3, 3 * 5); + test>( + getMaximumFPValue(), 3, 3); + test>( + getMinimumFPValue(), 3, 3); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_lambda.cpp b/sycl/test/reduction/reduction_nd_lambda.cpp index 3d5cf21658995..37c435b9b0f07 100644 --- a/sycl/test/reduction/reduction_nd_lambda.cpp +++ b/sycl/test/reduction/reduction_nd_lambda.cpp @@ -1,5 +1,5 @@ // UNSUPPORTED: cuda -// Reductions use work-group builtins (e.g. intel::reduce()) not yet supported +// Reductions use work-group builtins (e.g. ONEAPI::reduce()) not yet supported // by CUDA. // // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out @@ -30,7 +30,7 @@ void test(T Identity, BinaryOperation BOp, size_t WGSize, size_t NWItems) { Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); auto Out = OutBuf.template get_access(CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp index 834ccf4407649..8b900a3a1fd9a 100644 --- a/sycl/test/reduction/reduction_nd_s0_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp @@ -35,7 +35,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -59,33 +59,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 0, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp index 2040b632e07fb..029458942390a 100644 --- a/sycl/test/reduction/reduction_nd_s0_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp @@ -37,7 +37,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -61,33 +61,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 0, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp index 9fe36d69daa8c..7cc975e261dc2 100644 --- a/sycl/test/reduction/reduction_nd_s1_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp @@ -36,7 +36,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -60,33 +60,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 8, 256); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 1, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp index d283fbe3cebe0..2c8f6a8343e83 100644 --- a/sycl/test/reduction/reduction_nd_s1_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp @@ -38,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { auto In = InBuf.template get_access(CGH); accessor Out(OutBuf, CGH); - auto Redu = intel::reduction(Out, Identity, BOp); + auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); @@ -62,33 +62,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>((std::numeric_limits::max)(), 8, 256); + test>((std::numeric_limits::min)(), 8, 256); // Check with various types. test>(1, 8, 256); - test>(getMaximumFPValue(), 1, 16); - test>(getMinimumFPValue(), 8, 256); + test>(getMaximumFPValue(), 1, 16); + test>(getMinimumFPValue(), 8, 256); // Check with CUSTOM type. test, 1, CustomVecPlus>(CustomVec(0), 8, 256); diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp index e972105bbab50..77633992ea2df 100644 --- a/sycl/test/reduction/reduction_placeholder.cpp +++ b/sycl/test/reduction/reduction_placeholder.cpp @@ -7,7 +7,8 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out -// TODO: Enable the test for HOST when it supports intel::reduce() and barrier() +// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with a placeholder accessor. @@ -41,7 +42,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); CGH.require(Out); - auto Redu = intel::reduction(Out, Identity, BinaryOperation()); + auto Redu = ONEAPI::reduction(Out, Identity, BinaryOperation()); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); @@ -64,16 +65,16 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - test>(0, 49, 49 * 5); - test>(0, 8, 8); + test>(0, 49, 49 * 5); + test>(0, 8, 8); // fast atomics - test>(0, 7, 7 * 3); - test>(0, 4, 128); + test>(0, 7, 7 * 3); + test>(0, 4, 128); // fast reduce - test>(getMaximumFPValue(), 5, 5 * 7); - test>(getMinimumFPValue(), 4, 128); + test>(getMaximumFPValue(), 5, 5 * 7); + test>(getMinimumFPValue(), 4, 128); // generic algorithm test>(1, 7, 7 * 5); diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp index fd527f8f4e0ef..dea789b395401 100644 --- a/sycl/test/reduction/reduction_transparent.cpp +++ b/sycl/test/reduction/reduction_transparent.cpp @@ -7,7 +7,8 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out -// TODO: Enable the test for HOST when it supports intel::reduce() and barrier() +// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // where func is a transparent functor. @@ -46,7 +47,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, intel::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ONEAPI::reduction(Out, Identity, BOp), + [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); @@ -86,7 +88,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); CGH.parallel_for>( - NDRange, intel::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { + NDRange, ONEAPI::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); }); @@ -110,10 +112,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { #if __cplusplus >= 201402L - test>(getMinimumFPValue(), 7, 7 * 5); - test>(0, 7, 49); + test>(getMinimumFPValue(), 7, 7 * 5); + test>(0, 7, 49); test>(1, 4, 16); - test>(0, 1, 512 + 32); + test>(0, 1, 512 + 32); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp index 592a36904a8e8..0ada4c515b615 100644 --- a/sycl/test/reduction/reduction_usm.cpp +++ b/sycl/test/reduction/reduction_usm.cpp @@ -7,7 +7,8 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out -// TODO: Enable the test for HOST when it supports intel::reduce() and barrier() +// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and +// barrier() // This test performs basic checks of parallel_for(nd_range, reduction, func) // with reductions initialized with USM var. @@ -62,7 +63,7 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { // Compute. Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - auto Redu = intel::reduction(ReduVarPtr, Identity, BOp); + auto Redu = ONEAPI::reduction(ReduVarPtr, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); @@ -105,16 +106,17 @@ void testUSM(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - testUSM>(0, 49, 49 * 5); - testUSM>(0, 8, 128); + testUSM>(0, 49, 49 * 5); + testUSM>(0, 8, 128); // fast atomics - testUSM>(0, 7, 7 * 3); - testUSM>(0, 4, 128); + testUSM>(0, 7, 7 * 3); + testUSM>(0, 4, 128); // fast reduce - testUSM>(getMaximumFPValue(), 5, 5 * 7); - testUSM>(getMinimumFPValue(), 4, 128); + testUSM>(getMaximumFPValue(), 5, + 5 * 7); + testUSM>(getMinimumFPValue(), 4, 128); // generic algorithm testUSM>(1, 7, 7 * 5); diff --git a/sycl/test/regression/sub-group-store-const-ref.cpp b/sycl/test/regression/sub-group-store-const-ref.cpp index dd10e1d57f12f..5c79e5e6758f6 100644 --- a/sycl/test/regression/sub-group-store-const-ref.cpp +++ b/sycl/test/regression/sub-group-store-const-ref.cpp @@ -13,4 +13,4 @@ #include using namespace sycl; -void test(intel::sub_group sg, global_ptr ptr) { sg.store(ptr, 1); } +void test(ONEAPI::sub_group sg, global_ptr ptr) { sg.store(ptr, 1); } diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp index c9450a451d2c6..73cb4e8e8cbb4 100644 --- a/sycl/test/spec_const/spec_const_hw.cpp +++ b/sycl/test/spec_const/spec_const_hw.cpp @@ -38,7 +38,8 @@ int val = 10; int get_value() { return val; } float foo( - const cl::sycl::experimental::spec_constant &f32) { + const cl::sycl::ONEAPI::experimental::spec_constant + &f32) { return f32; } @@ -47,8 +48,8 @@ struct SCWrapper { : SC1(p.set_spec_constant(4)), SC2(p.set_spec_constant(2)) {} - cl::sycl::experimental::spec_constant SC1; - cl::sycl::experimental::spec_constant SC2; + cl::sycl::ONEAPI::experimental::spec_constant SC1; + cl::sycl::ONEAPI::experimental::spec_constant SC2; }; int main(int argc, char **argv) { @@ -79,10 +80,10 @@ int main(int argc, char **argv) { // TODO make this floating point once supported by the compiler float goldf = (float)get_value(); - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ONEAPI::experimental::spec_constant i32 = program1.set_spec_constant(goldi); - cl::sycl::experimental::spec_constant f32 = + cl::sycl::ONEAPI::experimental::spec_constant f32 = program2.set_spec_constant(goldf); program1.build_with_kernel_type(); diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp index 7312e29ab40e1..26b0a871398d5 100644 --- a/sycl/test/spec_const/spec_const_neg.cpp +++ b/sycl/test/spec_const/spec_const_neg.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) { << "\n"; cl::sycl::program program1(q.get_context()); - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ONEAPI::experimental::spec_constant i32 = program1.set_spec_constant(10); std::vector veci(1); @@ -56,7 +56,7 @@ int main(int argc, char **argv) { try { // This is an attempt to set a spec constant after the program has been // built - spec_const_error should be thrown - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ONEAPI::experimental::spec_constant i32 = program1.set_spec_constant(10); cl::sycl::buffer bufi(veci.data(), veci.size()); @@ -69,7 +69,7 @@ int main(int argc, char **argv) { acci[0] = i32.get(); }); }); - } catch (cl::sycl::experimental::spec_const_error &sc_err) { + } catch (cl::sycl::ONEAPI::experimental::spec_const_error &sc_err) { passed = true; } catch (cl::sycl::exception &e) { std::cout << "*** Exception caught: " << e.what() << "\n"; diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp index 6883ce5c9d7d6..fc5e7dcb22ac1 100644 --- a/sycl/test/spec_const/spec_const_redefine.cpp +++ b/sycl/test/spec_const/spec_const_redefine.cpp @@ -68,9 +68,9 @@ int main(int argc, char **argv) { for (int i = 0; i < n_sc_sets; i++) { cl::sycl::program program(q.get_context()); const int *sc_set = &sc_vals[i][0]; - cl::sycl::experimental::spec_constant sc0 = + cl::sycl::ONEAPI::experimental::spec_constant sc0 = program.set_spec_constant(sc_set[0]); - cl::sycl::experimental::spec_constant sc1 = + cl::sycl::ONEAPI::experimental::spec_constant sc1 = program.set_spec_constant(sc_set[1]); program.build_with_kernel_type(); diff --git a/sycl/test/spec_const/spec_const_types.cpp b/sycl/test/spec_const/spec_const_types.cpp index c7017b2b69726..0a5178c5824eb 100644 --- a/sycl/test/spec_const/spec_const_types.cpp +++ b/sycl/test/spec_const/spec_const_types.cpp @@ -42,49 +42,49 @@ int main() { cl::sycl::program program(queue.get_context()); // Create specialization constants. - cl::sycl::experimental::spec_constant i1 = + cl::sycl::ONEAPI::experimental::spec_constant i1 = program.set_spec_constant((bool)get_value()); // CHECK-DAG: _ZTS11MyBoolConst=1|0 - cl::sycl::experimental::spec_constant i8 = + cl::sycl::ONEAPI::experimental::spec_constant i8 = program.set_spec_constant((int8_t)get_value()); // CHECK-DAG: _ZTS11MyInt8Const=1|1 - cl::sycl::experimental::spec_constant ui8 = + cl::sycl::ONEAPI::experimental::spec_constant ui8 = program.set_spec_constant((uint8_t)get_value()); // CHECK-DAG: _ZTS12MyUInt8Const=1|2 - cl::sycl::experimental::spec_constant i16 = + cl::sycl::ONEAPI::experimental::spec_constant i16 = program.set_spec_constant((int16_t)get_value()); // CHECK-DAG: _ZTS12MyInt16Const=1|3 - cl::sycl::experimental::spec_constant ui16 = + cl::sycl::ONEAPI::experimental::spec_constant ui16 = program.set_spec_constant((uint16_t)get_value()); // CHECK-DAG: _ZTS13MyUInt16Const=1|4 - cl::sycl::experimental::spec_constant i32 = + cl::sycl::ONEAPI::experimental::spec_constant i32 = program.set_spec_constant((int32_t)get_value()); // CHECK-DAG: _ZTS12MyInt32Const=1|5 - cl::sycl::experimental::spec_constant ui32 = + cl::sycl::ONEAPI::experimental::spec_constant ui32 = program.set_spec_constant((uint32_t)get_value()); // CHECK-DAG: _ZTS13MyUInt32Const=1|6 - cl::sycl::experimental::spec_constant i64 = + cl::sycl::ONEAPI::experimental::spec_constant i64 = program.set_spec_constant((int64_t)get_value()); // CHECK-DAG: _ZTS12MyInt64Const=1|7 - cl::sycl::experimental::spec_constant ui64 = + cl::sycl::ONEAPI::experimental::spec_constant ui64 = program.set_spec_constant((uint64_t)get_value()); // CHECK-DAG: _ZTS13MyUInt64Const=1|8 #define HALF 0 // TODO not yet supported #if HALF - cl::sycl::experimental::spec_constant f16 = - program.set_spec_constant((cl::sycl::half)get_value()); + cl::sycl::ONEAPI::experimental::spec_constant + f16 = program.set_spec_constant((cl::sycl::half)get_value()); #endif - cl::sycl::experimental::spec_constant f32 = + cl::sycl::ONEAPI::experimental::spec_constant f32 = program.set_spec_constant((float)get_value()); // CHECK-DAG: _ZTS12MyFloatConst=1|9 - cl::sycl::experimental::spec_constant f64 = + cl::sycl::ONEAPI::experimental::spec_constant f64 = program.set_spec_constant((double)get_value()); // CHECK-DAG: _ZTS13MyDoubleConst=1|10 @@ -97,16 +97,14 @@ int main() { queue.submit([&](cl::sycl::handler &cgh) { auto acc = buf.get_access(cgh); cgh.single_task( - program.get_kernel(), - [=]() { acc[0] = i1.get() + - i8.get() + ui8.get() + - i16.get() + ui16.get() + - i32.get() + ui32.get() + - i64.get() + ui64.get() + + program.get_kernel(), [=]() { + acc[0] = i1.get() + i8.get() + ui8.get() + i16.get() + ui16.get() + + i32.get() + ui32.get() + i64.get() + ui64.get() + #if HALF - f16.get() + + f16.get() + #endif - f32.get() + f64.get(); }); + f32.get() + f64.get(); + }); }); } } diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp index 25e31cbeb521c..b8aeefa9ca0e3 100644 --- a/sycl/test/sub_group/barrier.cpp +++ b/sycl/test/sub_group/barrier.cpp @@ -33,7 +33,7 @@ template void check(queue &Queue, size_t G = 240, size_t L = 60) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); size_t lid = SG.get_local_id().get(0); size_t gid = NdItem.get_global_id(0); size_t SGoff = gid - lid; diff --git a/sycl/test/sub_group/broadcast.hpp b/sycl/test/sub_group/broadcast.hpp index e057139db0152..4863716f19edd 100644 --- a/sycl/test/sub_group/broadcast.hpp +++ b/sycl/test/sub_group/broadcast.hpp @@ -20,7 +20,7 @@ template void check(queue &Queue) { auto syclacc = syclbuf.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); /*Broadcast GID of element with SGLID == SGID % SGMLR*/ syclacc[NdItem.get_global_id()] = broadcast(SG, T(NdItem.get_global_id(0)), diff --git a/sycl/test/sub_group/common.cpp b/sycl/test/sub_group/common.cpp index 17b1a9d8166d8..41623ae2c228b 100644 --- a/sycl/test/sub_group/common.cpp +++ b/sycl/test/sub_group/common.cpp @@ -36,7 +36,7 @@ void check(queue &Queue, unsigned int G, unsigned int L) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto syclacc = syclbuf.get_access(cgh); cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0); syclacc[NdItem.get_global_id()].local_range = SG.get_local_range().get(0); diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp index 3cab0b9edbd7a..786a9b57a0ec2 100644 --- a/sycl/test/sub_group/generic-shuffle.cpp +++ b/sycl/test/sub_group/generic-shuffle.cpp @@ -43,7 +43,7 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) @@ -144,7 +144,7 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { auto in = buf_in.template get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) diff --git a/sycl/test/sub_group/generic_reduce.cpp b/sycl/test/sub_group/generic_reduce.cpp index 7dda3a3492e57..98401e091e733 100644 --- a/sycl/test/sub_group/generic_reduce.cpp +++ b/sycl/test/sub_group/generic_reduce.cpp @@ -27,19 +27,18 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, Queue.submit([&](handler &cgh) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto acc = buf.template get_access(cgh); - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - intel::sub_group sg = NdItem.get_sub_group(); - if (skip_init) { - acc[NdItem.get_global_id(0)] = - reduce(sg, T(NdItem.get_global_id(0)), op); - } else { - acc[NdItem.get_global_id(0)] = - reduce(sg, T(NdItem.get_global_id(0)), init, op); - } - if (NdItem.get_global_id(0) == 0) - sgsizeacc[0] = sg.get_max_local_range()[0]; - }); + cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { + ONEAPI::sub_group sg = NdItem.get_sub_group(); + if (skip_init) { + acc[NdItem.get_global_id(0)] = + reduce(sg, T(NdItem.get_global_id(0)), op); + } else { + acc[NdItem.get_global_id(0)] = + reduce(sg, T(NdItem.get_global_id(0)), init, op); + } + if (NdItem.get_global_id(0) == 0) + sgsizeacc[0] = sg.get_max_local_range()[0]; + }); }); auto acc = buf.template get_access(); auto sgsizeacc = sgsizebuf.get_access(); @@ -81,8 +80,8 @@ int main() { // Test user-defined type // Use complex as a proxy for this using UDT = std::complex; - check_op(Queue, UDT(L, L), intel::plus(), false, G, L); - check_op(Queue, UDT(0, 0), intel::plus(), true, G, L); + check_op(Queue, UDT(L, L), ONEAPI::plus(), false, G, L); + check_op(Queue, UDT(0, 0), ONEAPI::plus(), true, G, L); // Test user-defined operator auto UDOp = [=](const auto &lhs, const auto &rhs) { return lhs + rhs; }; diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp index 3e818e7446ebf..c51a01fbdaefe 100644 --- a/sycl/test/sub_group/load_store.cpp +++ b/sycl/test/sub_group/load_store.cpp @@ -45,7 +45,7 @@ template void check(queue &Queue) { accessor LocalMem( {L}, cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); if (SG.get_group_id().get(0) % N == 0) { size_t SGOffset = SG.get_group_id().get(0) * SG.get_max_local_range().get(0); @@ -119,7 +119,7 @@ template void check(queue &Queue) { accessor LocalMem( {L}, cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); if (NdItem.get_global_id(0) == 0) sgsizeacc[0] = SG.get_max_local_range()[0]; size_t SGOffset = diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp index 0fb52c9b6ead0..66a737a95c8c4 100644 --- a/sycl/test/sub_group/reduce.hpp +++ b/sycl/test/sub_group/reduce.hpp @@ -25,7 +25,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto acc = buf.template get_access(cgh); cgh.parallel_for>( NdRange, [=](nd_item<1> NdItem) { - intel::sub_group sg = NdItem.get_sub_group(); + ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { acc[NdItem.get_global_id(0)] = reduce(sg, T(NdItem.get_global_id(0)), op); @@ -71,23 +71,23 @@ template void check(queue &Queue, size_t G = 256, size_t L = 64) { L = 32; } - check_op(Queue, T(L), intel::plus(), false, G, L); - check_op(Queue, T(0), intel::plus(), true, G, L); + check_op(Queue, T(L), ONEAPI::plus(), false, G, L); + check_op(Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), intel::minimum(), false, G, L); - check_op(Queue, T(G), intel::minimum(), true, G, L); + check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); + check_op(Queue, T(G), ONEAPI::minimum(), true, G, L); - check_op(Queue, T(G), intel::maximum(), false, G, L); - check_op(Queue, T(0), intel::maximum(), true, G, L); + check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); + check_op(Queue, T(0), ONEAPI::maximum(), true, G, L); #if __cplusplus >= 201402L - check_op(Queue, T(L), intel::plus<>(), false, G, L); - check_op(Queue, T(0), intel::plus<>(), true, G, L); + check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), intel::minimum<>(), false, G, L); - check_op(Queue, T(G), intel::minimum<>(), true, G, L); + check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); + check_op(Queue, T(G), ONEAPI::minimum<>(), true, G, L); - check_op(Queue, T(G), intel::maximum<>(), false, G, L); - check_op(Queue, T(0), intel::maximum<>(), true, G, L); + check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); + check_op(Queue, T(0), ONEAPI::maximum<>(), true, G, L); #endif } diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp index e0d49e5f9b352..ae9b4ced66ab0 100644 --- a/sycl/test/sub_group/scan.hpp +++ b/sycl/test/sub_group/scan.hpp @@ -27,7 +27,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto inacc = inbuf.template get_access(cgh); cgh.parallel_for>( NdRange, [=](nd_item<1> NdItem) { - intel::sub_group sg = NdItem.get_sub_group(); + ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { exacc[NdItem.get_global_id(0)] = exclusive_scan(sg, T(NdItem.get_global_id(0)), op); @@ -79,50 +79,50 @@ template void check(queue &Queue, size_t G = 256, size_t L = 64) { L = 32; } - check_op(Queue, T(L), intel::plus(), false, G, L); - check_op(Queue, T(0), intel::plus(), true, G, L); + check_op(Queue, T(L), ONEAPI::plus(), false, G, L); + check_op(Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), intel::minimum(), false, G, L); + check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), intel::minimum(), + check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), intel::minimum(), true, - G, L); + check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum(), + true, G, L); } - check_op(Queue, T(G), intel::maximum(), false, G, L); + check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), intel::maximum(), - true, G, L); + check_op(Queue, -std::numeric_limits::infinity(), + ONEAPI::maximum(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), intel::maximum(), true, - G, L); + check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum(), + true, G, L); } #if __cplusplus >= 201402L - check_op(Queue, T(L), intel::plus<>(), false, G, L); - check_op(Queue, T(0), intel::plus<>(), true, G, L); + check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), intel::minimum<>(), false, G, L); + check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), intel::minimum<>(), + check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum<>(), true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), intel::minimum<>(), true, + check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum<>(), true, G, L); } - check_op(Queue, T(G), intel::maximum<>(), false, G, L); + check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), intel::maximum<>(), + check_op(Queue, -std::numeric_limits::infinity(), ONEAPI::maximum<>(), true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), intel::maximum<>(), true, + check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, L); } #endif diff --git a/sycl/test/sub_group/shuffle.hpp b/sycl/test/sub_group/shuffle.hpp index 72ee4f46db871..ff18284d3f62a 100644 --- a/sycl/test/sub_group/shuffle.hpp +++ b/sycl/test/sub_group/shuffle.hpp @@ -34,7 +34,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); vec vwggid(wggid), vsgid(sgid); @@ -117,7 +117,7 @@ template void check(queue &Queue, size_t G = 256, size_t L = 64) { auto sgsizeacc = sgsizebuf.get_access(cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); if (wggid == 0) diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp index 382266fa412c0..1d9852e217d6f 100644 --- a/sycl/test/sub_group/vote.cpp +++ b/sycl/test/sub_group/vote.cpp @@ -49,7 +49,7 @@ void check(queue Queue, const int G, const int L, const int D, const int R) { auto sganyacc = sganybuf.get_access(cgh); auto sgallacc = sgallbuf.get_access(cgh); cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - intel::sub_group SG = NdItem.get_sub_group(); + ONEAPI::sub_group SG = NdItem.get_sub_group(); /* Set to 1 if any local ID in subgroup devided by D has remainder R */ if (any_of(SG, SG.get_local_id().get(0) % D == R)) { sganyacc[NdItem.get_global_id()] = 1;