diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 6d62f035a9674..40306c90bf8c8 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -3194,9 +3194,10 @@ bool Util::isSyclHalfType(const QualType &Ty) {
 
 bool Util::isSyclSpecConstantType(const QualType &Ty) {
   const StringRef &Name = "spec_constant";
-  std::array<DeclContextDesc, 4> Scopes = {
+  std::array<DeclContextDesc, 5> Scopes = {
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "ONEAPI"},
       Util::DeclContextDesc{clang::Decl::Kind::Namespace, "experimental"},
       Util::DeclContextDesc{Decl::Kind::ClassTemplateSpecialization, Name}};
   return matchQualifiedTypeName(Ty, Scopes);
diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
index 60d5d2bc2307c..0d77fd81dc7da 100644
--- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp
+++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp
@@ -253,6 +253,7 @@ struct get_kernel_name_t<auto_name, Type> {
   using name = Type;
 };
 
+namespace ONEAPI {
 namespace experimental {
 template <typename T, typename ID = T>
 class spec_constant {
@@ -268,6 +269,7 @@ class spec_constant {
   }
 };
 } // namespace experimental
+} // namespace ONEAPI
 
 #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel))
 template <typename KernelName = auto_name, typename KernelType>
diff --git a/clang/test/CodeGenSYCL/int_header_spec_const.cpp b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
index e6743c4ea2e91..a32389ae4c492 100644
--- a/clang/test/CodeGenSYCL/int_header_spec_const.cpp
+++ b/clang/test/CodeGenSYCL/int_header_spec_const.cpp
@@ -20,18 +20,18 @@ class MyDoubleConst;
 
 int main() {
   // Create specialization constants.
-  cl::sycl::experimental::spec_constant<bool, MyBoolConst> i1(false);
-  cl::sycl::experimental::spec_constant<char, MyInt8Const> i8(0);
-  cl::sycl::experimental::spec_constant<unsigned char, MyUInt8Const> ui8(0);
-  cl::sycl::experimental::spec_constant<short, MyInt16Const> i16(0);
-  cl::sycl::experimental::spec_constant<unsigned short, MyUInt16Const> ui16(0);
-  cl::sycl::experimental::spec_constant<int, MyInt32Const> i32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<bool, MyBoolConst> i1(false);
+  cl::sycl::ONEAPI::experimental::spec_constant<char, MyInt8Const> i8(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned char, MyUInt8Const> ui8(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<short, MyInt16Const> i16(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned short, MyUInt16Const> ui16(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<int, MyInt32Const> i32(0);
   // Constant used twice, but there must be single entry in the int header,
   // otherwise compilation error would be issued.
-  cl::sycl::experimental::spec_constant<int, MyInt32Const> i32_1(0);
-  cl::sycl::experimental::spec_constant<unsigned int, MyUInt32Const> ui32(0);
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32(0);
-  cl::sycl::experimental::spec_constant<double, MyDoubleConst> f64(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<int, MyInt32Const> i32_1(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<unsigned int, MyUInt32Const> ui32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32(0);
+  cl::sycl::ONEAPI::experimental::spec_constant<double, MyDoubleConst> f64(0);
 
   double val;
   double *ptr = &val; // to avoid "unused" warnings
diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp
index 5e3478abf5176..2e01b5235a63a 100644
--- a/clang/test/SemaSYCL/Inputs/sycl.hpp
+++ b/clang/test/SemaSYCL/Inputs/sycl.hpp
@@ -218,12 +218,12 @@ class handler {
   }
 };
 
+namespace ONEAPI {
 namespace experimental {
-
 template <typename T, typename ID = T>
 class spec_constant {};
 } // namespace experimental
-
+} // namespace ONEAPI
 } // namespace sycl
 } // namespace cl
 
diff --git a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
index d40e5296949a2..ba2489b93fe6f 100644
--- a/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
+++ b/clang/test/SemaSYCL/spec-const-kernel-arg.cpp
@@ -6,12 +6,12 @@
 #include <sycl.hpp>
 
 struct SpecConstantsWrapper {
-  cl::sycl::experimental::spec_constant<int, class sc_name1> SC1;
-  cl::sycl::experimental::spec_constant<int, class sc_name2> SC2;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1> SC1;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2> SC2;
 };
 
 int main() {
-  cl::sycl::experimental::spec_constant<char, class MyInt32Const> SC;
+  cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const> SC;
   SpecConstantsWrapper W;
   cl::sycl::kernel_single_task<class kernel_sc>(
       [=]() {
@@ -23,7 +23,7 @@ int main() {
 // CHECK: FunctionDecl {{.*}}kernel_sc{{.*}} 'void ()'
 // CHECK: VarDecl {{.*}}'(lambda at {{.*}}'
 // CHECK-NEXT: InitListExpr {{.*}}'(lambda at {{.*}}'
-// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::experimental::spec_constant<char, MyInt32Const>'
+// CHECK-NEXT: CXXConstructExpr {{.*}}'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>':'cl::sycl::ONEAPI::experimental::spec_constant<char, MyInt32Const>'
 // CHECK-NEXT: InitListExpr {{.*}} 'SpecConstantsWrapper'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant<int, class sc_name1>':'cl::sycl::experimental::spec_constant<int, sc_name1>'
-// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::experimental::spec_constant<int, class sc_name2>':'cl::sycl::experimental::spec_constant<int, sc_name2>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1>':'cl::sycl::ONEAPI::experimental::spec_constant<int, sc_name1>'
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2>':'cl::sycl::ONEAPI::experimental::spec_constant<int, sc_name2>'
diff --git a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
index 54a6f46290268..bd1375b49f6ec 100644
--- a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
+++ b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp
@@ -10,9 +10,9 @@ __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
 }
 
 int main() {
-  cl::sycl::experimental::spec_constant<char, class MyInt32Const> spec_const;
+  cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const> spec_const;
   cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessor;
-  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::experimental::spec_constant<char, class MyInt32Const>'
+  // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::ONEAPI::experimental::spec_constant<char, class MyInt32Const>'
   // CHECK: FieldDecl {{.*}} implicit referenced 'cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write>'
   kernel<class MyKernel>([spec_const, accessor]() {});
   return 0;
diff --git a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
index 5eadf6d40bc28..122e4eea3fce5 100644
--- a/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
+++ b/llvm/test/tools/sycl-post-link/sc_sym_two_refs.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
 target triple = "spir64-unknown-unknown-sycldevice"
 
-%"sycl::experimental::spec_constant" = type { i8 }
+%"spec_constant" = type { i8 }
 
 @SCSymID = private unnamed_addr constant [10 x i8] c"SpecConst\00", align 1
 ; CHECK-NOT: @SCSymID
@@ -21,7 +21,7 @@ define weak_odr dso_local spir_kernel void @Kernel() {
 }
 
 ; Function Attrs: norecurse
-define dso_local spir_func float @foo_float(%"sycl::experimental::spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
+define dso_local spir_func float @foo_float(%"spec_constant" addrspace(4)* nocapture readnone dereferenceable(1) %0) local_unnamed_addr #3 {
   %2 = tail call spir_func float @_Z27__sycl_getSpecConstantValueIfET_PKc(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([10 x i8], [10 x i8]* @SCSymID, i64 0, i64 0) to i8 addrspace(4)*))
   ret float %2
 }
diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index 7dde4b4515322..0c949a0611d8c 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -15,7 +15,7 @@ include(AddSYCLExecutable)
 set(SYCL_MAJOR_VERSION 3)
 set(SYCL_MINOR_VERSION 0)
 set(SYCL_PATCH_VERSION 0)
-set(SYCL_DEV_ABI_VERSION 0)
+set(SYCL_DEV_ABI_VERSION 1)
 if (SYCL_ADD_DEV_VERSION_POSTFIX)
   set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
 endif()
diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index 21b7056d277cb..86461727fe4ff 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -8,6 +8,12 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/atomic.hpp>
+#include <CL/sycl/ONEAPI/experimental/builtins.hpp>
+#include <CL/sycl/ONEAPI/function_pointer.hpp>
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
+#include <CL/sycl/ONEAPI/reduction.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/aspects.hpp>
 #include <CL/sycl/atomic.hpp>
@@ -23,12 +29,6 @@
 #include <CL/sycl/handler.hpp>
 #include <CL/sycl/id.hpp>
 #include <CL/sycl/image.hpp>
-#include <CL/sycl/intel/atomic.hpp>
-#include <CL/sycl/intel/builtins.hpp>
-#include <CL/sycl/intel/function_pointer.hpp>
-#include <CL/sycl/intel/group_algorithm.hpp>
-#include <CL/sycl/intel/reduction.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
 #include <CL/sycl/item.hpp>
 #include <CL/sycl/kernel.hpp>
 #include <CL/sycl/multi_ptr.hpp>
diff --git a/sycl/include/CL/sycl/intel/atomic.hpp b/sycl/include/CL/sycl/ONEAPI/atomic.hpp
similarity index 57%
rename from sycl/include/CL/sycl/intel/atomic.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic.hpp
index ecb32b76a6976..a7b07d80d2fd3 100644
--- a/sycl/include/CL/sycl/intel/atomic.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic.hpp
@@ -1,4 +1,4 @@
-//==---------------- atomic.hpp - SYCL_INTEL_extended_atomics --------------==//
+//==--------------- atomic.hpp - SYCL_ONEAPI_extended_atomics --------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
 
 #pragma once
 
-#include <CL/sycl/intel/atomic_accessor.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
-#include <CL/sycl/intel/atomic_fence.hpp>
-#include <CL/sycl/intel/atomic_ref.hpp>
+#include <CL/sycl/ONEAPI/atomic_accessor.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_fence.hpp>
+#include <CL/sycl/ONEAPI/atomic_ref.hpp>
diff --git a/sycl/include/CL/sycl/intel/atomic_accessor.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
similarity index 95%
rename from sycl/include/CL/sycl/intel/atomic_accessor.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
index 524e2d4a3ab80..ed415a90891c7 100644
--- a/sycl/include/CL/sycl/intel/atomic_accessor.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_accessor.hpp
@@ -1,4 +1,4 @@
-//==--- atomic_accessor.hpp - SYCL_INTEL_extended_atomics atomic_accessor --==//
+//==-- atomic_accessor.hpp - SYCL_ONEAPI_extended_atomics atomic_accessor --==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,13 +8,14 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
+#include <CL/sycl/ONEAPI/atomic_ref.hpp>
 #include <CL/sycl/access/access.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
-#include <CL/sycl/intel/atomic_ref.hpp>
+#include <CL/sycl/accessor.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 #if __cplusplus > 201402L
 
@@ -123,6 +124,6 @@ atomic_accessor(buffer<DataT, Dimensions, AllocatorT>, handler,
 
 #endif
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_enums.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
similarity index 93%
rename from sycl/include/CL/sycl/intel/atomic_enums.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
index a85c9902cd524..ad1d94ca2d815 100644
--- a/sycl/include/CL/sycl/intel/atomic_enums.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_enums.hpp
@@ -1,4 +1,4 @@
-//==---------------- atomic_enums.hpp - SYCL_INTEL_extended_atomics enums --==//
+//==--------------- atomic_enums.hpp - SYCL_ONEAPI_extended_atomics enums --==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -20,7 +20,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 enum class memory_order : int {
   relaxed,
@@ -63,7 +63,7 @@ namespace detail {
 // Nested ternary conditions in else branch required for C++11
 #if __cplusplus >= 201402L
 static inline constexpr std::memory_order
-getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) {
   switch (order) {
   case memory_order::relaxed:
     return std::memory_order_relaxed;
@@ -81,7 +81,7 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
 }
 #else
 static inline constexpr std::memory_order
-getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
+getStdMemoryOrder(::cl::sycl::ONEAPI::memory_order order) {
   return (order == memory_order::relaxed)
              ? std::memory_order_relaxed
              : (order == memory_order::__consume_unsupported)
@@ -98,6 +98,6 @@ getStdMemoryOrder(::cl::sycl::intel::memory_order order) {
 } // namespace detail
 #endif // __SYCL_DEVICE_ONLY__
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_fence.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
similarity index 87%
rename from sycl/include/CL/sycl/intel/atomic_fence.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
index aba95c060b878..76352df6faed9 100644
--- a/sycl/include/CL/sycl/intel/atomic_fence.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_fence.hpp
@@ -1,4 +1,4 @@
-//==----- atomic_fence.hpp - SYCL_INTEL_extended_atomics atomic_fence ------==//
+//==---- atomic_fence.hpp - SYCL_ONEAPI_extended_atomics atomic_fence ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,8 +9,8 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/detail/spirv.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
@@ -18,7 +18,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 using namespace cl::sycl::detail;
 }
@@ -35,6 +35,6 @@ static inline void atomic_fence(memory_order order, memory_scope scope) {
 #endif
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
similarity index 97%
rename from sycl/include/CL/sycl/intel/atomic_ref.hpp
rename to sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
index 1616727f919b8..625a8902d9c86 100644
--- a/sycl/include/CL/sycl/intel/atomic_ref.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/atomic_ref.hpp
@@ -1,4 +1,4 @@
-//==----- atomic_ref.hpp - SYCL_INTEL_extended_atomics atomic_ref ----------==//
+//==----- atomic_ref.hpp - SYCL_ONEAPI_extended_atomics atomic_ref ---------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,11 +9,11 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/atomic.hpp>
 #include <CL/sycl/detail/defines.hpp>
-#include <CL/sycl/detail/helpers.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/detail/spirv.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <atomic>
@@ -27,14 +27,14 @@ namespace sycl {
 template <typename pointerT, access::address_space AddressSpace>
 class multi_ptr;
 
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 
-// Import from detail:: into intel::detail:: to improve readability later
+// Import from detail:: into ONEAPI::detail:: to improve readability later
 using namespace ::cl::sycl::detail;
 
-using memory_order = cl::sycl::intel::memory_order;
-using memory_scope = cl::sycl::intel::memory_scope;
+using memory_order = cl::sycl::ONEAPI::memory_order;
+using memory_scope = cl::sycl::ONEAPI::memory_scope;
 
 template <typename T>
 using IsValidAtomicType =
@@ -127,14 +127,14 @@ class atomic_ref_base {
       detail::IsValidAtomicType<T>::value,
       "Invalid atomic type.  Valid types are arithmetic and pointer types");
   static_assert(!std::is_same<T, bool>::value,
-                "intel::atomic_ref does not support bool type");
+                "ONEAPI::atomic_ref does not support bool type");
   static_assert(!(std::is_same<T, char>::value ||
                   std::is_same<T, signed char>::value ||
                   std::is_same<T, unsigned char>::value),
-                "intel::atomic_ref does not support char type");
+                "ONEAPI::atomic_ref does not support char type");
   static_assert(!(std::is_same<T, short>::value ||
                   std::is_same<T, unsigned short>::value),
-                "intel::atomic_ref does not support short type");
+                "ONEAPI::atomic_ref does not support short type");
   static_assert(detail::IsValidAtomicAddressSpace<AddressSpace>::value,
                 "Invalid atomic address_space.  Valid address spaces are: "
                 "global_space, local_space, global_device_space");
@@ -651,6 +651,6 @@ class atomic_ref : public detail::atomic_ref_impl<T, DefaultOrder, DefaultScope,
                                 AddressSpace>::atomic_ref_impl;
 };
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/builtins.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
similarity index 98%
rename from sycl/include/CL/sycl/intel/builtins.hpp
rename to sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
index a59258a2290ba..e5b047e29c7a0 100644
--- a/sycl/include/CL/sycl/intel/builtins.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/experimental/builtins.hpp
@@ -18,7 +18,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 namespace experimental {
 
 // Provides functionality to print data from kernels in a C way:
@@ -68,7 +68,7 @@ int printf(const CONSTANT_AS char *__format, Args... args) {
 }
 
 } // namespace experimental
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
 
diff --git a/sycl/include/CL/sycl/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
similarity index 93%
rename from sycl/include/CL/sycl/experimental/spec_constant.hpp
rename to sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
index 7952d98bd481f..03698008e056d 100644
--- a/sycl/include/CL/sycl/experimental/spec_constant.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp
@@ -1,4 +1,4 @@
-//==----- spec_constant.hpp - SYCL public experimental API header file -----==//
+//==----------- spec_constant.hpp - SYCL public ONEAPI API header file -----==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -22,6 +22,9 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
+class program;
+
+namespace ONEAPI {
 namespace experimental {
 
 class spec_const_error : public compile_program_error {
@@ -60,5 +63,6 @@ template <typename T, typename ID = T> class spec_constant {
 };
 
 } // namespace experimental
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/function_pointer.hpp b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
similarity index 98%
rename from sycl/include/CL/sycl/intel/function_pointer.hpp
rename to sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
index f812be911b788..3ccbdf636ec99 100644
--- a/sycl/include/CL/sycl/intel/function_pointer.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/function_pointer.hpp
@@ -21,7 +21,7 @@ namespace detail {
 __SYCL_EXPORT cl_ulong getDeviceFunctionPointerImpl(device &D, program &P,
                                                     const char *FuncName);
 }
-namespace intel {
+namespace ONEAPI {
 
 // This is a preview extension implementation, intended to provide early
 // access to a feature for review and community feedback.
@@ -83,6 +83,6 @@ device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName,
 
   return sycl::detail::getDeviceFunctionPointerImpl(D, P, FuncName);
 }
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/ONEAPI/functional.hpp
similarity index 85%
rename from sycl/include/CL/sycl/intel/functional.hpp
rename to sycl/include/CL/sycl/ONEAPI/functional.hpp
index ee4ed21b33ffd..ab16a694b00ed 100644
--- a/sycl/include/CL/sycl/intel/functional.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/functional.hpp
@@ -11,7 +11,7 @@
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 template <typename T = void> struct minimum {
   T operator()(const T &lhs, const T &rhs) const {
@@ -57,7 +57,7 @@ template <typename T = void> using bit_or = std::bit_or<T>;
 template <typename T = void> using bit_xor = std::bit_xor<T>;
 template <typename T = void> using bit_and = std::bit_and<T>;
 
-} // namespace intel
+} // namespace ONEAPI
 
 #ifdef __SYCL_DEVICE_ONLY__
 namespace detail {
@@ -93,15 +93,15 @@ struct GroupOpTag<T, detail::enable_if_t<detail::is_sgenfloat<T>::value>> {
     return Ret;                                                                \
   }
 
-__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum<T>)
-__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus<T>)
-__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus<T>)
-__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, ONEAPI::minimum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, ONEAPI::maximum<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, ONEAPI::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, ONEAPI::plus<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, ONEAPI::plus<T>)
 
 #undef __SYCL_CALC_OVERLOAD
 
diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
similarity index 93%
rename from sycl/include/CL/sycl/intel/group_algorithm.hpp
rename to sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
index c8f6faa2a08a6..e5681bcadcb0d 100644
--- a/sycl/include/CL/sycl/intel/group_algorithm.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/group_algorithm.hpp
@@ -10,13 +10,15 @@
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_types.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/atomic.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/group.hpp>
-#include <CL/sycl/intel/functional.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
+#include <CL/sycl/nd_item.hpp>
 
-#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
+#ifndef __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace detail {
@@ -32,7 +34,7 @@ template <> inline size_t get_local_linear_range<group<3>>(group<3> g) {
   return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2);
 }
 template <>
-inline size_t get_local_linear_range<intel::sub_group>(intel::sub_group g) {
+inline size_t get_local_linear_range<ONEAPI::sub_group>(ONEAPI::sub_group g) {
   return g.get_local_range()[0];
 }
 
@@ -53,8 +55,8 @@ __SYCL_GROUP_GET_LOCAL_LINEAR_ID(3);
 #endif // __SYCL_DEVICE_ONLY__
 
 template <>
-inline intel::sub_group::linear_id_type
-get_local_linear_id<intel::sub_group>(intel::sub_group g) {
+inline ONEAPI::sub_group::linear_id_type
+get_local_linear_id<ONEAPI::sub_group>(ONEAPI::sub_group g) {
   return g.get_local_id()[0];
 }
 
@@ -79,22 +81,22 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
 
 template <typename T, class BinaryOperation> struct identity {};
 
-template <typename T, typename V> struct identity<T, intel::plus<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::plus<V>> {
   static constexpr T value = 0;
 };
 
-template <typename T, typename V> struct identity<T, intel::minimum<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::minimum<V>> {
   static constexpr T value = (std::numeric_limits<T>::max)();
 };
 
-template <typename T, typename V> struct identity<T, intel::maximum<V>> {
+template <typename T, typename V> struct identity<T, ONEAPI::maximum<V>> {
   static constexpr T value = std::numeric_limits<T>::lowest();
 };
 
 template <typename T>
 using native_op_list =
-    type_list<intel::plus<T>, intel::bit_or<T>, intel::bit_xor<T>,
-              intel::bit_and<T>, intel::maximum<T>, intel::minimum<T>>;
+    type_list<ONEAPI::plus<T>, ONEAPI::bit_or<T>, ONEAPI::bit_xor<T>,
+              ONEAPI::bit_and<T>, ONEAPI::maximum<T>, ONEAPI::minimum<T>>;
 
 template <typename T, typename BinaryOperation> struct is_native_op {
   static constexpr bool value =
@@ -123,7 +125,7 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 
 // EnableIf shorthands for algorithms that depend only on type
 template <typename T>
@@ -169,7 +171,7 @@ using EnableIfIsNonNativeOp = cl::sycl::detail::enable_if_t<
 template <typename Group> bool all_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAll<Group>(pred);
 #else
@@ -183,7 +185,7 @@ template <typename Group, typename T, class Predicate>
 bool all_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return all_of(g, pred(x));
 }
 
@@ -192,7 +194,7 @@ EnableIfIsPointer<Ptr, bool> all_of(Group g, Ptr first, Ptr last,
                                     Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   bool partial = true;
   sycl::detail::for_each(
@@ -212,7 +214,7 @@ EnableIfIsPointer<Ptr, bool> all_of(Group g, Ptr first, Ptr last,
 template <typename Group> bool any_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAny<Group>(pred);
 #else
@@ -226,7 +228,7 @@ template <typename Group, typename T, class Predicate>
 bool any_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return any_of(g, pred(x));
 }
 
@@ -236,7 +238,7 @@ EnableIfIsPointer<Ptr, bool> any_of(Group g, Ptr first, Ptr last,
 #ifdef __SYCL_DEVICE_ONLY__
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   bool partial = false;
   sycl::detail::for_each(
       g, first, last,
@@ -255,7 +257,7 @@ EnableIfIsPointer<Ptr, bool> any_of(Group g, Ptr first, Ptr last,
 template <typename Group> bool none_of(Group, bool pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupAll<Group>(!pred);
 #else
@@ -269,7 +271,7 @@ template <typename Group, typename T, class Predicate>
 bool none_of(Group g, T x, Predicate pred) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return none_of(g, pred(x));
 }
 
@@ -279,7 +281,7 @@ EnableIfIsPointer<Ptr, bool> none_of(Group g, Ptr first, Ptr last,
 #ifdef __SYCL_DEVICE_ONLY__
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   return !any_of(g, first, last, pred);
 #else
   (void)g;
@@ -296,7 +298,7 @@ EnableIfIsTriviallyCopyable<T> broadcast(Group, T x,
                                          typename Group::id_type local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return sycl::detail::spirv::GroupBroadcast<Group>(x, local_id);
 #else
@@ -312,7 +314,7 @@ EnableIfIsVectorArithmetic<T> broadcast(Group g, T x,
                                         typename Group::id_type local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -333,7 +335,7 @@ EnableIfIsTriviallyCopyable<T>
 broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return broadcast(
       g, x,
@@ -352,7 +354,7 @@ EnableIfIsVectorArithmetic<T>
 broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -372,7 +374,7 @@ template <typename Group, typename T>
 EnableIfIsTriviallyCopyable<T> broadcast(Group g, T x) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   return broadcast(g, x, 0);
 #else
@@ -387,7 +389,7 @@ template <typename Group, typename T>
 EnableIfIsVectorArithmetic<T> broadcast(Group g, T x) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   T result;
   for (int s = 0; s < x.get_size(); ++s) {
@@ -407,7 +409,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 reduce(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x, x)), T>::value ||
@@ -429,7 +431,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -449,7 +451,7 @@ EnableIfIsNonNativeOp<T, BinaryOperation> reduce(Group g, T x,
                                                  BinaryOperation op) {
   static_assert(sycl::detail::is_sub_group<Group>::value,
                 "reduce algorithm with user-defined types and operators"
-                "only supports intel::sub_group class.");
+                "only supports ONEAPI::sub_group class.");
   T result = x;
   for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) {
     T tmp = g.shuffle_xor(result, id<1>(mask));
@@ -465,7 +467,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, x)), T>::value ||
@@ -486,7 +488,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 reduce(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])),
@@ -512,7 +514,7 @@ EnableIfIsNonNativeOp<T, BinaryOperation> reduce(Group g, V x, T init,
                                                  BinaryOperation op) {
   static_assert(sycl::detail::is_sub_group<Group>::value,
                 "reduce algorithm with user-defined types and operators"
-                "only supports intel::sub_group class.");
+                "only supports ONEAPI::sub_group class.");
   T result = x;
   for (int mask = 1; mask < g.get_max_local_range()[0]; mask *= 2) {
     T tmp = g.shuffle_xor(result, id<1>(mask));
@@ -528,7 +530,7 @@ EnableIfIsPointer<Ptr, typename Ptr::element_type>
 reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(*first, *first)),
@@ -559,7 +561,7 @@ EnableIfIsPointer<Ptr, T> reduce(Group g, Ptr first, Ptr last, T init,
                                  BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, *first)), T>::value ||
@@ -587,7 +589,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(x, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -608,7 +610,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -628,7 +630,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])),
@@ -648,7 +650,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 exclusive_scan(Group g, V x, T init, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(init, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -679,7 +681,7 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init,
                BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(*first, *first)), T>::value ||
@@ -741,7 +743,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(x[0], x[0])),
@@ -761,7 +763,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group, T x, BinaryOperation binary_op) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(x, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -782,7 +784,7 @@ EnableIfIsScalarArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(std::is_same<decltype(binary_op(init, x)), T>::value ||
                     (std::is_same<T, half>::value &&
@@ -805,7 +807,7 @@ EnableIfIsVectorArithmeticNativeOp<T, BinaryOperation>
 inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init[0], x[0])), T>::value ||
@@ -826,7 +828,7 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result,
                BinaryOperation binary_op, T init) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
   // FIXME: Do not special-case for half precision
   static_assert(
       std::is_same<decltype(binary_op(init, *first)), T>::value ||
@@ -884,7 +886,7 @@ EnableIfIsPointer<InPtr, OutPtr> inclusive_scan(Group g, InPtr first,
 template <typename Group> bool leader(Group g) {
   static_assert(sycl::detail::is_generic_group<Group>::value,
                 "Group algorithms only support the sycl::group and "
-                "intel::sub_group class.");
+                "ONEAPI::sub_group class.");
 #ifdef __SYCL_DEVICE_ONLY__
   typename Group::linear_id_type linear_id =
       sycl::detail::get_local_linear_id(g);
@@ -896,7 +898,7 @@ template <typename Group> bool leader(Group g) {
 #endif
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
-#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
+#endif // __DISABLE_SYCL_ONEAPI_GROUP_ALGORITHMS__
diff --git a/sycl/include/CL/sycl/intel/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
similarity index 96%
rename from sycl/include/CL/sycl/intel/reduction.hpp
rename to sycl/include/CL/sycl/ONEAPI/reduction.hpp
index 26adff47778e9..e33db4783156b 100644
--- a/sycl/include/CL/sycl/intel/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -8,16 +8,18 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/group_algorithm.hpp>
 #include <CL/sycl/accessor.hpp>
 #include <CL/sycl/handler.hpp>
-#include <CL/sycl/intel/group_algorithm.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 
 namespace detail {
 
+using cl::sycl::detail::queue_impl;
+
 __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
                                       size_t LocalMemBytesPerWorkItem);
 __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
@@ -33,8 +35,8 @@ using cl::sycl::detail::remove_AS;
 
 template <typename T, class BinaryOperation>
 using IsReduPlus = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::plus<T>>::value ||
-    std::is_same<BinaryOperation, intel::plus<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::plus<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::plus<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduMultiplies = detail::bool_constant<
@@ -43,28 +45,28 @@ using IsReduMultiplies = detail::bool_constant<
 
 template <typename T, class BinaryOperation>
 using IsReduMinimum = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::minimum<T>>::value ||
-    std::is_same<BinaryOperation, intel::minimum<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::minimum<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::minimum<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduMaximum = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::maximum<T>>::value ||
-    std::is_same<BinaryOperation, intel::maximum<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::maximum<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::maximum<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitOR = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_or<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_or<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_or<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_or<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitXOR = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_xor<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_xor<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_xor<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_xor<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduBitAND = detail::bool_constant<
-    std::is_same<BinaryOperation, intel::bit_and<T>>::value ||
-    std::is_same<BinaryOperation, intel::bit_and<void>>::value>;
+    std::is_same<BinaryOperation, ONEAPI::bit_and<T>>::value ||
+    std::is_same<BinaryOperation, ONEAPI::bit_and<void>>::value>;
 
 template <typename T, class BinaryOperation>
 using IsReduOptForFastAtomicFetch =
@@ -171,7 +173,7 @@ class reducer {
 /// using those operations, which are based on functionality provided by
 /// sycl::atomic class.
 ///
-/// For example, it is known that 0 is identity for intel::plus operations
+/// For example, it is known that 0 is identity for ONEAPI::plus operations
 /// accepting native scalar types to which scalar 0 is convertible.
 /// Also, for int32/64 types the atomic_combine() is lowered to
 /// sycl::atomic::fetch_add().
@@ -313,7 +315,7 @@ class reducer<T, BinaryOperation,
         .fetch_and(MValue);
   }
 
-  /// Atomic MIN operation: *ReduVarPtr = intel::minimum(*ReduVarPtr, MValue);
+  /// Atomic MIN operation: *ReduVarPtr = ONEAPI::minimum(*ReduVarPtr, MValue);
   template <typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               (is_geninteger32bit<T>::value || is_geninteger64bit<T>::value) &&
@@ -323,7 +325,7 @@ class reducer<T, BinaryOperation,
         .fetch_min(MValue);
   }
 
-  /// Atomic MAX operation: *ReduVarPtr = intel::maximum(*ReduVarPtr, MValue);
+  /// Atomic MAX operation: *ReduVarPtr = ONEAPI::maximum(*ReduVarPtr, MValue);
   template <typename _T = T, class _BinaryOperation = BinaryOperation>
   enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
               (is_geninteger32bit<T>::value || is_geninteger64bit<T>::value) &&
@@ -609,11 +611,11 @@ struct get_reduction_aux_kernel_name_t {
 /// Implements a command group function that enqueues a kernel that calls
 /// user's lambda function KernelFunc and also does one iteration of reduction
 /// of elements computed in user's lambda function.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups, then it calls fast sycl atomic operations to update
 /// user's reduction variable.
 ///
-/// Briefly: calls user's lambda, intel::reduce() + atomic, INT + ADD/MIN/MAX.
+/// Briefly: calls user's lambda, ONEAPI::reduce() + atomic, INT + ADD/MIN/MAX.
 template <typename KernelName, typename KernelType, int Dims, class Reduction,
           bool UniformWG, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && Reduction::has_fast_atomics>
@@ -632,7 +634,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
         (UniformWG || NDIt.get_global_linear_id() < NWorkItems)
             ? Reducer.MValue
             : Reducer.getIdentity();
-    Reducer.MValue = intel::reduce(NDIt.get_group(), Val, BOp);
+    Reducer.MValue = ONEAPI::reduce(NDIt.get_group(), Val, BOp);
     if (NDIt.get_local_linear_id() == 0)
       Reducer.atomic_combine(Reduction::getOutPointer(Out));
   });
@@ -726,11 +728,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
 /// Implements a command group function that enqueues a kernel that
 /// calls user's lambda function and does one iteration of reduction
 /// of elements in each of work-groups.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups. At the end of each work-groups the partial sum is written
 /// to a global buffer.
 ///
-/// Briefly: user's lambda, intel:reduce(), FP + ADD/MIN/MAX.
+/// Briefly: user's lambda, ONEAPI:reduce(), FP + ADD/MIN/MAX.
 template <typename KernelName, typename KernelType, int Dims, class Reduction,
           bool UniformWG, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
@@ -760,7 +762,7 @@ reduCGFuncImpl(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
             ? Reducer.MValue
             : Reducer.getIdentity();
     typename Reduction::binary_operation BOp;
-    PSum = intel::reduce(NDIt.get_group(), PSum, BOp);
+    PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp);
     if (NDIt.get_local_linear_id() == 0) {
       if (IsUpdateOfUserVar)
         PSum = BOp(*(Reduction::getOutPointer(Out)), PSum);
@@ -873,11 +875,11 @@ reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
 
 /// Implements a command group function that enqueues a kernel that does one
 /// iteration of reduction of elements in each of work-groups.
-/// This version uses intel::reduce() algorithm to reduce elements in each
+/// This version uses ONEAPI::reduce() algorithm to reduce elements in each
 /// of work-groups. At the end of each work-groups the partial sum is written
 /// to a global buffer.
 ///
-/// Briefly: aux kernel, intel:reduce(), reproducible results, FP + ADD/MIN/MAX
+/// Briefly: aux kernel, ONEAPI:reduce(), reproducible results, FP + ADD/MIN/MAX
 template <typename KernelName, typename KernelType, bool UniformWG,
           class Reduction, typename InputT, typename OutputT>
 enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics>
@@ -897,7 +899,7 @@ reduAuxCGFuncImpl(handler &CGH, size_t NWorkItems, size_t NWorkGroups,
         (UniformWG || (GID < NWorkItems))
             ? In[GID]
             : Reduction::reducer_type::getIdentity();
-    PSum = intel::reduce(NDIt.get_group(), PSum, BOp);
+    PSum = ONEAPI::reduce(NDIt.get_group(), PSum, BOp);
     if (NDIt.get_local_linear_id() == 0) {
       if (IsUpdateOfUserVar)
         PSum = BOp(*(Reduction::getOutPointer(Out)), PSum);
@@ -1074,6 +1076,6 @@ reduction(T *VarPtr, BinaryOperation) {
                                 access::mode::read_write>(VarPtr);
 }
 
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
similarity index 97%
rename from sycl/include/CL/sycl/intel/sub_group.hpp
rename to sycl/include/CL/sycl/ONEAPI/sub_group.hpp
index 2c65f08218990..6a46a031e17a2 100644
--- a/sycl/include/CL/sycl/intel/sub_group.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/sub_group.hpp
@@ -10,6 +10,7 @@
 
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/functional.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
@@ -17,7 +18,6 @@
 #include <CL/sycl/detail/spirv.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
 #include <CL/sycl/id.hpp>
-#include <CL/sycl/intel/functional.hpp>
 #include <CL/sycl/range.hpp>
 #include <CL/sycl/types.hpp>
 
@@ -96,7 +96,7 @@ void store(multi_ptr<T, Space> dst, const vec<T, N> &x) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 
 struct sub_group {
 
@@ -451,7 +451,7 @@ struct sub_group {
   /* --- deprecated collective functions --- */
   template <typename T>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::broadcast instead.")
+                    "sycl::ONEAPI::broadcast instead.")
   EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
@@ -465,7 +465,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::reduce instead.")
+                    "sycl::ONEAPI::reduce instead.")
   EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::Reduce,
@@ -481,7 +481,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::reduce instead.")
+                    "sycl::ONEAPI::reduce instead.")
   EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return op(init, reduce(x, op));
@@ -496,7 +496,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::exclusive_scan instead.")
+                    "sycl::ONEAPI::exclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::ExclusiveScan,
@@ -512,7 +512,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::exclusive_scan instead.")
+                    "sycl::ONEAPI::exclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
                                                BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
@@ -535,7 +535,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::inclusive_scan instead.")
+                    "sycl::ONEAPI::inclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
 #ifdef __SYCL_DEVICE_ONLY__
     return sycl::detail::calc<T, __spv::GroupOperation::InclusiveScan,
@@ -551,7 +551,7 @@ struct sub_group {
 
   template <typename T, class BinaryOperation>
   __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
-                    "sycl::intel::inclusive_scan instead.")
+                    "sycl::ONEAPI::inclusive_scan instead.")
   EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
                                                T init) const {
 #ifdef __SYCL_DEVICE_ONLY__
@@ -572,6 +572,6 @@ struct sub_group {
   template <int dimensions> friend class cl::sycl::nd_item;
   sub_group() = default;
 };
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index 3ba8ff09b8828..b1f86c0c7ec42 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -10,16 +10,18 @@
 #include <CL/__spirv/spirv_ops.hpp>
 #include <CL/__spirv/spirv_types.hpp>
 #include <CL/__spirv/spirv_vars.hpp>
+#include <CL/sycl/ONEAPI/atomic_enums.hpp>
 #include <CL/sycl/detail/generic_type_traits.hpp>
+#include <CL/sycl/detail/helpers.hpp>
 #include <CL/sycl/detail/type_traits.hpp>
-#include <CL/sycl/intel/atomic_enums.hpp>
+#include <CL/sycl/id.hpp>
 
 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 struct sub_group;
-} // namespace intel
+} // namespace ONEAPI
 namespace detail {
 namespace spirv {
 
@@ -29,7 +31,7 @@ template <int Dimensions> struct group_scope<group<Dimensions>> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
 };
 
-template <> struct group_scope<::cl::sycl::intel::sub_group> {
+template <> struct group_scope<::cl::sycl::ONEAPI::sub_group> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup;
 };
 
@@ -107,7 +109,7 @@ using EnableIfGenericBroadcast = detail::enable_if_t<
 // Work-group supports any integral type
 // Sub-group currently supports only uint32_t
 template <typename Group> struct GroupId { using type = size_t; };
-template <> struct GroupId<::cl::sycl::intel::sub_group> {
+template <> struct GroupId<::cl::sycl::ONEAPI::sub_group> {
   using type = uint32_t;
 };
 template <typename Group, typename T, typename IdT>
@@ -203,23 +205,23 @@ EnableIfGenericBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
 // Single happens-before means semantics should always apply to all spaces
 // Although consume is unsupported, forwarding to acquire is valid
 static inline constexpr __spv::MemorySemanticsMask::Flag
-getMemorySemanticsMask(intel::memory_order Order) {
+getMemorySemanticsMask(ONEAPI::memory_order Order) {
   __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None;
   switch (Order) {
-  case intel::memory_order::relaxed:
+  case ONEAPI::memory_order::relaxed:
     SpvOrder = __spv::MemorySemanticsMask::None;
     break;
-  case intel::memory_order::__consume_unsupported:
-  case intel::memory_order::acquire:
+  case ONEAPI::memory_order::__consume_unsupported:
+  case ONEAPI::memory_order::acquire:
     SpvOrder = __spv::MemorySemanticsMask::Acquire;
     break;
-  case intel::memory_order::release:
+  case ONEAPI::memory_order::release:
     SpvOrder = __spv::MemorySemanticsMask::Release;
     break;
-  case intel::memory_order::acq_rel:
+  case ONEAPI::memory_order::acq_rel:
     SpvOrder = __spv::MemorySemanticsMask::AcquireRelease;
     break;
-  case intel::memory_order::seq_cst:
+  case ONEAPI::memory_order::seq_cst:
     SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent;
     break;
   }
@@ -229,17 +231,18 @@ getMemorySemanticsMask(intel::memory_order Order) {
       __spv::MemorySemanticsMask::CrossWorkgroupMemory);
 }
 
-static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) {
+static inline constexpr __spv::Scope::Flag
+getScope(ONEAPI::memory_scope Scope) {
   switch (Scope) {
-  case intel::memory_scope::work_item:
+  case ONEAPI::memory_scope::work_item:
     return __spv::Scope::Invocation;
-  case intel::memory_scope::sub_group:
+  case ONEAPI::memory_scope::sub_group:
     return __spv::Scope::Subgroup;
-  case intel::memory_scope::work_group:
+  case ONEAPI::memory_scope::work_group:
     return __spv::Scope::Workgroup;
-  case intel::memory_scope::device:
+  case ONEAPI::memory_scope::device:
     return __spv::Scope::Device;
-  case intel::memory_scope::system:
+  case ONEAPI::memory_scope::system:
     return __spv::Scope::CrossDevice;
   }
 }
@@ -247,8 +250,8 @@ static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) {
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
 AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
-                      intel::memory_scope Scope, intel::memory_order Success,
-                      intel::memory_order Failure, T Desired, T Expected) {
+                      ONEAPI::memory_scope Scope, ONEAPI::memory_order Success,
+                      ONEAPI::memory_order Failure, T Desired, T Expected) {
   auto SPIRVSuccess = getMemorySemanticsMask(Success);
   auto SPIRVFailure = getMemorySemanticsMask(Failure);
   auto SPIRVScope = getScope(Scope);
@@ -260,8 +263,8 @@ AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
 AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
-                      intel::memory_scope Scope, intel::memory_order Success,
-                      intel::memory_order Failure, T Desired, T Expected) {
+                      ONEAPI::memory_scope Scope, ONEAPI::memory_order Success,
+                      ONEAPI::memory_order Failure, T Desired, T Expected) {
   using I = detail::make_unsinged_integer_t<T>;
   auto SPIRVSuccess = getMemorySemanticsMask(Success);
   auto SPIRVFailure = getMemorySemanticsMask(Failure);
@@ -278,8 +281,8 @@ AtomicCompareExchange(multi_ptr<T, AddressSpace> MPtr,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order) {
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -288,8 +291,8 @@ AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
-AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order) {
+AtomicLoad(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -302,8 +305,8 @@ AtomicLoad(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value>
-AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-            intel::memory_order Order, T Value) {
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+            ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -312,8 +315,8 @@ AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value>
-AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-            intel::memory_order Order, T Value) {
+AtomicStore(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+            ONEAPI::memory_order Order, T Value) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -326,8 +329,8 @@ AtomicStore(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-               intel::memory_order Order, T Value) {
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+               ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -336,8 +339,8 @@ AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_floating_point<T>::value, T>
-AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-               intel::memory_order Order, T Value) {
+AtomicExchange(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+               ONEAPI::memory_order Order, T Value) {
   using I = detail::make_unsinged_integer_t<T>;
   auto *PtrInt =
       reinterpret_cast<typename multi_ptr<I, AddressSpace>::pointer_t>(
@@ -352,8 +355,8 @@ AtomicExchange(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order, T Value) {
+AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -362,8 +365,8 @@ AtomicIAdd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicISub(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-           intel::memory_order Order, T Value) {
+AtomicISub(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+           ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -372,8 +375,8 @@ AtomicISub(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicAnd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicAnd(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -382,8 +385,8 @@ AtomicAnd(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicOr(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-         intel::memory_order Order, T Value) {
+AtomicOr(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+         ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -392,8 +395,8 @@ AtomicOr(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicXor(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicXor(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -402,8 +405,8 @@ AtomicXor(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicMin(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicMin(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
@@ -412,8 +415,8 @@ AtomicMin(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
 
 template <typename T, access::address_space AddressSpace>
 inline typename detail::enable_if_t<std::is_integral<T>::value, T>
-AtomicMax(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
-          intel::memory_order Order, T Value) {
+AtomicMax(multi_ptr<T, AddressSpace> MPtr, ONEAPI::memory_scope Scope,
+          ONEAPI::memory_order Order, T Value) {
   auto *Ptr = MPtr.get();
   auto SPIRVOrder = getMemorySemanticsMask(Order);
   auto SPIRVScope = getScope(Scope);
diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp
index 3f52acc8a2de2..df480f58f99ff 100644
--- a/sycl/include/CL/sycl/detail/type_traits.hpp
+++ b/sycl/include/CL/sycl/detail/type_traits.hpp
@@ -18,9 +18,9 @@
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 template <int Dimensions> class group;
-namespace intel {
+namespace ONEAPI {
 struct sub_group;
-} // namespace intel
+} // namespace ONEAPI
 namespace detail {
 namespace half_impl {
 class half;
@@ -313,7 +313,7 @@ struct is_group<group<Dimensions>> : std::true_type {};
 
 template <typename T> struct is_sub_group : std::false_type {};
 
-template <> struct is_sub_group<intel::sub_group> : std::true_type {};
+template <> struct is_sub_group<ONEAPI::sub_group> : std::true_type {};
 
 template <typename T>
 struct is_generic_group
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp
index 2d22f155dabbe..215e0203878f6 100644
--- a/sycl/include/CL/sycl/handler.hpp
+++ b/sycl/include/CL/sycl/handler.hpp
@@ -176,7 +176,7 @@ checkValueRange(const T &V) {
 
 } // namespace detail
 
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 template <typename T, class BinaryOperation, int Dims, bool IsUSM,
           access::mode AccMode, access::placeholder IsPlaceholder>
@@ -205,7 +205,7 @@ __SYCL_EXPORT size_t reduGetMaxWGSize(shared_ptr_class<queue_impl> Queue,
                                       size_t LocalMemBytesPerWorkItem);
 
 } // namespace detail
-} // namespace intel
+} // namespace ONEAPI
 
 /// Command group handler class.
 ///
@@ -1036,8 +1036,8 @@ class __SYCL_EXPORT handler {
                       Reduction::has_fast_atomics && !Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
                _KERNELFUNCPARAM(KernelFunc)) {
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          Redu.getUserAccessor());
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+                                           Redu.getUserAccessor());
   }
 
   /// Implements parallel_for() accepting nd_range and 1 reduction variable
@@ -1050,8 +1050,8 @@ class __SYCL_EXPORT handler {
                       Reduction::has_fast_atomics && Reduction::is_usm>
   parallel_for(nd_range<Dims> Range, Reduction Redu,
                _KERNELFUNCPARAM(KernelFunc)) {
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          Redu.getUSMPointer());
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+                                           Redu.getUSMPointer());
   }
 
   /// Implements parallel_for() accepting nd_range and 1 reduction variable
@@ -1072,8 +1072,8 @@ class __SYCL_EXPORT handler {
                _KERNELFUNCPARAM(KernelFunc)) {
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     auto RWAcc = Redu.getReadWriteScalarAcc(*this);
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
-                                          RWAcc);
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu,
+                                           RWAcc);
     this->finalize();
 
     // Copy from RWAcc to user's reduction accessor.
@@ -1100,7 +1100,7 @@ class __SYCL_EXPORT handler {
   /// TODO: Need to handle more than 1 reduction in parallel_for().
   /// TODO: Support HOST. The kernels called by this parallel_for() may use
   /// some functionality that is not yet supported on HOST such as:
-  /// barrier(), and intel::reduce() that also may be used in more
+  /// barrier(), and ONEAPI::reduce() that also may be used in more
   /// optimized implementations waiting for their turn of code-review.
   template <typename KernelName = detail::auto_name, typename KernelType,
             int Dims, typename Reduction>
@@ -1133,14 +1133,14 @@ class __SYCL_EXPORT handler {
     // TODO: currently the maximal work group size is determined for the given
     // queue/device, while it may be safer to use queries to the kernel compiled
     // for the device.
-    size_t MaxWGSize = intel::detail::reduGetMaxWGSize(MQueue, OneElemSize);
+    size_t MaxWGSize = ONEAPI::detail::reduGetMaxWGSize(MQueue, OneElemSize);
     if (Range.get_local_range().size() > MaxWGSize)
       throw sycl::runtime_error("The implementation handling parallel_for with"
                                 " reduction requires smaller work group size.",
                                 PI_INVALID_WORK_GROUP_SIZE);
 
     // 1. Call the kernel that includes user's lambda function.
-    intel::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);
+    ONEAPI::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);
     shared_ptr_class<detail::queue_impl> QueueCopy = MQueue;
     this->finalize();
 
@@ -1162,7 +1162,7 @@ class __SYCL_EXPORT handler {
       handler AuxHandler(QueueCopy, MIsHost);
       AuxHandler.saveCodeLoc(MCodeLoc);
 
-      NWorkItems = intel::detail::reduAuxCGFunc<KernelName, KernelType>(
+      NWorkItems = ONEAPI::detail::reduAuxCGFunc<KernelName, KernelType>(
           AuxHandler, NWorkItems, MaxWGSize, Redu);
       MLastEvent = AuxHandler.finalize();
     } // end while (NWorkItems > 1)
@@ -1898,7 +1898,7 @@ class __SYCL_EXPORT handler {
   // in handler from reduction_impl methods.
   template <typename T, class BinaryOperation, int Dims, bool IsUSM,
             access::mode AccMode, access::placeholder IsPlaceholder>
-  friend class intel::detail::reduction_impl;
+  friend class ONEAPI::detail::reduction_impl;
 
   friend void detail::associateWithHandler(handler &,
                                            detail::AccessorBaseHost *,
diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp
index 62abba368dc7f..6fa2babf2c95f 100644
--- a/sycl/include/CL/sycl/nd_item.hpp
+++ b/sycl/include/CL/sycl/nd_item.hpp
@@ -9,12 +9,12 @@
 #pragma once
 
 #include <CL/__spirv/spirv_ops.hpp>
+#include <CL/sycl/ONEAPI/sub_group.hpp>
 #include <CL/sycl/access/access.hpp>
 #include <CL/sycl/detail/defines.hpp>
 #include <CL/sycl/detail/helpers.hpp>
 #include <CL/sycl/group.hpp>
 #include <CL/sycl/id.hpp>
-#include <CL/sycl/intel/sub_group.hpp>
 #include <CL/sycl/item.hpp>
 #include <CL/sycl/nd_range.hpp>
 #include <CL/sycl/range.hpp>
@@ -67,7 +67,7 @@ template <int dimensions = 1> class nd_item {
 
   group<dimensions> get_group() const { return Group; }
 
-  intel::sub_group get_sub_group() const { return intel::sub_group(); }
+  ONEAPI::sub_group get_sub_group() const { return ONEAPI::sub_group(); }
 
   size_t ALWAYS_INLINE get_group(int dimension) const {
     size_t Size = Group[dimension];
diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp
index c6dbebf3f45bf..78ce15dc7fa64 100644
--- a/sycl/include/CL/sycl/program.hpp
+++ b/sycl/include/CL/sycl/program.hpp
@@ -8,11 +8,11 @@
 
 #pragma once
 
+#include <CL/sycl/ONEAPI/experimental/spec_constant.hpp>
 #include <CL/sycl/context.hpp>
 #include <CL/sycl/detail/export.hpp>
 #include <CL/sycl/detail/kernel_desc.hpp>
 #include <CL/sycl/detail/os_util.hpp>
-#include <CL/sycl/experimental/spec_constant.hpp>
 #include <CL/sycl/info/info_desc.hpp>
 #include <CL/sycl/kernel.hpp>
 #include <CL/sycl/stl.hpp>
@@ -307,7 +307,7 @@ class __SYCL_EXPORT program {
   /// \return a specialization constant instance corresponding to given type ID
   ///         passed as a template parameter
   template <typename ID, typename T>
-  experimental::spec_constant<T, ID> set_spec_constant(T Cst) {
+  ONEAPI::experimental::spec_constant<T, ID> set_spec_constant(T Cst) {
     constexpr const char *Name = detail::SpecConstantInfo<ID>::getName();
     static_assert(std::is_integral<T>::value ||
                       std::is_floating_point<T>::value,
@@ -315,10 +315,10 @@ class __SYCL_EXPORT program {
 #ifdef __SYCL_DEVICE_ONLY__
     (void)Cst;
     (void)Name;
-    return experimental::spec_constant<T, ID>();
+    return ONEAPI::experimental::spec_constant<T, ID>();
 #else
     set_spec_constant_impl(Name, &Cst, sizeof(T));
-    return experimental::spec_constant<T, ID>(Cst);
+    return ONEAPI::experimental::spec_constant<T, ID>(Cst);
 #endif // __SYCL_DEVICE_ONLY__
   }
 
diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp
index a73064e455067..8453584aa067c 100644
--- a/sycl/source/detail/program_impl.cpp
+++ b/sycl/source/detail/program_impl.cpp
@@ -484,8 +484,8 @@ vector_class<device> program_impl::get_info<info::program::devices>() const {
 void program_impl::set_spec_constant_impl(const char *Name, const void *ValAddr,
                                           size_t ValSize) {
   if (MState != program_state::none)
-    throw cl::sycl::experimental::spec_const_error("Invalid program state",
-                                                   PI_INVALID_PROGRAM);
+    throw cl::sycl::ONEAPI::experimental::spec_const_error(
+        "Invalid program state", PI_INVALID_PROGRAM);
   // Reuse cached programs lock as opposed to introducing a new lock.
   auto LockGuard = MContext->getKernelProgramCache().acquireCachedPrograms();
   spec_constant_impl &SC = SpecConstRegistry[Name];
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index fc45b310c94c6..a4f2f875d424e 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <CL/sycl/ONEAPI/experimental/spec_constant.hpp>
 #include <CL/sycl/backend_types.hpp>
 #include <CL/sycl/context.hpp>
 #include <CL/sycl/detail/common.hpp>
@@ -14,7 +15,6 @@
 #include <CL/sycl/detail/util.hpp>
 #include <CL/sycl/device.hpp>
 #include <CL/sycl/exception.hpp>
-#include <CL/sycl/experimental/spec_constant.hpp>
 #include <CL/sycl/stl.hpp>
 #include <detail/config.hpp>
 #include <detail/context_impl.hpp>
@@ -1020,7 +1020,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg,
       std::lock_guard<std::mutex> Lock(MNativeProgramsMutex);
       auto It = NativePrograms.find(NativePrg);
       if (It == NativePrograms.end())
-        throw sycl::experimental::spec_const_error(
+        throw sycl::ONEAPI::experimental::spec_const_error(
             "spec constant is set in a program w/o a binary image",
             PI_INVALID_OPERATION);
       Img = It->second;
diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp
index 71e16724ce3b2..7a0bb02ca103b 100644
--- a/sycl/source/detail/reduction.cpp
+++ b/sycl/source/detail/reduction.cpp
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl/intel/reduction.hpp>
+#include <CL/sycl/ONEAPI/reduction.hpp>
 #include <detail/queue_impl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
-namespace intel {
+namespace ONEAPI {
 namespace detail {
 
 // TODO: The algorithm of choosing the work-group size is definitely
@@ -62,6 +62,6 @@ reduGetMaxWGSize(shared_ptr_class<sycl::detail::queue_impl> Queue,
 }
 
 } // namespace detail
-} // namespace intel
+} // namespace ONEAPI
 } // namespace sycl
 } // __SYCL_INLINE_NAMESPACE(cl)
diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp
index c273ae817c8bf..da4712f4abe25 100644
--- a/sycl/source/function_pointer.cpp
+++ b/sycl/source/function_pointer.cpp
@@ -6,16 +6,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl/intel/function_pointer.hpp>
+#include <CL/sycl/ONEAPI/function_pointer.hpp>
 #include <detail/device_impl.hpp>
 #include <detail/program_impl.hpp>
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace detail {
-intel::device_func_ptr_holder_t
+ONEAPI::device_func_ptr_holder_t
 getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName) {
-  intel::device_func_ptr_holder_t FPtr = 0;
+  ONEAPI::device_func_ptr_holder_t FPtr = 0;
   // FIXME: return value must be checked here, but since we cannot yet check
   // if corresponding extension is supported, let's silently ignore it here.
   const detail::plugin &Plugin = detail::getSyclObjImpl(P)->getPlugin();
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index e63d7647c558a..89c831d9e4962 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3625,8 +3625,6 @@ _ZN2cl4sycl5eventC1Ev
 _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE
 _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE
 _ZN2cl4sycl5eventC2Ev
-_ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
-_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice
 _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE
 _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE
@@ -3647,6 +3645,8 @@ _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKNS0_13property_list
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_15device_selectorERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE
 _ZN2cl4sycl5queueC2ERKNS0_7contextERKNS0_6deviceERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE
+_ZN2cl4sycl6ONEAPI6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm
+_ZN2cl4sycl6ONEAPI6detail17reduComputeWGSizeEmmRm
 _ZN2cl4sycl6detail10image_implILi1EE10getDevicesESt10shared_ptrINS1_12context_implEE
 _ZN2cl4sycl6detail10image_implILi1EE10setPitchesEv
 _ZN2cl4sycl6detail10image_implILi1EE11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event
diff --git a/sycl/test/aot/spec_const_aot.cpp b/sycl/test/aot/spec_const_aot.cpp
index 99b451fe6d7ca..706b618438b40 100644
--- a/sycl/test/aot/spec_const_aot.cpp
+++ b/sycl/test/aot/spec_const_aot.cpp
@@ -32,10 +32,11 @@ int main(int argc, char **argv) {
     }
   });
 
-  std::cout << "Running on " << q.get_device().get_info<info::device::name>() << "\n";
+  std::cout << "Running on " << q.get_device().get_info<info::device::name>()
+            << "\n";
   cl::sycl::program prog(q.get_context());
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       prog.set_spec_constant<MyInt32Const>(10);
 
   prog.build_with_kernel_type<Kernel>();
@@ -46,11 +47,8 @@ int main(int argc, char **argv) {
 
     q.submit([&](cl::sycl::handler &cgh) {
       auto acc = buf.get_access<cl::sycl::access::mode::write>(cgh);
-      cgh.single_task<Kernel>(
-          prog.get_kernel<Kernel>(),
-          [=]() {
-            acc[0] = i32.get();
-          });
+      cgh.single_task<Kernel>(prog.get_kernel<Kernel>(),
+                              [=]() { acc[0] = i32.get(); });
     });
   }
   bool passed = true;
diff --git a/sycl/test/atomic_ref/accessor.cpp b/sycl/test/atomic_ref/accessor.cpp
index 86067e2c74906..6410749cf4877 100644
--- a/sycl/test/atomic_ref/accessor.cpp
+++ b/sycl/test/atomic_ref/accessor.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 // Equivalent to add_test from add.cpp
 // Uses atomic_accessor instead of atomic_ref
@@ -26,12 +26,12 @@ template <typename T> void accessor_test(queue q, size_t N) {
       static_assert(
           std::is_same<decltype(atomic_accessor(sum_buf, cgh, relaxed_order,
                                                 device_scope)),
-                       atomic_accessor<T, 1, intel::memory_order::relaxed,
-                                       intel::memory_scope::device>>::value,
+                       atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                       ONEAPI::memory_scope::device>>::value,
           "atomic_accessor type incorrectly deduced");
 #endif
-      auto sum = atomic_accessor<T, 1, intel::memory_order::relaxed,
-                                 intel::memory_scope::device>(sum_buf, cgh);
+      auto sum = atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                                 ONEAPI::memory_scope::device>(sum_buf, cgh);
       auto out =
           output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
@@ -39,8 +39,8 @@ template <typename T> void accessor_test(queue q, size_t N) {
         static_assert(
             std::is_same<
                 decltype(sum[0]),
-                atomic_ref<T, intel::memory_order::relaxed,
-                           intel::memory_scope::device,
+                atomic_ref<T, ONEAPI::memory_order::relaxed,
+                           ONEAPI::memory_scope::device,
                            access::address_space::global_space>>::value,
             "atomic_accessor returns incorrect atomic_ref");
         out[gid] = sum[0].fetch_add(T(1));
@@ -70,8 +70,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
     buffer<T> output_buf(output.data(), output.size());
     q.submit([&](handler &cgh) {
       auto sum =
-          atomic_accessor<T, 1, intel::memory_order::relaxed,
-                          intel::memory_scope::device, access::target::local>(
+          atomic_accessor<T, 1, ONEAPI::memory_order::relaxed,
+                          ONEAPI::memory_scope::device, access::target::local>(
               1, cgh);
       auto out = output_buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) {
@@ -80,8 +80,8 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) {
         it.barrier();
         static_assert(
             std::is_same<decltype(sum[0]),
-                         atomic_ref<T, intel::memory_order::relaxed,
-                                    intel::memory_scope::device,
+                         atomic_ref<T, ONEAPI::memory_order::relaxed,
+                                    ONEAPI::memory_scope::device,
                                     access::address_space::local_space>>::value,
             "local atomic_accessor returns incorrect atomic_ref");
         T result = sum[0].fetch_add(T(1));
diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp
index cfe943d176299..565048f1be250 100644
--- a/sycl/test/atomic_ref/add.cpp
+++ b/sycl/test/atomic_ref/add.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T, typename Difference = T>
 void add_fetch_test(queue q, size_t N) {
@@ -23,10 +23,13 @@ void add_fetch_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm.fetch_add(Difference(1));
       });
     });
@@ -56,10 +59,13 @@ void add_plus_equal_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm += Difference(1);
       });
     });
@@ -89,10 +95,13 @@ void add_pre_inc_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = ++atm;
       });
     });
@@ -122,10 +131,13 @@ void add_post_inc_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(sum[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(sum[0]);
         out[gid] = atm++;
       });
     });
@@ -153,13 +165,11 @@ void add_test(queue q, size_t N) {
 }
 
 // Floating-point types do not support pre- or post-increment
-template <>
-void add_test<float>(queue q, size_t N) {
+template <> void add_test<float>(queue q, size_t N) {
   add_fetch_test<float>(q, N);
   add_plus_equal_test<float>(q, N);
 }
-template <>
-void add_test<double>(queue q, size_t N) {
+template <> void add_test<double>(queue q, size_t N) {
   add_fetch_test<double>(q, N);
   add_plus_equal_test<double>(q, N);
 }
diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp
index 11c2caa6ef3c4..db8b12a846498 100644
--- a/sycl/test/atomic_ref/compare_exchange.cpp
+++ b/sycl/test/atomic_ref/compare_exchange.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class compare_exchange_kernel;
@@ -30,7 +30,9 @@ void compare_exchange_test(queue q, size_t N) {
       cgh.parallel_for<compare_exchange_kernel<T>>(range<1>(N), [=](item<1>
                                                                         it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(exc[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(exc[0]);
         T result = T(N); // Avoid copying pointer
         bool success = atm.compare_exchange_strong(result, (T)gid);
         if (success) {
diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp
index b4445928ea075..61e1114b0c1b8 100644
--- a/sycl/test/atomic_ref/exchange.cpp
+++ b/sycl/test/atomic_ref/exchange.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class exchange_kernel;
@@ -29,7 +29,9 @@ void exchange_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for<exchange_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(exc[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(exc[0]);
         out[gid] = atm.exchange(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp
index 30ae13e16e65e..4e044dc7541f6 100644
--- a/sycl/test/atomic_ref/load.cpp
+++ b/sycl/test/atomic_ref/load.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class load_kernel;
@@ -29,7 +29,9 @@ void load_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for<load_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(ld[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(ld[0]);
         out[gid] = atm.load();
       });
     });
diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp
index 0c95653b8219b..2bc7067e01650 100644
--- a/sycl/test/atomic_ref/max.cpp
+++ b/sycl/test/atomic_ref/max.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 void max_test(queue q, size_t N) {
@@ -27,7 +27,9 @@ void max_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
 
         // +1 accounts for lowest() returning 0 for unsigned types
         out[gid] = atm.fetch_max(T(gid) + 1);
diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp
index 6a0e32ca14bb5..05e41bb36597e 100644
--- a/sycl/test/atomic_ref/min.cpp
+++ b/sycl/test/atomic_ref/min.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 void min_test(queue q, size_t N) {
@@ -27,7 +27,9 @@ void min_test(queue q, size_t N) {
       auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm.fetch_min(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp
index db076ee994a3d..4c29e8947ea7f 100644
--- a/sycl/test/atomic_ref/store.cpp
+++ b/sycl/test/atomic_ref/store.cpp
@@ -9,7 +9,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T>
 class store_kernel;
@@ -24,7 +24,9 @@ void store_test(queue q, size_t N) {
       auto st = store_buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<store_kernel<T>>(range<1>(N), [=](item<1> it) {
         size_t gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(st[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(st[0]);
         atm.store(T(gid));
       });
     });
diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp
index 10ed75d21da25..a51ad5a2e9e28 100644
--- a/sycl/test/atomic_ref/sub.cpp
+++ b/sycl/test/atomic_ref/sub.cpp
@@ -10,7 +10,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename T, typename Difference = T>
 void sub_fetch_test(queue q, size_t N) {
@@ -23,10 +23,13 @@ void sub_fetch_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm.fetch_sub(Difference(1));
       });
     });
@@ -56,10 +59,13 @@ void sub_plus_equal_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm -= Difference(1);
       });
     });
@@ -89,10 +95,13 @@ void sub_pre_dec_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = --atm;
       });
     });
@@ -122,10 +131,13 @@ void sub_post_dec_test(queue q, size_t N) {
 
     q.submit([&](handler &cgh) {
       auto val = val_buf.template get_access<access::mode::read_write>(cgh);
-      auto out = output_buf.template get_access<access::mode::discard_write>(cgh);
+      auto out =
+          output_buf.template get_access<access::mode::discard_write>(cgh);
       cgh.parallel_for(range<1>(N), [=](item<1> it) {
         int gid = it.get_id(0);
-        auto atm = atomic_ref<T, intel::memory_order::relaxed, intel::memory_scope::device, access::address_space::global_space>(val[0]);
+        auto atm = atomic_ref<T, ONEAPI::memory_order::relaxed,
+                              ONEAPI::memory_scope::device,
+                              access::address_space::global_space>(val[0]);
         out[gid] = atm--;
       });
     });
@@ -153,13 +165,11 @@ void sub_test(queue q, size_t N) {
 }
 
 // Floating-point types do not support pre- or post-decrement
-template <>
-void sub_test<float>(queue q, size_t N) {
+template <> void sub_test<float>(queue q, size_t N) {
   sub_fetch_test<float>(q, N);
   sub_plus_equal_test<float>(q, N);
 }
-template <>
-void sub_test<double>(queue q, size_t N) {
+template <> void sub_test<double>(queue q, size_t N) {
   sub_fetch_test<double>(q, N);
   sub_plus_equal_test<double>(q, N);
 }
diff --git a/sycl/test/built-ins/printf.cpp b/sycl/test/built-ins/printf.cpp
index 6536498587729..1693ed97395ab 100644
--- a/sycl/test/built-ins/printf.cpp
+++ b/sycl/test/built-ins/printf.cpp
@@ -41,7 +41,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.single_task<class integral>([=]() {
         // String
-        intel::experimental::printf(format_hello_world);
+        ONEAPI::experimental::printf(format_hello_world);
         // Due to a bug in Intel CPU Runtime for OpenCL on Windows, information
         // printed using such format strings (without %-specifiers) might
         // appear in different order if output is redirected to a file or
@@ -50,8 +50,8 @@ int main() {
         // CHECK: {{(Hello, World!)?}}
 
         // Integral types
-        intel::experimental::printf(format_int, (int32_t)123);
-        intel::experimental::printf(format_int, (int32_t)-123);
+        ONEAPI::experimental::printf(format_int, (int32_t)123);
+        ONEAPI::experimental::printf(format_int, (int32_t)-123);
         // CHECK: 123
         // CHECK-NEXT: -123
 
@@ -60,8 +60,8 @@ int main() {
           // You can declare format string in non-global scope, but in this case
           // static keyword is required
           static const CONSTANT char format[] = "%f\n";
-          intel::experimental::printf(format, 33.4f);
-          intel::experimental::printf(format, -33.4f);
+          ONEAPI::experimental::printf(format, 33.4f);
+          ONEAPI::experimental::printf(format, -33.4f);
         }
         // CHECK-NEXT: 33.4
         // CHECK-NEXT: -33.4
@@ -73,23 +73,23 @@ int main() {
         using ocl_int4 = cl::sycl::vec<int, 4>::vector_t;
         {
           static const CONSTANT char format[] = "%v4d\n";
-          intel::experimental::printf(format, (ocl_int4)v4);
+          ONEAPI::experimental::printf(format, (ocl_int4)v4);
         }
 
         // However, you are still able to print them by-element:
         {
-          intel::experimental::printf(format_vec, (int32_t)v4.w(),
-                                      (int32_t)v4.z(), (int32_t)v4.y(),
-                                      (int32_t)v4.x());
+          ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(),
+                                       (int32_t)v4.z(), (int32_t)v4.y(),
+                                       (int32_t)v4.x());
         }
 #else
         // On host side you always have to print them by-element:
-        intel::experimental::printf(format_vec, (int32_t)v4.x(),
-                                    (int32_t)v4.y(), (int32_t)v4.z(),
-                                    (int32_t)v4.w());
-        intel::experimental::printf(format_vec, (int32_t)v4.w(),
-                                    (int32_t)v4.z(), (int32_t)v4.y(),
-                                    (int32_t)v4.x());
+        ONEAPI::experimental::printf(format_vec, (int32_t)v4.x(),
+                                     (int32_t)v4.y(), (int32_t)v4.z(),
+                                     (int32_t)v4.w());
+        ONEAPI::experimental::printf(format_vec, (int32_t)v4.w(),
+                                     (int32_t)v4.z(), (int32_t)v4.y(),
+                                     (int32_t)v4.x());
 #endif // __SYCL_DEVICE_ONLY__
        // CHECK-NEXT: 5,6,7,8
        // CHECK-NEXT: 8,7,6,5
@@ -100,7 +100,7 @@ int main() {
         // According to OpenCL spec, argument should be a void pointer
         {
           static const CONSTANT char format[] = "%p\n";
-          intel::experimental::printf(format, (void *)Ptr);
+          ONEAPI::experimental::printf(format, (void *)Ptr);
         }
         // CHECK-NEXT: {{(0x)?[0-9a-fA-F]+$}}
       });
@@ -111,7 +111,7 @@ int main() {
     Queue.submit([&](handler &CGH) {
       CGH.parallel_for<class stream_string>(range<1>(10), [=](id<1> i) {
         // cast to uint64_t to be sure that we pass 64-bit unsigned value
-        intel::experimental::printf(format_hello_world_2, (uint64_t)i.get(0));
+        ONEAPI::experimental::printf(format_hello_world_2, (uint64_t)i.get(0));
       });
     });
     Queue.wait();
@@ -127,8 +127,8 @@ int main() {
     // CHECK-NEXT: {{[0-9]+}}: Hello, World!
   }
 
-// FIXME: strictly check output order once the bug mentioned above is fixed
-// CHECK: {{(Hello, World!)?}}
+  // FIXME: strictly check output order once the bug mentioned above is fixed
+  // CHECK: {{(Hello, World!)?}}
 
   return 0;
 }
diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
index c68a891dcf94c..0c34ccc5cc607 100644
--- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp
+++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
@@ -31,7 +31,7 @@ int main() {
   P.build_with_kernel_type<class K>();
   cl::sycl::kernel KE = P.get_kernel<class K>();
 
-  auto FptrStorage = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
+  auto FptrStorage = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D);
   if (!D.is_host()) {
     // FIXME: update this check with query to supported extension
     // For now, we don't have runtimes that report required OpenCL extension and
@@ -54,10 +54,10 @@ int main() {
     auto AccB = BufB.template get_access<cl::sycl::access::mode::read>(CGH);
     CGH.parallel_for<class K>(
         KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
-      auto Fptr =
-          cl::sycl::intel::to_device_func_ptr<decltype(add)>(FptrStorage);
-      AccA[Index] = Fptr(AccA[Index], AccB[Index]);
-    });
+          auto Fptr =
+              cl::sycl::ONEAPI::to_device_func_ptr<decltype(add)>(FptrStorage);
+          AccA[Index] = Fptr(AccA[Index], AccB[Index]);
+        });
   });
 
   auto HostAcc = BufA.get_access<cl::sycl::access::mode::read>();
diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
index 67ecea5509a5f..26685ec11a794 100644
--- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp
+++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
@@ -34,12 +34,12 @@ int main() {
   P.build_with_kernel_type<class K>();
   cl::sycl::kernel KE = P.get_kernel<class K>();
 
-  cl::sycl::buffer<cl::sycl::intel::device_func_ptr_holder_t> DispatchTable(2);
+  cl::sycl::buffer<cl::sycl::ONEAPI::device_func_ptr_holder_t> DispatchTable(2);
   {
     auto DTAcc =
         DispatchTable.get_access<cl::sycl::access::mode::discard_write>();
-    DTAcc[0] = cl::sycl::intel::get_device_func_ptr(&add, "add", P, D);
-    DTAcc[1] = cl::sycl::intel::get_device_func_ptr(&sub, "sub", P, D);
+    DTAcc[0] = cl::sycl::ONEAPI::get_device_func_ptr(&add, "add", P, D);
+    DTAcc[1] = cl::sycl::ONEAPI::get_device_func_ptr(&sub, "sub", P, D);
     if (!D.is_host()) {
       // FIXME: update this check with query to supported extension
       // For now, we don't have runtimes that report required OpenCL extension
@@ -69,11 +69,11 @@ int main() {
           DispatchTable.template get_access<cl::sycl::access::mode::read>(CGH);
       CGH.parallel_for<class K>(
           KE, cl::sycl::range<1>(Size), [=](cl::sycl::id<1> Index) {
-        auto FP =
-            cl::sycl::intel::to_device_func_ptr<int(int, int)>(AccDT[Mode]);
+            auto FP = cl::sycl::ONEAPI::to_device_func_ptr<int(int, int)>(
+                AccDT[Mode]);
 
-        AccA[Index] = FP(AccA[Index], AccB[Index]);
-      });
+            AccA[Index] = FP(AccA[Index], AccB[Index]);
+          });
     });
 
     auto HostAcc = bufA.get_access<cl::sycl::access::mode::read>();
diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp
index 2a175d000bb6f..cc19772af0c12 100644
--- a/sycl/test/group-algorithm/all_of.cpp
+++ b/sycl/test/group-algorithm/all_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class all_of_kernel;
diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp
index 6ce61afaffdec..45ddc41f6355f 100644
--- a/sycl/test/group-algorithm/any_of.cpp
+++ b/sycl/test/group-algorithm/any_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class any_of_kernel;
diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp
index 04028fade9669..d0320701010f1 100644
--- a/sycl/test/group-algorithm/broadcast.cpp
+++ b/sycl/test/group-algorithm/broadcast.cpp
@@ -13,7 +13,7 @@
 #include <complex>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <typename InputContainer, typename OutputContainer>
 class broadcast_kernel;
diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp
index 47dc1f6122720..5aaf0373eea57 100644
--- a/sycl/test/group-algorithm/exclusive_scan.cpp
+++ b/sycl/test/group-algorithm/exclusive_scan.cpp
@@ -14,7 +14,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation, int TestNumber>
 class exclusive_scan_kernel;
diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp
index 54311a162ed9e..75cf06c7c371e 100644
--- a/sycl/test/group-algorithm/inclusive_scan.cpp
+++ b/sycl/test/group-algorithm/inclusive_scan.cpp
@@ -14,7 +14,7 @@
 #include <numeric>
 #include <vector>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation, int TestNumber>
 class inclusive_scan_kernel;
diff --git a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp
index ff02cf7e77f9e..afe5e28648a6e 100644
--- a/sycl/test/group-algorithm/leader.cpp
+++ b/sycl/test/group-algorithm/leader.cpp
@@ -10,7 +10,7 @@
 #include <CL/sycl.hpp>
 #include <cassert>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 class leader_kernel;
 
diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp
index c8b56158d20b7..7a08b6463a682 100644
--- a/sycl/test/group-algorithm/none_of.cpp
+++ b/sycl/test/group-algorithm/none_of.cpp
@@ -12,7 +12,7 @@
 #include <cassert>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class Predicate>
 class none_of_kernel;
diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp
index 64ed0bd82fcc2..ef65439404884 100644
--- a/sycl/test/group-algorithm/reduce.cpp
+++ b/sycl/test/group-algorithm/reduce.cpp
@@ -13,7 +13,7 @@
 #include <limits>
 #include <numeric>
 using namespace sycl;
-using namespace sycl::intel;
+using namespace sycl::ONEAPI;
 
 template <class BinaryOperation>
 class reduce_kernel;
diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp
index 2b3f75ae2182e..67d039700fb8b 100644
--- a/sycl/test/linear_id/linear-sub_group.cpp
+++ b/sycl/test/linear_id/linear-sub_group.cpp
@@ -38,7 +38,7 @@ int main(int argc, char *argv[]) {
           nd_range<2>(range<2>(outer, inner), range<2>(outer, inner)),
           [=](nd_item<2> it) {
             id<2> idx = it.get_global_id();
-            intel::sub_group sg = it.get_sub_group();
+            ONEAPI::sub_group sg = it.get_sub_group();
             output[idx] = sg.get_group_id()[0] * sg.get_local_range()[0] +
                           sg.get_local_id()[0];
           });
diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp
index cbf1b5907cfc2..4828b1dc94535 100644
--- a/sycl/test/reduction/reduction_ctor.cpp
+++ b/sycl/test/reduction/reduction_ctor.cpp
@@ -49,7 +49,7 @@ void testKnown(T Identity, BinaryOperation BOp, T A, T B) {
     // This accessor is not really used in this test.
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         ReduAcc(ReduBuf, CGH);
-    auto Redu = intel::reduction(ReduAcc, BOp);
+    auto Redu = ONEAPI::reduction(ReduAcc, BOp);
     assert(Redu.getIdentity() == Identity &&
            "Failed getIdentity() check().");
     test_reducer(Redu, A, B);
@@ -69,7 +69,7 @@ void testUnknown(T Identity, BinaryOperation BOp, T A, T B) {
     // This accessor is not really used in this test.
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         ReduAcc(ReduBuf, CGH);
-    auto Redu = intel::reduction(ReduAcc, Identity, BOp);
+    auto Redu = ONEAPI::reduction(ReduAcc, Identity, BOp);
     assert(Redu.getIdentity() == Identity &&
            "Failed getIdentity() check().");
     test_reducer(Redu, Identity, BOp, A, B);
@@ -88,18 +88,20 @@ void testBoth(T Identity, BinaryOperation BOp, T A, T B) {
 }
 
 int main() {
-  testBoth<int>(0, intel::plus<int>(), 1, 7);
+  testBoth<int>(0, ONEAPI::plus<int>(), 1, 7);
   testBoth<int>(1, std::multiplies<int>(), 1, 7);
-  testBoth<int>(0, intel::bit_or<int>(), 1, 8);
-  testBoth<int>(0, intel::bit_xor<int>(), 7, 3);
-  testBoth<int>(~0, intel::bit_and<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::max)(), intel::minimum<int>(), 7, 3);
-  testBoth<int>((std::numeric_limits<int>::min)(), intel::maximum<int>(), 7, 3);
-
-  testBoth<float>(0, intel::plus<float>(), 1, 7);
+  testBoth<int>(0, ONEAPI::bit_or<int>(), 1, 8);
+  testBoth<int>(0, ONEAPI::bit_xor<int>(), 7, 3);
+  testBoth<int>(~0, ONEAPI::bit_and<int>(), 7, 3);
+  testBoth<int>((std::numeric_limits<int>::max)(), ONEAPI::minimum<int>(), 7,
+                3);
+  testBoth<int>((std::numeric_limits<int>::min)(), ONEAPI::maximum<int>(), 7,
+                3);
+
+  testBoth<float>(0, ONEAPI::plus<float>(), 1, 7);
   testBoth<float>(1, std::multiplies<float>(), 1, 7);
-  testBoth<float>(getMaximumFPValue<float>(), intel::minimum<float>(), 7, 3);
-  testBoth<float>(getMinimumFPValue<float>(), intel::maximum<float>(), 7, 3);
+  testBoth<float>(getMaximumFPValue<float>(), ONEAPI::minimum<float>(), 7, 3);
+  testBoth<float>(getMinimumFPValue<float>(), ONEAPI::maximum<float>(), 7, 3);
 
   testUnknown<CustomVec<float>, 0,
               Unknown<CustomVec<float>, 0, CustomVecPlus<float>>>(
diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp
index c700097993079..985f77cbc77f4 100644
--- a/sycl/test/reduction/reduction_nd_conditional.cpp
+++ b/sycl/test/reduction/reduction_nd_conditional.cpp
@@ -85,7 +85,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -114,10 +114,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 }
 
 int main() {
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<short, 1, intel::plus<short>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<short, 1, ONEAPI::plus<short>>(0, 16, 256);
 
   std::cout << "Test passed\n";
   return 0;
diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp
index a80aefc09cd45..f81a913837d46 100644
--- a/sycl/test/reduction/reduction_nd_ext_type.hpp
+++ b/sycl/test/reduction/reduction_nd_ext_type.hpp
@@ -30,7 +30,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, Mode, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -64,15 +64,19 @@ int runTests(const string_class &ExtensionName) {
 
   // Check some less standards WG sizes and corner cases first.
   test<T, 1, access::mode::read_write, std::multiplies<T>>(0, 4, 4);
-  test<T, 0, access::mode::discard_write, intel::plus<T>>(0, 4, 64);
+  test<T, 0, access::mode::discard_write, ONEAPI::plus<T>>(0, 4, 64);
 
-  test<T, 0, access::mode::read_write, intel::minimum<T>>(getMaximumFPValue<T>(), 7, 7);
-  test<T, 1, access::mode::discard_write, intel::maximum<T>>(getMinimumFPValue<T>(), 7, 7 * 5);
+  test<T, 0, access::mode::read_write, ONEAPI::minimum<T>>(
+      getMaximumFPValue<T>(), 7, 7);
+  test<T, 1, access::mode::discard_write, ONEAPI::maximum<T>>(
+      getMinimumFPValue<T>(), 7, 7 * 5);
 
 #if __cplusplus >= 201402L
-  test<T, 1, access::mode::read_write, intel::plus<>>(1, 3, 3 * 5);
-  test<T, 1, access::mode::discard_write, intel::minimum<>>(getMaximumFPValue<T>(), 3, 3);
-  test<T, 0, access::mode::discard_write, intel::maximum<>>(getMinimumFPValue<T>(), 3, 3);
+  test<T, 1, access::mode::read_write, ONEAPI::plus<>>(1, 3, 3 * 5);
+  test<T, 1, access::mode::discard_write, ONEAPI::minimum<>>(
+      getMaximumFPValue<T>(), 3, 3);
+  test<T, 0, access::mode::discard_write, ONEAPI::maximum<>>(
+      getMinimumFPValue<T>(), 3, 3);
 #endif // __cplusplus >= 201402L
 
   std::cout << "Test passed\n";
diff --git a/sycl/test/reduction/reduction_nd_lambda.cpp b/sycl/test/reduction/reduction_nd_lambda.cpp
index 3d5cf21658995..37c435b9b0f07 100644
--- a/sycl/test/reduction/reduction_nd_lambda.cpp
+++ b/sycl/test/reduction/reduction_nd_lambda.cpp
@@ -1,5 +1,5 @@
 // UNSUPPORTED: cuda
-// Reductions use work-group builtins (e.g. intel::reduce()) not yet supported
+// Reductions use work-group builtins (e.g. ONEAPI::reduce()) not yet supported
 // by CUDA.
 //
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
@@ -30,7 +30,7 @@ void test(T Identity, BinaryOperation BOp, size_t WGSize, size_t NWItems) {
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     auto Out = OutBuf.template get_access<access::mode::discard_write>(CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp
index 834ccf4407649..8b900a3a1fd9a 100644
--- a/sycl/test/reduction/reduction_nd_s0_dw.cpp
+++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp
@@ -35,7 +35,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -59,33 +59,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 0, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 9, 18);
-  test<int, 0, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 0, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 0, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<int, 0, intel::plus<int>>(0, 4, 64);
-  test<int, 0, intel::plus<int>>(0, 8, 128);
-  test<int, 0, intel::plus<int>>(0, 16, 256);
-  test<int, 0, intel::plus<int>>(0, 32, 256);
-  test<int, 0, intel::plus<int>>(0, 64, 256);
-  test<int, 0, intel::plus<int>>(0, 128, 256);
-  test<int, 0, intel::plus<int>>(0, 256, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 0, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 0, std::multiplies<int>>(1, 8, 256);
-  test<int, 0, intel::bit_or<int>>(0, 8, 256);
-  test<int, 0, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 0, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 0, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 0, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 0, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 0, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 0, std::multiplies<float>>(1, 8, 256);
-  test<float, 0, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 0, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp
index 2040b632e07fb..029458942390a 100644
--- a/sycl/test/reduction/reduction_nd_s0_rw.cpp
+++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp
@@ -37,7 +37,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::read_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -61,33 +61,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 0, intel::plus<int>>(0, 2, 2);
-  test<int, 0, intel::plus<int>>(0, 7, 7);
-  test<int, 0, intel::plus<int>>(0, 9, 18);
-  test<int, 0, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 0, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 0, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 0, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 0, intel::plus<int>>(0, 2, 64);
-  test<int, 0, intel::plus<int>>(0, 4, 64);
-  test<int, 0, intel::plus<int>>(0, 8, 128);
-  test<int, 0, intel::plus<int>>(0, 16, 256);
-  test<int, 0, intel::plus<int>>(0, 32, 256);
-  test<int, 0, intel::plus<int>>(0, 64, 256);
-  test<int, 0, intel::plus<int>>(0, 128, 256);
-  test<int, 0, intel::plus<int>>(0, 256, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 0, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 0, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 0, std::multiplies<int>>(1, 8, 256);
-  test<int, 0, intel::bit_or<int>>(0, 8, 256);
-  test<int, 0, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 0, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 0, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 0, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 0, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 0, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 0, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 0, std::multiplies<float>>(1, 8, 256);
-  test<float, 0, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 0, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp
index 9fe36d69daa8c..7cc975e261dc2 100644
--- a/sycl/test/reduction/reduction_nd_s1_dw.cpp
+++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp
@@ -36,7 +36,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::discard_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -60,33 +60,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 1, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 1, intel::plus<int>>(0, 9, 18);
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 1, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 1, intel::plus<int>>(0, 2, 64);
-  test<int, 1, intel::plus<int>>(0, 4, 64);
-  test<int, 1, intel::plus<int>>(0, 8, 128);
-  test<int, 1, intel::plus<int>>(0, 16, 256);
-  test<int, 1, intel::plus<int>>(0, 32, 256);
-  test<int, 1, intel::plus<int>>(0, 64, 256);
-  test<int, 1, intel::plus<int>>(0, 128, 256);
-  test<int, 1, intel::plus<int>>(0, 256, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 1, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 1, std::multiplies<int>>(1, 8, 256);
-  test<int, 1, intel::bit_or<int>>(0, 8, 256);
-  test<int, 1, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 1, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 1, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 1, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 1, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 1, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 1, std::multiplies<float>>(1, 8, 256);
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
-  test<float, 1, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 1, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp
index d283fbe3cebe0..2c8f6a8343e83 100644
--- a/sycl/test/reduction/reduction_nd_s1_rw.cpp
+++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp
@@ -38,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     accessor<T, Dim, access::mode::read_write, access::target::global_buffer>
         Out(OutBuf, CGH);
-    auto Redu = intel::reduction(Out, Identity, BOp);
+    auto Redu = ONEAPI::reduction(Out, Identity, BOp);
 
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
@@ -62,33 +62,33 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // Check some less standards WG sizes and corner cases first.
-  test<int, 1, intel::plus<int>>(0, 2, 2);
-  test<int, 1, intel::plus<int>>(0, 7, 7);
-  test<int, 1, intel::plus<int>>(0, 9, 18);
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 2);
+  test<int, 1, ONEAPI::plus<int>>(0, 7, 7);
+  test<int, 1, ONEAPI::plus<int>>(0, 9, 18);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
 
   // Try some power-of-two work-group sizes.
-  test<int, 1, intel::plus<int>>(0, 2, 64);
-  test<int, 1, intel::plus<int>>(0, 4, 64);
-  test<int, 1, intel::plus<int>>(0, 8, 128);
-  test<int, 1, intel::plus<int>>(0, 16, 256);
-  test<int, 1, intel::plus<int>>(0, 32, 256);
-  test<int, 1, intel::plus<int>>(0, 64, 256);
-  test<int, 1, intel::plus<int>>(0, 128, 256);
-  test<int, 1, intel::plus<int>>(0, 256, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 2, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 4, 64);
+  test<int, 1, ONEAPI::plus<int>>(0, 8, 128);
+  test<int, 1, ONEAPI::plus<int>>(0, 16, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 32, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 64, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 128, 256);
+  test<int, 1, ONEAPI::plus<int>>(0, 256, 256);
 
   // Check with various operations.
   test<int, 1, std::multiplies<int>>(1, 8, 256);
-  test<int, 1, intel::bit_or<int>>(0, 8, 256);
-  test<int, 1, intel::bit_xor<int>>(0, 8, 256);
-  test<int, 1, intel::bit_and<int>>(~0, 8, 256);
-  test<int, 1, intel::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
-  test<int, 1, intel::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_xor<int>>(0, 8, 256);
+  test<int, 1, ONEAPI::bit_and<int>>(~0, 8, 256);
+  test<int, 1, ONEAPI::minimum<int>>((std::numeric_limits<int>::max)(), 8, 256);
+  test<int, 1, ONEAPI::maximum<int>>((std::numeric_limits<int>::min)(), 8, 256);
 
   // Check with various types.
   test<float, 1, std::multiplies<float>>(1, 8, 256);
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 1, 16);
-  test<float, 1, intel::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 1, 16);
+  test<float, 1, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 8, 256);
 
   // Check with CUSTOM type.
   test<CustomVec<long long>, 1, CustomVecPlus<long long>>(CustomVec<long long>(0), 8, 256);
diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp
index e972105bbab50..77633992ea2df 100644
--- a/sycl/test/reduction/reduction_placeholder.cpp
+++ b/sycl/test/reduction/reduction_placeholder.cpp
@@ -7,7 +7,8 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with a placeholder accessor.
@@ -41,7 +42,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
     CGH.require(Out);
-    auto Redu = intel::reduction(Out, Identity, BinaryOperation());
+    auto Redu = ONEAPI::reduction(Out, Identity, BinaryOperation());
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
@@ -64,16 +65,16 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // fast atomics and fast reduce
-  test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
-  test<int, 0, intel::plus<int>>(0, 8, 8);
+  test<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
+  test<int, 0, ONEAPI::plus<int>>(0, 8, 8);
 
   // fast atomics
-  test<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
-  test<int, 1, intel::bit_or<int>>(0, 4, 128);
+  test<int, 0, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
+  test<int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
 
   // fast reduce
-  test<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
-  test<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
+  test<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
+  test<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
 
   // generic algorithm
   test<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp
index fd527f8f4e0ef..dea789b395401 100644
--- a/sycl/test/reduction/reduction_transparent.cpp
+++ b/sycl/test/reduction/reduction_transparent.cpp
@@ -7,7 +7,8 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // where func is a transparent functor.
@@ -46,7 +47,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) {
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
     CGH.parallel_for<SomeIdClass<T, Dim, BinaryOperation>>(
-        NDRange, intel::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) {
+        NDRange, ONEAPI::reduction(Out, Identity, BOp),
+        [=](nd_item<1> NDIt, auto &Sum) {
           Sum.combine(In[NDIt.get_global_linear_id()]);
         });
   });
@@ -86,7 +88,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) {
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
     CGH.parallel_for<SomeNoIdClass<T, Dim, BinaryOperation>>(
-        NDRange, intel::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) {
+        NDRange, ONEAPI::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) {
           Sum.combine(In[NDIt.get_global_linear_id()]);
         });
   });
@@ -110,10 +112,10 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
 #if __cplusplus >= 201402L
-  test<float, 0, intel::maximum<>>(getMinimumFPValue<float>(), 7, 7 * 5);
-  test<signed char, 0, intel::plus<>>(0, 7, 49);
+  test<float, 0, ONEAPI::maximum<>>(getMinimumFPValue<float>(), 7, 7 * 5);
+  test<signed char, 0, ONEAPI::plus<>>(0, 7, 49);
   test<unsigned char, 1, std::multiplies<>>(1, 4, 16);
-  test<unsigned short, 0, intel::plus<>>(0, 1, 512 + 32);
+  test<unsigned short, 0, ONEAPI::plus<>>(0, 1, 512 + 32);
 #endif // __cplusplus >= 201402L
 
   std::cout << "Test passed\n";
diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp
index 592a36904a8e8..0ada4c515b615 100644
--- a/sycl/test/reduction/reduction_usm.cpp
+++ b/sycl/test/reduction/reduction_usm.cpp
@@ -7,7 +7,8 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
 // RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
-// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
+// TODO: Enable the test for HOST when it supports ONEAPI::reduce() and
+// barrier()
 
 // This test performs basic checks of parallel_for(nd_range, reduction, func)
 // with reductions initialized with USM var.
@@ -62,7 +63,7 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
   // Compute.
   Q.submit([&](handler &CGH) {
     auto In = InBuf.template get_access<access::mode::read>(CGH);
-    auto Redu = intel::reduction(ReduVarPtr, Identity, BOp);
+    auto Redu = ONEAPI::reduction(ReduVarPtr, Identity, BOp);
     range<1> GlobalRange(NWItems);
     range<1> LocalRange(WGSize);
     nd_range<1> NDRange(GlobalRange, LocalRange);
@@ -105,16 +106,17 @@ void testUSM(T Identity, size_t WGSize, size_t NWItems) {
 
 int main() {
   // fast atomics and fast reduce
-  testUSM<int, 1, intel::plus<int>>(0, 49, 49 * 5);
-  testUSM<int, 0, intel::plus<int>>(0, 8, 128);
+  testUSM<int, 1, ONEAPI::plus<int>>(0, 49, 49 * 5);
+  testUSM<int, 0, ONEAPI::plus<int>>(0, 8, 128);
 
   // fast atomics
-  testUSM<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
-  testUSM<int, 1, intel::bit_or<int>>(0, 4, 128);
+  testUSM<int, 0, ONEAPI::bit_or<int>>(0, 7, 7 * 3);
+  testUSM<int, 1, ONEAPI::bit_or<int>>(0, 4, 128);
 
   // fast reduce
-  testUSM<float, 1, intel::minimum<float>>(getMaximumFPValue<float>(), 5, 5 * 7);
-  testUSM<float, 0, intel::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
+  testUSM<float, 1, ONEAPI::minimum<float>>(getMaximumFPValue<float>(), 5,
+                                            5 * 7);
+  testUSM<float, 0, ONEAPI::maximum<float>>(getMinimumFPValue<float>(), 4, 128);
 
   // generic algorithm
   testUSM<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
diff --git a/sycl/test/regression/sub-group-store-const-ref.cpp b/sycl/test/regression/sub-group-store-const-ref.cpp
index dd10e1d57f12f..5c79e5e6758f6 100644
--- a/sycl/test/regression/sub-group-store-const-ref.cpp
+++ b/sycl/test/regression/sub-group-store-const-ref.cpp
@@ -13,4 +13,4 @@
 #include <CL/sycl.hpp>
 using namespace sycl;
 
-void test(intel::sub_group sg, global_ptr<int> ptr) { sg.store(ptr, 1); }
+void test(ONEAPI::sub_group sg, global_ptr<int> ptr) { sg.store(ptr, 1); }
diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp
index c9450a451d2c6..73cb4e8e8cbb4 100644
--- a/sycl/test/spec_const/spec_const_hw.cpp
+++ b/sycl/test/spec_const/spec_const_hw.cpp
@@ -38,7 +38,8 @@ int val = 10;
 int get_value() { return val; }
 
 float foo(
-    const cl::sycl::experimental::spec_constant<float, MyFloatConst> &f32) {
+    const cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst>
+        &f32) {
   return f32;
 }
 
@@ -47,8 +48,8 @@ struct SCWrapper {
       : SC1(p.set_spec_constant<class sc_name1, int>(4)),
         SC2(p.set_spec_constant<class sc_name2, int>(2)) {}
 
-  cl::sycl::experimental::spec_constant<int, class sc_name1> SC1;
-  cl::sycl::experimental::spec_constant<int, class sc_name2> SC2;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name1> SC1;
+  cl::sycl::ONEAPI::experimental::spec_constant<int, class sc_name2> SC2;
 };
 
 int main(int argc, char **argv) {
@@ -79,10 +80,10 @@ int main(int argc, char **argv) {
   // TODO make this floating point once supported by the compiler
   float goldf = (float)get_value();
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(goldi);
 
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32 =
       program2.set_spec_constant<MyFloatConst>(goldf);
 
   program1.build_with_kernel_type<KernelAAAi>();
diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp
index 7312e29ab40e1..26b0a871398d5 100644
--- a/sycl/test/spec_const/spec_const_neg.cpp
+++ b/sycl/test/spec_const/spec_const_neg.cpp
@@ -45,7 +45,7 @@ int main(int argc, char **argv) {
             << "\n";
   cl::sycl::program program1(q.get_context());
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program1.set_spec_constant<MyInt32Const>(10);
 
   std::vector<int> veci(1);
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
   try {
     // This is an attempt to set a spec constant after the program has been
     // built - spec_const_error should be thrown
-    cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
         program1.set_spec_constant<MyInt32Const>(10);
 
     cl::sycl::buffer<int, 1> bufi(veci.data(), veci.size());
@@ -69,7 +69,7 @@ int main(int argc, char **argv) {
             acci[0] = i32.get();
           });
     });
-  } catch (cl::sycl::experimental::spec_const_error &sc_err) {
+  } catch (cl::sycl::ONEAPI::experimental::spec_const_error &sc_err) {
     passed = true;
   } catch (cl::sycl::exception &e) {
     std::cout << "*** Exception caught: " << e.what() << "\n";
diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp
index 6883ce5c9d7d6..fc5e7dcb22ac1 100644
--- a/sycl/test/spec_const/spec_const_redefine.cpp
+++ b/sycl/test/spec_const/spec_const_redefine.cpp
@@ -68,9 +68,9 @@ int main(int argc, char **argv) {
   for (int i = 0; i < n_sc_sets; i++) {
     cl::sycl::program program(q.get_context());
     const int *sc_set = &sc_vals[i][0];
-    cl::sycl::experimental::spec_constant<int32_t, SC0> sc0 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, SC0> sc0 =
         program.set_spec_constant<SC0>(sc_set[0]);
-    cl::sycl::experimental::spec_constant<int32_t, SC1> sc1 =
+    cl::sycl::ONEAPI::experimental::spec_constant<int32_t, SC1> sc1 =
         program.set_spec_constant<SC1>(sc_set[1]);
 
     program.build_with_kernel_type<KernelAAA>();
diff --git a/sycl/test/spec_const/spec_const_types.cpp b/sycl/test/spec_const/spec_const_types.cpp
index c7017b2b69726..0a5178c5824eb 100644
--- a/sycl/test/spec_const/spec_const_types.cpp
+++ b/sycl/test/spec_const/spec_const_types.cpp
@@ -42,49 +42,49 @@ int main() {
   cl::sycl::program program(queue.get_context());
 
   // Create specialization constants.
-  cl::sycl::experimental::spec_constant<bool, MyBoolConst> i1 =
+  cl::sycl::ONEAPI::experimental::spec_constant<bool, MyBoolConst> i1 =
       program.set_spec_constant<MyBoolConst>((bool)get_value());
   // CHECK-DAG: _ZTS11MyBoolConst=1|0
 
-  cl::sycl::experimental::spec_constant<int8_t, MyInt8Const> i8 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int8_t, MyInt8Const> i8 =
       program.set_spec_constant<MyInt8Const>((int8_t)get_value());
   // CHECK-DAG: _ZTS11MyInt8Const=1|1
-  cl::sycl::experimental::spec_constant<uint8_t, MyUInt8Const> ui8 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint8_t, MyUInt8Const> ui8 =
       program.set_spec_constant<MyUInt8Const>((uint8_t)get_value());
   // CHECK-DAG: _ZTS12MyUInt8Const=1|2
 
-  cl::sycl::experimental::spec_constant<int16_t, MyInt16Const> i16 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int16_t, MyInt16Const> i16 =
       program.set_spec_constant<MyInt16Const>((int16_t)get_value());
   // CHECK-DAG: _ZTS12MyInt16Const=1|3
-  cl::sycl::experimental::spec_constant<uint16_t, MyUInt16Const> ui16 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint16_t, MyUInt16Const> ui16 =
       program.set_spec_constant<MyUInt16Const>((uint16_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt16Const=1|4
 
-  cl::sycl::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
       program.set_spec_constant<MyInt32Const>((int32_t)get_value());
   // CHECK-DAG: _ZTS12MyInt32Const=1|5
-  cl::sycl::experimental::spec_constant<uint32_t, MyUInt32Const> ui32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint32_t, MyUInt32Const> ui32 =
       program.set_spec_constant<MyUInt32Const>((uint32_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt32Const=1|6
 
-  cl::sycl::experimental::spec_constant<int64_t, MyInt64Const> i64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<int64_t, MyInt64Const> i64 =
       program.set_spec_constant<MyInt64Const>((int64_t)get_value());
   // CHECK-DAG: _ZTS12MyInt64Const=1|7
-  cl::sycl::experimental::spec_constant<uint64_t, MyUInt64Const> ui64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<uint64_t, MyUInt64Const> ui64 =
       program.set_spec_constant<MyUInt64Const>((uint64_t)get_value());
   // CHECK-DAG: _ZTS13MyUInt64Const=1|8
 
 #define HALF 0 // TODO not yet supported
 #if HALF
-  cl::sycl::experimental::spec_constant<cl::sycl::half, MyHalfConst> f16 =
-      program.set_spec_constant<MyHalfConst>((cl::sycl::half)get_value());
+  cl::sycl::ONEAPI::experimental::spec_constant<cl::sycl::half, MyHalfConst>
+      f16 = program.set_spec_constant<MyHalfConst>((cl::sycl::half)get_value());
 #endif
 
-  cl::sycl::experimental::spec_constant<float, MyFloatConst> f32 =
+  cl::sycl::ONEAPI::experimental::spec_constant<float, MyFloatConst> f32 =
       program.set_spec_constant<MyFloatConst>((float)get_value());
   // CHECK-DAG: _ZTS12MyFloatConst=1|9
 
-  cl::sycl::experimental::spec_constant<double, MyDoubleConst> f64 =
+  cl::sycl::ONEAPI::experimental::spec_constant<double, MyDoubleConst> f64 =
       program.set_spec_constant<MyDoubleConst>((double)get_value());
   // CHECK-DAG: _ZTS13MyDoubleConst=1|10
 
@@ -97,16 +97,14 @@ int main() {
     queue.submit([&](cl::sycl::handler &cgh) {
       auto acc = buf.get_access<cl::sycl::access::mode::write>(cgh);
       cgh.single_task<SpecializedKernel>(
-          program.get_kernel<SpecializedKernel>(),
-          [=]() { acc[0] = i1.get() +
-                           i8.get() + ui8.get() +
-                           i16.get() + ui16.get() +
-                           i32.get() + ui32.get() +
-                           i64.get() + ui64.get() +
+          program.get_kernel<SpecializedKernel>(), [=]() {
+            acc[0] = i1.get() + i8.get() + ui8.get() + i16.get() + ui16.get() +
+                     i32.get() + ui32.get() + i64.get() + ui64.get() +
 #if HALF
-                           f16.get() +
+                     f16.get() +
 #endif
-                           f32.get() + f64.get(); });
+                     f32.get() + f64.get();
+          });
     });
   }
 }
diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp
index 25e31cbeb521c..b8aeefa9ca0e3 100644
--- a/sycl/test/sub_group/barrier.cpp
+++ b/sycl/test/sub_group/barrier.cpp
@@ -33,7 +33,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         size_t lid = SG.get_local_id().get(0);
         size_t gid = NdItem.get_global_id(0);
         size_t SGoff = gid - lid;
diff --git a/sycl/test/sub_group/broadcast.hpp b/sycl/test/sub_group/broadcast.hpp
index e057139db0152..4863716f19edd 100644
--- a/sycl/test/sub_group/broadcast.hpp
+++ b/sycl/test/sub_group/broadcast.hpp
@@ -20,7 +20,7 @@ template <typename T> void check(queue &Queue) {
       auto syclacc = syclbuf.template get_access<access::mode::read_write>(cgh);
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         /*Broadcast GID of element with SGLID == SGID % SGMLR*/
         syclacc[NdItem.get_global_id()] =
             broadcast(SG, T(NdItem.get_global_id(0)),
diff --git a/sycl/test/sub_group/common.cpp b/sycl/test/sub_group/common.cpp
index 17b1a9d8166d8..41623ae2c228b 100644
--- a/sycl/test/sub_group/common.cpp
+++ b/sycl/test/sub_group/common.cpp
@@ -36,7 +36,7 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       auto syclacc = syclbuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<class sycl_subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         syclacc[NdItem.get_global_id()].local_id = SG.get_local_id().get(0);
         syclacc[NdItem.get_global_id()].local_range =
             SG.get_local_range().get(0);
diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp
index 3cab0b9edbd7a..786a9b57a0ec2 100644
--- a/sycl/test/sub_group/generic-shuffle.cpp
+++ b/sycl/test/sub_group/generic-shuffle.cpp
@@ -43,7 +43,7 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<pointer_kernel<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
@@ -144,7 +144,7 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) {
       auto in = buf_in.template get_access<access::mode::read>(cgh);
 
       cgh.parallel_for<pointer_kernel<T>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
diff --git a/sycl/test/sub_group/generic_reduce.cpp b/sycl/test/sub_group/generic_reduce.cpp
index 7dda3a3492e57..98401e091e733 100644
--- a/sycl/test/sub_group/generic_reduce.cpp
+++ b/sycl/test/sub_group/generic_reduce.cpp
@@ -27,19 +27,18 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
     Queue.submit([&](handler &cgh) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
       auto acc = buf.template get_access<access::mode::read_write>(cgh);
-      cgh.parallel_for(
-          NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
-            if (skip_init) {
-              acc[NdItem.get_global_id(0)] =
-                  reduce(sg, T(NdItem.get_global_id(0)), op);
-            } else {
-              acc[NdItem.get_global_id(0)] =
-                  reduce(sg, T(NdItem.get_global_id(0)), init, op);
-            }
-            if (NdItem.get_global_id(0) == 0)
-              sgsizeacc[0] = sg.get_max_local_range()[0];
-          });
+      cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) {
+        ONEAPI::sub_group sg = NdItem.get_sub_group();
+        if (skip_init) {
+          acc[NdItem.get_global_id(0)] =
+              reduce(sg, T(NdItem.get_global_id(0)), op);
+        } else {
+          acc[NdItem.get_global_id(0)] =
+              reduce(sg, T(NdItem.get_global_id(0)), init, op);
+        }
+        if (NdItem.get_global_id(0) == 0)
+          sgsizeacc[0] = sg.get_max_local_range()[0];
+      });
     });
     auto acc = buf.template get_access<access::mode::read_write>();
     auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>();
@@ -81,8 +80,8 @@ int main() {
   // Test user-defined type
   // Use complex as a proxy for this
   using UDT = std::complex<float>;
-  check_op<UDT>(Queue, UDT(L, L), intel::plus<UDT>(), false, G, L);
-  check_op<UDT>(Queue, UDT(0, 0), intel::plus<UDT>(), true, G, L);
+  check_op<UDT>(Queue, UDT(L, L), ONEAPI::plus<UDT>(), false, G, L);
+  check_op<UDT>(Queue, UDT(0, 0), ONEAPI::plus<UDT>(), true, G, L);
 
   // Test user-defined operator
   auto UDOp = [=](const auto &lhs, const auto &rhs) { return lhs + rhs; };
diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp
index 3e818e7446ebf..c51a01fbdaefe 100644
--- a/sycl/test/sub_group/load_store.cpp
+++ b/sycl/test/sub_group/load_store.cpp
@@ -45,7 +45,7 @@ template <typename T, int N> void check(queue &Queue) {
       accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
           {L}, cgh);
       cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         if (SG.get_group_id().get(0) % N == 0) {
           size_t SGOffset =
               SG.get_group_id().get(0) * SG.get_max_local_range().get(0);
@@ -119,7 +119,7 @@ template <typename T> void check(queue &Queue) {
       accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
           {L}, cgh);
       cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         if (NdItem.get_global_id(0) == 0)
           sgsizeacc[0] = SG.get_max_local_range()[0];
         size_t SGOffset =
diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp
index 0fb52c9b6ead0..66a737a95c8c4 100644
--- a/sycl/test/sub_group/reduce.hpp
+++ b/sycl/test/sub_group/reduce.hpp
@@ -25,7 +25,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       auto acc = buf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
           NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
+            ONEAPI::sub_group sg = NdItem.get_sub_group();
             if (skip_init) {
               acc[NdItem.get_global_id(0)] =
                   reduce(sg, T(NdItem.get_global_id(0)), op);
@@ -71,23 +71,23 @@ template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
     L = 32;
   }
 
-  check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<T>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
-  check_op<T>(Queue, T(G), intel::minimum<T>(), true, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<T>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::minimum<T>(), true, G, L);
 
-  check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::maximum<T>(), true, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::maximum<T>(), true, G, L);
 
 #if __cplusplus >= 201402L
-  check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
-  check_op<T>(Queue, T(G), intel::minimum<>(), true, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::minimum<>(), true, G, L);
 
-  check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::maximum<>(), true, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
 #endif
 }
diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp
index e0d49e5f9b352..ae9b4ced66ab0 100644
--- a/sycl/test/sub_group/scan.hpp
+++ b/sycl/test/sub_group/scan.hpp
@@ -27,7 +27,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       auto inacc = inbuf.template get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<sycl_subgr<T, BinaryOperation>>(
           NdRange, [=](nd_item<1> NdItem) {
-            intel::sub_group sg = NdItem.get_sub_group();
+            ONEAPI::sub_group sg = NdItem.get_sub_group();
             if (skip_init) {
               exacc[NdItem.get_global_id(0)] =
                   exclusive_scan(sg, T(NdItem.get_global_id(0)), op);
@@ -79,50 +79,50 @@ template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
     L = 32;
   }
 
-  check_op<T>(Queue, T(L), intel::plus<T>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<T>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<T>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<T>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<T>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<T>(),
+    check_op<T>(Queue, std::numeric_limits<T>::infinity(), ONEAPI::minimum<T>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<T>(), true,
-                G, L);
+    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<T>(),
+                true, G, L);
   }
 
-  check_op<T>(Queue, T(G), intel::maximum<T>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<T>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<T>(),
-                true, G, L);
+    check_op<T>(Queue, -std::numeric_limits<T>::infinity(),
+                ONEAPI::maximum<T>(), true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<T>(), true,
-                G, L);
+    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(),
+                true, G, L);
   }
 
 #if __cplusplus >= 201402L
-  check_op<T>(Queue, T(L), intel::plus<>(), false, G, L);
-  check_op<T>(Queue, T(0), intel::plus<>(), true, G, L);
+  check_op<T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::plus<>(), true, G, L);
 
-  check_op<T>(Queue, T(0), intel::minimum<>(), false, G, L);
+  check_op<T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, std::numeric_limits<T>::infinity(), intel::minimum<>(),
+    check_op<T>(Queue, std::numeric_limits<T>::infinity(), ONEAPI::minimum<>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::max(), intel::minimum<>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::max(), ONEAPI::minimum<>(), true,
                 G, L);
   }
 
-  check_op<T>(Queue, T(G), intel::maximum<>(), false, G, L);
+  check_op<T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L);
   if (std::is_floating_point<T>::value ||
       std::is_same<T, cl::sycl::half>::value) {
-    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), intel::maximum<>(),
+    check_op<T>(Queue, -std::numeric_limits<T>::infinity(), ONEAPI::maximum<>(),
                 true, G, L);
   } else {
-    check_op<T>(Queue, std::numeric_limits<T>::min(), intel::maximum<>(), true,
+    check_op<T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true,
                 G, L);
   }
 #endif
diff --git a/sycl/test/sub_group/shuffle.hpp b/sycl/test/sub_group/shuffle.hpp
index 72ee4f46db871..ff18284d3f62a 100644
--- a/sycl/test/sub_group/shuffle.hpp
+++ b/sycl/test/sub_group/shuffle.hpp
@@ -34,7 +34,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         vec<T, N> vwggid(wggid), vsgid(sgid);
@@ -117,7 +117,7 @@ template <typename T> void check(queue &Queue, size_t G = 256, size_t L = 64) {
       auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
 
       cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         uint32_t wggid = NdItem.get_global_id(0);
         uint32_t sgid = SG.get_group_id().get(0);
         if (wggid == 0)
diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp
index 382266fa412c0..1d9852e217d6f 100644
--- a/sycl/test/sub_group/vote.cpp
+++ b/sycl/test/sub_group/vote.cpp
@@ -49,7 +49,7 @@ void check(queue Queue, const int G, const int L, const int D, const int R) {
       auto sganyacc = sganybuf.get_access<access::mode::read_write>(cgh);
       auto sgallacc = sgallbuf.get_access<access::mode::read_write>(cgh);
       cgh.parallel_for<class subgr>(NdRange, [=](nd_item<1> NdItem) {
-        intel::sub_group SG = NdItem.get_sub_group();
+        ONEAPI::sub_group SG = NdItem.get_sub_group();
         /* Set to 1 if any local ID in subgroup devided by D has remainder R */
         if (any_of(SG, SG.get_local_id().get(0) % D == R)) {
           sganyacc[NdItem.get_global_id()] = 1;