From 6e98e3f68e40769c8ba5a049a85b483eaac45a66 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:12:01 -0700 Subject: [PATCH 01/58] not buildable: remove host device from device_impl.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 38 +++++------------------------- sycl/source/detail/device_impl.hpp | 14 ----------- 2 files changed, 6 insertions(+), 46 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 532cffe22500f..d043a59d9cebd 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -17,11 +17,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -device_impl::device_impl() - : MIsHostDevice(true), MPlatform(platform_impl::getHostPlatformImpl()), - // assert is natively supported by host - MIsAssertFailSupported(true) {} - device_impl::device_impl(pi_native_handle InteropDeviceHandle, const PluginPtr &Plugin) : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {} @@ -39,7 +34,7 @@ device_impl::device_impl(sycl::detail::pi::PiDevice Device, device_impl::device_impl(pi_native_handle InteropDeviceHandle, sycl::detail::pi::PiDevice Device, PlatformImplPtr Platform, const PluginPtr &Plugin) - : MDevice(Device), MIsHostDevice(false), + : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; @@ -84,13 +79,11 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, } device_impl::~device_impl() { - if (!MIsHostDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck(MDevice); - __SYCL_CHECK_OCL_CODE_NO_EXC(Err); - } + // TODO catch an exception and put it to list of asynchronous exceptions + const PluginPtr &Plugin = getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck(MDevice); + 
__SYCL_CHECK_OCL_CODE_NO_EXC(Err); } bool device_impl::is_affinity_supported( @@ -101,11 +94,6 @@ bool device_impl::is_affinity_supported( } cl_device_id device_impl::get() const { - if (MIsHostDevice) { - throw invalid_object_error( - "This instance of device doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_DEVICE); - } // TODO catch an exception and put it to list of asynchronous exceptions getPlugin()->call(MDevice); return pi::cast(getNative()); @@ -180,9 +168,6 @@ device_impl::get_backend_info() const { } bool device_impl::has_extension(const std::string &ExtensionName) const { - if (MIsHostDevice) - // TODO: implement extension management for host device; - return false; std::string AllExtensionNames = get_device_info_string(PiInfoCode::value); return (AllExtensionNames.find(ExtensionName) != std::string::npos); @@ -224,8 +209,6 @@ device_impl::create_sub_devices(const cl_device_partition_property *Properties, } std::vector device_impl::create_sub_devices(size_t ComputeUnits) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported(info::partition_property::partition_equally)) { throw sycl::feature_not_supported( "Device does not support " @@ -248,8 +231,6 @@ std::vector device_impl::create_sub_devices(size_t ComputeUnits) const { std::vector device_impl::create_sub_devices(const std::vector &Counts) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported(info::partition_property::partition_by_counts)) { throw sycl::feature_not_supported( "Device does not support " @@ -291,8 +272,6 @@ device_impl::create_sub_devices(const std::vector &Counts) const { std::vector device_impl::create_sub_devices( info::partition_affinity_domain AffinityDomain) const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported( info::partition_property::partition_by_affinity_domain)) { throw sycl::feature_not_supported( @@ 
-319,8 +298,6 @@ std::vector device_impl::create_sub_devices( } std::vector device_impl::create_sub_devices() const { - assert(!MIsHostDevice && "Partitioning is not supported on host."); - if (!is_partition_supported( info::partition_property::ext_intel_partition_by_cslice)) { throw sycl::feature_not_supported( @@ -789,9 +766,6 @@ uint64_t device_impl::getCurrentDeviceTime() { uint64_t HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); - if (MIsHostDevice) { - return HostTime; - } // To account for potential clock drift between host clock and device clock. // The value set is arbitrary: 200 seconds diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 981b1e059a30e..2526647152892 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -65,10 +65,6 @@ class device_impl { /// /// \return non-constant reference to PI device sycl::detail::pi::PiDevice &getHandleRef() { - if (MIsHostDevice) - throw invalid_object_error("This instance of device is a host instance", - PI_ERROR_INVALID_DEVICE); - return MDevice; } @@ -78,18 +74,9 @@ class device_impl { /// /// \return constant reference to PI device const sycl::detail::pi::PiDevice &getHandleRef() const { - if (MIsHostDevice) - throw invalid_object_error("This instance of device is a host instance", - PI_ERROR_INVALID_DEVICE); - return MDevice; } - /// Check if SYCL device is a host device - /// - /// \return true if SYCL device is a host device - bool is_host() const { return MIsHostDevice; } - /// Check if device is a CPU device /// /// \return true if SYCL device is a CPU device @@ -327,7 +314,6 @@ class device_impl { sycl::detail::pi::PiDevice MDevice = 0; sycl::detail::pi::PiDeviceType MType; sycl::detail::pi::PiDevice MRootDevice = nullptr; - bool MIsHostDevice; PlatformImplPtr MPlatform; bool MIsAssertFailSupported = false; mutable std::string MDeviceName; From abe4586ce16a07b69a1d2c662679697754db00a2 Mon Sep 17 
00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:13:51 -0700 Subject: [PATCH 02/58] not-buildable: remove getHostPlatformImpl Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.cpp | 3 --- sycl/source/detail/device_info.hpp | 4 ---- sycl/source/detail/platform_impl.cpp | 6 ------ sycl/source/detail/platform_impl.hpp | 8 -------- 4 files changed, 21 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 388c312305d4a..c2124456dae24 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -177,9 +177,6 @@ uint32_t context_impl::get_info() const { this->getPlugin()); } template <> platform context_impl::get_info() const { - if (is_host()) - return createSyclObjFromImpl( - platform_impl::getHostPlatformImpl()); return createSyclObjFromImpl(MPlatform); } template <> diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index a8769b69e83cc..61cb09e1b0b38 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1802,10 +1802,6 @@ get_device_info_host() { return {}; } -template <> inline platform get_device_info_host() { - return createSyclObjFromImpl(platform_impl::getHostPlatformImpl()); -} - template <> inline std::string get_device_info_host() { return "SYCL host device"; } diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 2bdfab26676d9..9700fde466803 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -30,12 +30,6 @@ namespace detail { using PlatformImplPtr = std::shared_ptr; -PlatformImplPtr platform_impl::getHostPlatformImpl() { - static PlatformImplPtr HostImpl = std::make_shared(); - - return HostImpl; -} - PlatformImplPtr platform_impl::getOrMakePlatformImpl(sycl::detail::pi::PiPlatform PiPlatform, const PluginPtr &Plugin) { diff --git a/sycl/source/detail/platform_impl.hpp 
b/sycl/source/detail/platform_impl.hpp index 34537c7191af6..0bb8d1ab77e2f 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -192,14 +192,6 @@ class platform_impl { getOrMakeDeviceImpl(sycl::detail::pi::PiDevice PiDevice, const std::shared_ptr &PlatformImpl); - /// Static functions that help maintain platform uniquess and - /// equality of comparison - - /// Returns the host platform impl - /// - /// \return the host platform impl - static std::shared_ptr getHostPlatformImpl(); - /// Queries the cache to see if the specified PiPlatform has been seen /// before. If so, return the cached platform_impl, otherwise create a new /// one and cache it. From 6a0a25005b1b9b831419e94ed56b0bb8f15b4017 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:18:11 -0700 Subject: [PATCH 03/58] not buildable: remove get_device_info_host Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 3 - sycl/source/detail/device_info.hpp | 1032 ---------------------------- 2 files changed, 1035 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index d043a59d9cebd..2e87300425c20 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -105,9 +105,6 @@ platform device_impl::get_platform() const { template typename Param::return_type device_impl::get_info() const { - if (is_host()) { - return get_device_info_host(); - } return get_device_info( MPlatform->getOrMakeDeviceImpl(MDevice, MPlatform)); } diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 61cb09e1b0b38..9322b65128652 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1272,1038 +1272,6 @@ typename Param::return_type get_device_info(const DeviceImplPtr &Dev) { return get_device_info_impl::get(Dev); } -// SYCL host device information - -// Default template is disabled, all possible 
instantiations are -// specified explicitly. -template -inline typename Param::return_type get_device_info_host() = delete; - -template <> -inline std::vector get_device_info_host() { - return std::vector(); -} - -template <> -inline ext::oneapi::experimental::architecture -get_device_info_host() { - return ext::oneapi::experimental::architecture::x86_64; -} - -template <> -inline info::device_type get_device_info_host() { - return info::device_type::host; -} - -template <> inline uint32_t get_device_info_host() { - return 0x8086; -} - -template <> -inline uint32_t get_device_info_host() { - return std::thread::hardware_concurrency(); -} - -template <> -inline uint32_t get_device_info_host() { - return 3; -} - -template <> -inline range<1> get_device_info_host>() { - // current value is the required minimum - return {1}; -} - -template <> -inline range<2> get_device_info_host>() { - // current value is the required minimum - return {1, 1}; -} - -template <> -inline range<3> get_device_info_host>() { - // current value is the required minimum - return {1, 1, 1}; -} - -template <> -inline constexpr size_t get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>() { - // See handler.hpp for the maximum value : - return static_cast((std::numeric_limits::max)()); -} - -template <> -inline id<1> get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<1>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit}; -} - -template <> -inline id<2> get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<2>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit, Limit}; -} - -template <> -inline id<3> get_device_info_host< - 
ext::oneapi::experimental::info::device::max_work_groups<3>>() { - // See handler.hpp for the maximum value : - static constexpr size_t Limit = get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); - return {Limit, Limit, Limit}; -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline constexpr size_t -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_global_work_groups>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<1> -get_device_info_host() { - - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<1>>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<2> -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<2>>(); -} - -// TODO:remove with deprecated feature -// device::get_info -template <> -inline id<3> -get_device_info_host() { - return get_device_info_host< - ext::oneapi::experimental::info::device::max_work_groups<3>>(); -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate - return 1; -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO update when appropriate 
- return 0; -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Char); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Short); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Int); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Long); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Float); -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Double); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Half); -} - -template <> -inline uint32_t get_device_info_host() { - return PlatformUtil::getMaxClockFrequency(); -} - -template <> inline uint32_t get_device_info_host() { - return sizeof(void *) * 8; -} - -template <> -inline uint64_t get_device_info_host() { - return static_cast(OSUtil::getOSMemSize()); -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - const uint64_t a = get_device_info_host() / 4; - const uint64_t b = 128ul * 1024 * 1024; - return (a > b) ? 
a : b; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> inline bool get_device_info_host() { - return false; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_order::relaxed, memory_order::acquire, memory_order::release, - memory_order::acq_rel, memory_order::seq_cst}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_order::relaxed, memory_order::acquire, memory_order::release, - memory_order::acq_rel}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_scope::work_item, memory_scope::sub_group, - memory_scope::work_group, memory_scope::device, memory_scope::system}; -} - -template <> -inline std::vector -get_device_info_host() { - return {memory_scope::work_item, memory_scope::sub_group, - memory_scope::work_group, memory_scope::device, memory_scope::system}; -} - -template <> -inline bool -get_device_info_host() { - return false; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 128; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 8; -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height. 
Both are not known in this - // query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image width. Both are not known in this - // query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height/depth. Both are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. 
- return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image width/depth. Both are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. - return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // SYCL guarantees at least 8192. Some devices already known to provide more - // than that (i.e. it is 16384 for opencl:gpu), which may create issues during - // image object allocation on host. - // Using any fixed number (i.e. 16384) brings the risk of having similar - // issues on newer devices in future. Thus it does not make sense limiting - // the returned value on host. Practially speaking the returned value on host - // depends only on memory required for the image, which also depends on - // the image channel_type and the image height/width, which are not known - // in this query, thus it becomes user's responsibility to choose proper image - // parameters depending on similar query to (non-host device) and amount - // of available/allocatable memory. 
- return std::numeric_limits::max(); -} - -template <> -inline size_t get_device_info_host() { - // Not supported in SYCL - return 0; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 2048; -} - -template <> inline uint32_t get_device_info_host() { - // current value is the required minimum - return 16; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1024; -} - -template <> -inline uint32_t get_device_info_host() { - return 1024; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {info::fp_config::round_to_nearest, info::fp_config::inf_nan}; -} - -template <> -inline std::vector -get_device_info_host() { - // current value is the required minimum - return {info::fp_config::fma, info::fp_config::round_to_nearest, - info::fp_config::round_to_zero, info::fp_config::round_to_inf, - info::fp_config::inf_nan, info::fp_config::denorm}; -} - -template <> -inline info::global_mem_cache_type -get_device_info_host() { - return info::global_mem_cache_type::read_write; -} - -template <> -inline uint32_t -get_device_info_host() { - return PlatformUtil::getMemCacheLineSize(); -} - -template <> -inline uint64_t get_device_info_host() { - return PlatformUtil::getMemCacheSize(); -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - return 64 * 1024; -} - -template <> -inline uint32_t get_device_info_host() { - // current value is the required minimum - return 8; -} - -template <> -inline info::local_mem_type -get_device_info_host() { - return info::local_mem_type::global; -} - -template <> -inline uint64_t get_device_info_host() { - // current value is the required minimum - return 32 * 1024; -} - -template <> -inline bool 
get_device_info_host() { - return false; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline size_t get_device_info_host() { - typedef std::ratio_divide - ns_period; - return ns_period::num / ns_period::den; -} - -template <> inline bool get_device_info_host() { - union { - uint16_t a; - uint8_t b[2]; - } u = {0x0100}; - - return u.b[1]; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline std::vector -get_device_info_host() { - return {info::execution_capability::exec_kernel}; -} - -template <> inline bool get_device_info_host() { - return true; -} - -template <> -inline std::vector -get_device_info_host() { - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - return {}; -} - -template <> inline std::string get_device_info_host() { - return "SYCL host device"; -} - -template <> inline std::string get_device_info_host() { - return ""; -} - -template <> -inline std::string get_device_info_host() { - return "1.2"; -} - -template <> inline std::string get_device_info_host() { - return "FULL PROFILE"; -} - -template <> inline std::string get_device_info_host() { - return "1.2"; -} - -template <> -inline std::string get_device_info_host() { - return "not applicable"; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update when appropriate - return {}; -} - -template <> -inline size_t get_device_info_host() { - // current value is the required minimum - return 1024 * 1024; -} - -template <> -inline bool get_device_info_host() { - return false; -} - -template <> inline device get_device_info_host() { - throw invalid_object_error( - "Partitioning to subdevices of the host device is not implemented", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - // TODO 
update once subdevice creation is enabled - return 1; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subdevice creation is enabled - return {}; -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subdevice creation is enabled - return {}; -} - -template <> -inline info::partition_property -get_device_info_host() { - return info::partition_property::no_partition; -} - -template <> -inline info::partition_affinity_domain -get_device_info_host() { - // TODO update once subdevice creation is enabled - return info::partition_affinity_domain::not_applicable; -} - -template <> -inline uint32_t get_device_info_host() { - // TODO update once subdevice creation is enabled - return 1; -} - -template <> -inline uint32_t get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector -get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool -get_device_info_host() { - // TODO update once subgroups are enabled - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host() { - return false; -} - -template <> -inline std::string get_device_info_host() { - throw runtime_error( - "Backend version feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template <> -inline bool -get_device_info_host() { - return true; -} - -template <> -inline bool get_device_info_host() { - return true; -} - -template 
<> -inline bool get_device_info_host() { - return false; -} - -// Specializations for intel extensions for Level Zero low-level -// detail device descriptors (not support on host). -template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the device ID is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline std::string -get_device_info_host() { - throw runtime_error( - "Obtaining the PCI address is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the EU count is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU SIMD width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t get_device_info_host() { - throw runtime_error( - "Obtaining the number of slices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error("Obtaining the number of subslices per slice is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU count per subslice is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the HW threads count per EU is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline uint64_t -get_device_info_host() { - throw runtime_error( - "Obtaining the maximum memory bandwidth is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -template <> -inline detail::uuid_type -get_device_info_host() { - throw runtime_error( - "Obtaining the device uuid is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -// TODO: Remove with 
deprecated feature -// device::get_info() -template <> -inline std::string get_device_info_host() { - throw runtime_error( - "Obtaining the PCI address is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t get_device_info_host() { - throw runtime_error("Obtaining the EU count is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU SIMD width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t get_device_info_host() { - throw runtime_error( - "Obtaining the number of slices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error("Obtaining the number of subslices per slice is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the EU count per subslice is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the HW threads count per EU is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline uint64_t -get_device_info_host() { - throw runtime_error( - "Obtaining the maximum memory bandwidth is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} -// TODO:Move to namespace ext::intel::info::device -template <> 
inline bool get_device_info_host() { - return false; -} - -// TODO: Remove with deprecated feature -// device::get_info() -template <> -inline detail::uuid_type -get_device_info_host() { - throw runtime_error( - "Obtaining the device uuid is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint64_t get_device_info_host() { - throw runtime_error( - "Obtaining the device free memory is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the device memory clock rate is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t -get_device_info_host() { - throw runtime_error( - "Obtaining the device memory bus width is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline int32_t -get_device_info_host() { - throw runtime_error( - "Obtaining max compute queue indices is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline bool get_device_info_host< - ext::codeplay::experimental::info::device::supports_fusion>() { - // No support for fusion on the host device. 
- return false; -} - -template <> -inline uint32_t get_device_info_host< - ext::codeplay::experimental::info::device::max_registers_per_work_group>() { - throw runtime_error("Obtaining the maximum number of available registers per " - "work-group is not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::image_row_pitch_align>() { - throw runtime_error("Obtaining image pitch alignment is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_row_pitch>() { - throw runtime_error("Obtaining max image linear pitch is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::matrix_combinations>() { - throw runtime_error("Obtaining matrix combinations is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_width>() { - throw runtime_error("Obtaining max image linear width is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline uint32_t get_device_info_host< - ext::oneapi::experimental::info::device::max_image_linear_height>() { - throw runtime_error("Obtaining max image linear height is not " - "supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline float get_device_info_host< - ext::oneapi::experimental::info::device::mipmap_max_anisotropy>() { - throw runtime_error("Bindless image mipaps are not supported on HOST device", - PI_ERROR_INVALID_DEVICE); -} - -template <> -inline std::vector get_device_info_host< - ext::oneapi::experimental::info::device::component_devices>() { - throw runtime_error("Host devices cannot be component devices.", - 
PI_ERROR_INVALID_DEVICE); -} - -template <> -inline sycl::device get_device_info_host< - ext::oneapi::experimental::info::device::composite_device>() { - throw runtime_error("Host devices cannot be composite devices.", - PI_ERROR_INVALID_DEVICE); -} - -// Returns the list of all progress guarantees that can be requested for -// work_groups from the coordination level of root_group when using host device. -// First it calls getHostProgressGuarantee to get the strongest guarantee -// available and then calls getProgressGuaranteesUpTo to get a list of all -// guarantees that are either equal to the strongest guarantee or weaker than -// it. The next 5 definitions follow the same model but for different scopes. -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_group, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::sub_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::sub_group, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::sub_group_progress_capabilities< - ext::oneapi::experimental::execution_scope::work_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - 
device_impl::getHostProgressGuarantee(execution_scope::sub_group, - execution_scope::work_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::root_group>>() { - - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::root_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::work_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::work_group)); -} - -template <> -inline std::vector -get_device_info_host< - ext::oneapi::experimental::info::device::work_item_progress_capabilities< - ext::oneapi::experimental::execution_scope::sub_group>>() { - using execution_scope = ext::oneapi::experimental::execution_scope; - using ReturnT = - std::vector; - return device_impl::getProgressGuaranteesUpTo( - device_impl::getHostProgressGuarantee(execution_scope::work_item, - execution_scope::sub_group)); -} - // Returns the list of all progress guarantees that can be requested for // work_groups from the coordination level of root_group when using the device // given by Dev. 
First it calls getProgressGuarantee to get the strongest From 35b682216afe064e98bf8c6f2c45334d99a5120a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:23:01 -0700 Subject: [PATCH 04/58] not-buildable: remove is_host from context_impl.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.cpp | 19 +++++-------------- sycl/source/detail/context_impl.hpp | 1 - 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index c2124456dae24..87663c4e10775 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -34,7 +34,6 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, MContext(nullptr), MPlatform(detail::getSyclObjImpl(Device.get_platform())), MPropList(PropList), - MHostContext(detail::getSyclObjImpl(Device)->is_host()), MSupportBufferLocationByDevices(NotChecked) { MKernelProgramCache.setContextPtr(this); } @@ -43,7 +42,7 @@ context_impl::context_impl(const std::vector Devices, async_handler AsyncHandler, const property_list &PropList) : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices), - MContext(nullptr), MPlatform(), MPropList(PropList), MHostContext(false), + MContext(nullptr), MPlatform(), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); std::vector DeviceIds; @@ -88,7 +87,7 @@ context_impl::context_impl(sycl::detail::pi::PiContext PiContext, bool OwnedByRuntime) : MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler), MDevices(DeviceList), MContext(PiContext), MPlatform(), - MHostContext(false), MSupportBufferLocationByDevices(NotChecked) { + MSupportBufferLocationByDevices(NotChecked) { if (!MDevices.empty()) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); } else { @@ -132,18 +131,11 @@ context_impl::context_impl(sycl::detail::pi::PiContext 
PiContext, } cl_context context_impl::get() const { - if (MHostContext) { - throw invalid_object_error( - "This instance of context doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_CONTEXT); - } // TODO catch an exception and put it to list of asynchronous exceptions getPlugin()->call(MContext); return pi::cast(MContext); } -bool context_impl::is_host() const { return MHostContext; } - context_impl::~context_impl() { // Free all events associated with the initialization of device globals. for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) @@ -159,10 +151,9 @@ context_impl::~context_impl() { assert(LibProg.second && "Null program must not be kept in the cache"); getPlugin()->call(LibProg.second); } - if (!MHostContext) { - // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call_nocheck(MContext); - } + + // TODO catch an exception and put it to list of asynchronous exceptions + getPlugin()->call_nocheck(MContext); } const async_handler &context_impl::get_async_handler() const { diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index a1e383f721e31..af20236fc4b23 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -272,7 +272,6 @@ class context_impl { sycl::detail::pi::PiContext MContext; PlatformImplPtr MPlatform; property_list MPropList; - bool MHostContext; CachedLibProgramsT MCachedLibPrograms; std::mutex MCachedLibProgramsMutex; mutable KernelProgramCache MKernelProgramCache; From 77c749c6ea54b35b5324bfe163460279b3039930 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:29:12 -0700 Subject: [PATCH 05/58] not-buildable: remove is_host from event_impl.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 91 +++++++++++++------------------ sycl/source/detail/event_impl.hpp | 3 +- 2 files changed, 38 insertions(+), 56 deletions(-) diff --git 
a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 7442cd4ccfe7a..e187be3563f5b 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -37,20 +37,9 @@ void event_impl::ensureContextInitialized() { if (MIsContextInitialized) return; - if (MHostEvent) { - QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue(); - this->setContextImpl(detail::getSyclObjImpl(HostQueue->get_context())); - } else { - const device SyclDevice; - this->setContextImpl(detail::queue_impl::getDefaultOrNew( - detail::getSyclObjImpl(SyclDevice))); - } -} - -bool event_impl::is_host() { - // Treat all devices that don't support interoperability as host devices to - // avoid attempts to call method get on such events. - return MHostEvent; + const device SyclDevice; + this->setContextImpl(detail::queue_impl::getDefaultOrNew( + detail::getSyclObjImpl(SyclDevice))); } event_impl::~event_impl() { @@ -59,7 +48,7 @@ event_impl::~event_impl() { } void event_impl::waitInternal(bool *Success) { - if (!MHostEvent && MEvent) { + if (MEvent) { // Wait for the native event sycl::detail::pi::PiResult Err = getPlugin()->call_nocheck(1, &MEvent); @@ -92,7 +81,7 @@ void event_impl::waitInternal(bool *Success) { } void event_impl::setComplete() { - if (MHostEvent || !MEvent) { + if (!MEvent) { { std::unique_lock lock(MMutex); #ifndef NDEBUG @@ -137,7 +126,6 @@ const PluginPtr &event_impl::getPlugin() { void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { - MHostEvent = Context->is_host(); MContext = Context; MIsContextInitialized = true; } @@ -145,7 +133,7 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), MHostEvent(false), + 
MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { if (MContext->is_host()) { @@ -317,7 +305,7 @@ event_impl::get_profiling_info() { // made by forcing the re-sync of submit time to start time is less than // 0.5ms. These timing values were obtained empirically using an integrated // Intel GPU). - if (MEventFromSubmittedExecCommandBuffer && !MHostEvent && MEvent) { + if (MEventFromSubmittedExecCommandBuffer && MEvent) { uint64_t StartTime = get_event_profiling_info( this->getHandleRef(), this->getPlugin()); @@ -336,20 +324,19 @@ event_impl::get_profiling_info() { if (isNOP() && MSubmitTime) return MSubmitTime; - if (!MHostEvent) { - if (MEvent) { - auto StartTime = - get_event_profiling_info( + if (MEvent) { + auto StartTime = + get_event_profiling_info( + this->getHandleRef(), this->getPlugin()); + if (!MFallbackProfiling) { + return StartTime; + } else { + auto DeviceBaseTime = + get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - if (!MFallbackProfiling) { - return StartTime; - } else { - auto DeviceBaseTime = - get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - return MHostBaseTime - DeviceBaseTime + StartTime; - } + return MHostBaseTime - DeviceBaseTime + StartTime; } + return 0; } if (!MHostProfilingInfo) @@ -368,19 +355,17 @@ uint64_t event_impl::get_profiling_info() { if (isNOP() && MSubmitTime) return MSubmitTime; - if (!MHostEvent) { - if (MEvent) { - auto EndTime = - get_event_profiling_info( + if (MEvent) { + auto EndTime = + get_event_profiling_info( + this->getHandleRef(), this->getPlugin()); + if (!MFallbackProfiling) { + return EndTime; + } else { + auto DeviceBaseTime = + get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - if (!MFallbackProfiling) { - return EndTime; - } else { - auto DeviceBaseTime = - get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - return MHostBaseTime - DeviceBaseTime + EndTime; - } + return MHostBaseTime - 
DeviceBaseTime + EndTime; } return 0; } @@ -393,7 +378,7 @@ uint64_t event_impl::get_profiling_info() { } template <> uint32_t event_impl::get_info() { - if (!MHostEvent && MEvent) { + if (MEvent) { return get_event_info(this->getHandleRef(), this->getPlugin()); } @@ -406,17 +391,15 @@ event_impl::get_info() { if (MState == HES_Discarded) return info::event_command_status::ext_oneapi_unknown; - if (!MHostEvent) { - // Command is enqueued and PiEvent is ready - if (MEvent) - return get_event_info( - this->getHandleRef(), this->getPlugin()); - // Command is blocked and not enqueued, PiEvent is not assigned yet - else if (MCommand) - return sycl::info::event_command_status::submitted; - } + // Command is enqueued and PiEvent is ready + if (MEvent) + return get_event_info( + this->getHandleRef(), this->getPlugin()); + // Command is blocked and not enqueued, PiEvent is not assigned yet + else if (MCommand) + return sycl::info::event_command_status::submitted; - return MHostEvent && MState.load() != HES_Complete + return MState.load() != HES_Complete ? sycl::info::event_command_status::submitted : info::event_command_status::complete; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f33c160f9df97..08bb15cff6ff8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,7 +49,7 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MHostEvent(State), MIsFlushed(true), + : MIsInitialized(false), MIsFlushed(true), MState(State.value_or(HES_Complete)) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. 
Deferring may lead to conficts with noexcept @@ -364,7 +364,6 @@ class event_impl { uint64_t MSubmitTime = 0; uint64_t MHostBaseTime = 0; ContextImplPtr MContext; - bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; std::weak_ptr MQueue; From 6e7142097db4e014c7a12e576c2af6d124675ed1 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 04:31:22 -0700 Subject: [PATCH 06/58] not-buildable: update is_host for API objects to be easily removed Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 5 ++--- sycl/source/device.cpp | 5 ++--- sycl/source/event.cpp | 5 ++--- sycl/source/kernel.cpp | 5 ++--- sycl/source/platform.cpp | 6 ++---- sycl/source/queue.cpp | 5 ++--- 6 files changed, 12 insertions(+), 19 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 3273c4f3056c2..c24a6c1ec2079 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -138,9 +138,8 @@ context::get_backend_info() const { cl_context context::get() const { return impl->get(); } bool context::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "context::is_host should not be called in implementation."); - return IsHost; + assert(true && "context::is_host should not be called in implementation."); + return false; } backend context::get_backend() const noexcept { return impl->getBackend(); } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 70aa37aad26a2..a3a88ebf6636a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -71,9 +71,8 @@ std::vector device::get_devices(info::device_type deviceType) { cl_device_id device::get() const { return impl->get(); } bool device::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "device::is_host should not be called in implementation."); - return IsHost; + assert(true && "device::is_host should not be called in implementation."); + return false; } bool device::is_cpu() const { return 
impl->is_cpu(); } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index a7bae8055c10b..12b4a7e68164e 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -38,9 +38,8 @@ bool event::operator==(const event &rhs) const { return rhs.impl == impl; } bool event::operator!=(const event &rhs) const { return !(*this == rhs); } bool event::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "event::is_host should not be called in implementation."); - return IsHost; + assert(true && "event::is_host should not be called in implementation."); + return false; } void event::wait() { impl->wait(impl); } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index ff14c0a879078..bc842f6e596a5 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -31,9 +31,8 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) cl_kernel kernel::get() const { return impl->get(); } bool kernel::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "kernel::is_host should not be called in implementation."); - return IsHost; + assert(true && "kernel::is_host should not be called in implementation."); + return false; } context kernel::get_context() const { diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index a2ee714952be9..9a15943213ec6 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -41,10 +41,8 @@ bool platform::has_extension(const std::string &ExtensionName) const { } bool platform::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && - "platform::is_host should not be called in implementation."); - return IsHost; + assert(true && "platform::is_host should not be called in implementation."); + return false; } std::vector platform::get_devices(info::device_type DeviceType) const { diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 15d7f11fcb42d..6a66cce267aa1 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -96,9 +96,8 @@ 
queue::ext_oneapi_get_graph() const { } bool queue::is_host() const { - bool IsHost = impl->is_host(); - assert(!IsHost && "queue::is_host should not be called in implementation."); - return IsHost; + assert(true && "queue::is_host should not be called in implementation."); + return false; } void queue::throw_asynchronous() { impl->throw_asynchronous(); } From 7e5abe966b8ebbfee9e0adcc7ce935cd864c21b8 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 22 May 2024 08:53:47 -0700 Subject: [PATCH 07/58] not-buildable: update most obvious places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 37 ++++-------- sycl/source/detail/event_impl.cpp | 27 +++------ sycl/source/detail/event_impl.hpp | 13 ++-- sycl/source/detail/scheduler/commands.cpp | 60 +++---------------- sycl/source/detail/scheduler/commands.hpp | 7 +-- .../source/detail/scheduler/graph_builder.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 24 +------- sycl/source/detail/scheduler/scheduler.hpp | 8 --- sycl/source/handler.cpp | 9 +-- 9 files changed, 39 insertions(+), 150 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index c24a6c1ec2079..70b12836fc297 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -56,31 +56,20 @@ context::context(const std::vector &DeviceList, throw invalid_parameter_error("DeviceList is empty.", PI_ERROR_INVALID_VALUE); } - auto NonHostDeviceIter = std::find_if_not( - DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { - return detail::getSyclObjImpl(CurrentDevice)->is_host(); - }); - if (NonHostDeviceIter == DeviceList.end()) - impl = std::make_shared(DeviceList[0], AsyncHandler, + + const auto &RefPlatform = + detail::getSyclObjImpl(DeviceList[0].get_platform())->getHandleRef(); + if (std::any_of(DeviceList.begin(), DeviceList.end(), + [&](const device &CurrentDevice) { + return (detail::getSyclObjImpl(CurrentDevice.get_platform()) + ->getHandleRef() != RefPlatform); + })) + 
throw invalid_parameter_error( + "Can't add devices across platforms to a single context.", + PI_ERROR_INVALID_DEVICE); + else + impl = std::make_shared(DeviceList, AsyncHandler, PropList); - else { - const device &NonHostDevice = *NonHostDeviceIter; - const auto &NonHostPlatform = - detail::getSyclObjImpl(NonHostDevice.get_platform())->getHandleRef(); - if (std::any_of(DeviceList.begin(), DeviceList.end(), - [&](const device &CurrentDevice) { - return ( - detail::getSyclObjImpl(CurrentDevice)->is_host() || - (detail::getSyclObjImpl(CurrentDevice.get_platform()) - ->getHandleRef() != NonHostPlatform)); - })) - throw invalid_parameter_error( - "Can't add devices across platforms to a single context.", - PI_ERROR_INVALID_DEVICE); - else - impl = std::make_shared(DeviceList, AsyncHandler, - PropList); - } } context::context(cl_context ClContext, async_handler AsyncHandler) { const auto &Plugin = sycl::detail::pi::getPlugin(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e187be3563f5b..28bb37200392a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -136,13 +136,6 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { - if (MContext->is_host()) { - throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), - "The syclContext must match the OpenCL context " - "associated with the clEvent. 
" + - codeToString(PI_ERROR_INVALID_CONTEXT)); - } - sycl::detail::pi::PiContext TempContext; getPlugin()->call( MEvent, PI_EVENT_INFO_CONTEXT, sizeof(sycl::detail::pi::PiContext), @@ -162,19 +155,8 @@ event_impl::event_impl(const QueueImplPtr &Queue) { void event_impl::associateWithQueue(const QueueImplPtr &Queue) { MQueue = Queue; - MIsProfilingEnabled = Queue->is_host() || Queue->MIsProfilingEnabled; + MIsProfilingEnabled = Queue->MIsProfilingEnabled; MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback(); - if (Queue->is_host()) { - MState.store(HES_NotComplete); - if (Queue->has_property()) { - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); - } - return; - } MState.store(HES_Complete); } @@ -578,6 +560,13 @@ bool event_impl::isCompleted() { info::event_command_status::complete; } +void event_impl::setCommand(void *Cmd) { + MCommand = Cmd; + auto TypedCommand = static_cast(Cmd); + if (TypedCommand) + MIsHostTask = TypedCommand->isHostTask(); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 08bb15cff6ff8..7c1eb99e3b286 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -68,14 +68,6 @@ class event_impl { event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext); event_impl(const QueueImplPtr &Queue); - /// Checks if this event is a SYCL host event. - /// - /// All devices that do not support OpenCL interoperability are treated as - /// host device to avoid attempts to call method get on such events. - // - /// \return true if this event is a SYCL host event. - bool is_host(); - /// Waits for the event. /// /// Self is needed in order to pass shared_ptr to Scheduler. 
@@ -177,7 +169,7 @@ class event_impl { /// Scheduler mutex must be locked in write mode when this is called. /// /// @param Command is a generic pointer to Command object instance. - void setCommand(void *Command) { MCommand = Command; } + void setCommand(void *Command); /// Returns host profiling information. /// @@ -345,6 +337,8 @@ class event_impl { void setEnqueued() { MIsEnqueued = true; } + bool isHost() { return MIsHostTask; } + protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -412,6 +406,7 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; + bool MIsHostTask{false}; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bf7e44062cb5e..0739ac77373b7 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -96,9 +96,7 @@ static std::string demangleKernelName(std::string Name) { return Name; } #endif static std::string deviceToString(device Device) { - if (getSyclObjImpl(Device)->is_host()) - return "HOST"; - else if (Device.is_cpu()) + if (Device.is_cpu()) return "CPU"; else if (Device.is_gpu()) return "GPU"; @@ -144,10 +142,7 @@ void applyFuncOnFilteredArgs( #ifdef XPTI_ENABLE_INSTRUMENTATION static size_t deviceToID(const device &Device) { - if (getSyclObjImpl(Device)->is_host()) - return 0; - else - return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); + return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } #endif @@ -265,7 +260,7 @@ std::vector Command::getPiEventsBlocking( // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. // Skip host task and NOP events also. 
- if (!EventImpl->isContextInitialized() || EventImpl->is_host() || + if (!EventImpl->isContextInitialized() || EventImpl->isHost() || EventImpl->isNOP()) continue; // In this path nullptr native event means that the command has not been @@ -455,40 +450,9 @@ void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { if (!EventImpls.empty()) { - if (Queue->is_host()) { - // Host queue can wait for events from different contexts, i.e. it may - // contain events with different contexts in its MPreparedDepsEvents. - // OpenCL 2.1 spec says that clWaitForEvents will return - // CL_INVALID_CONTEXT if events specified in the list do not belong to - // the same context. Thus we split all the events into per-context map. - // An example. We have two queues for the same CPU device: Q1, Q2. Thus - // we will have two different contexts for the same CPU device: C1, C2. - // Also we have default host queue. This queue is accessible via - // Scheduler. Now, let's assume we have three different events: E1(C1), - // E2(C1), E3(C2). The command's MPreparedDepsEvents will contain all - // three events (E1, E2, E3). Now, if piEventsWait is called for all - // three events we'll experience failure with CL_INVALID_CONTEXT 'cause - // these events refer to different contexts. 
- std::map> - RequiredEventsPerContext; - - for (const EventImplPtr &Event : EventImpls) { - ContextImplPtr Context = Event->getContextImpl(); - assert(Context.get() && - "Only non-host events are expected to be waited for here"); - RequiredEventsPerContext[Context.get()].push_back(Event); - } - - for (auto &CtxWithEvents : RequiredEventsPerContext) { - std::vector RawEvents = - getPiEvents(CtxWithEvents.second); - CtxWithEvents.first->getPlugin()->call( - RawEvents.size(), RawEvents.data()); - } - } else { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) - assert(Event->getContextImpl().get() && + assert(!Event->isHost() && "Only non-host events are expected to be waited for here"); #endif @@ -501,7 +465,6 @@ void Command::waitForEvents(QueueImplPtr Queue, MEvent->setHostEnqueueTime(); Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); - } } } @@ -714,7 +677,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // enqueued // (e.g. alloca). Note that we can't check the pi event to make that // distinction since the command might still be unenqueued at this point. 
- bool PiEventExpected = (!DepEvent->is_host() && DepEvent->isInitialized()); + bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -885,7 +848,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking, else { MEvent->setEnqueued(); if (MShouldCompleteEventIfPossible && - (MEvent->is_host() || MEvent->getHandleRef() == nullptr)) + (MEvent->isHost() || MEvent->getHandleRef() == nullptr)) MEvent->setComplete(); // Consider the command is successfully enqueued if return code is @@ -3172,8 +3135,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = getPiEventsBlocking(Events); - if (MQueue->getDeviceImplPtr()->is_host() || PiEvents.empty()) { - // NOP for host device. + if (PiEvents.empty()) { // If Events is empty, then the barrier has no effect. return PI_SUCCESS; } @@ -3244,10 +3206,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreWait: { CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. - return PI_SUCCESS; - } const detail::PluginPtr &Plugin = MQueue->getPlugin(); Plugin->call( @@ -3258,10 +3216,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreSignal: { CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. 
- return PI_SUCCESS; - } const detail::PluginPtr &Plugin = MQueue->getPlugin(); Plugin->call( diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8ba0cceee9e6a..89cabd134a7e1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -377,10 +377,9 @@ class Command { std::string MSubmissionFileName; std::string MSubmissionFunctionName; - // This flag allows to control whether host event should be set complete - // after successfull enqueue of command. Event is considered as host event if - // either it's is_host() return true or there is no backend representation - // of event (i.e. getHandleRef() return reference to nullptr value). + // This flag allows to control whether event should be set complete + // after successfull enqueue of command. Event is considered as "host" event if + // there is no backend representation of event (i.e. getHandleRef() return reference to nullptr value). // By default the flag is set to true due to most of host operations are // synchronous. The only asynchronous operation currently is host-task. bool MShouldCompleteEventIfPossible = true; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f0c5dc670aa05..196232b95d734 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -50,9 +50,7 @@ static bool doOverlap(const Requirement *LHS, const Requirement *RHS) { } static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) { - // Consider two different host contexts to be the same to avoid additional - // allocation on the host - return LHS == RHS || (LHS->is_host() && RHS->is_host()); + return LHS == RHS; } /// Checks if current requirement is requirement for sub buffer. 
diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7b6c837131658..0b061a86dbc62 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -105,14 +105,6 @@ EventImplPtr Scheduler::addCG( auto *CGExecKernelPtr = static_cast(CommandGroup.get()); Streams = CGExecKernelPtr->getStreams(); CGExecKernelPtr->clearStreams(); - // Stream's flush buffer memory is mainly initialized in stream's __init - // method. However, this method is not available on host device. - // Initializing stream's flush buffer on the host side in a separate task. - if (Queue->is_host()) { - for (const StreamImplPtr &Stream : Streams) { - Stream->initStreamHost(Queue); - } - } } std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); @@ -394,18 +386,6 @@ void Scheduler::enqueueUnblockedCommands( } } -Scheduler::Scheduler() { - sycl::device HostDevice = - createSyclObjFromImpl(device_impl::getHostDeviceImpl()); - sycl::context HostContext{HostDevice}; - DefaultHostQueue = QueueImplPtr( - new queue_impl(detail::getSyclObjImpl(HostDevice), - detail::getSyclObjImpl(HostContext), /*AsyncHandler=*/{}, - /*PropList=*/{sycl::property::queue::enable_profiling()})); -} - -Scheduler::~Scheduler() { DefaultHostQueue.reset(); } - void Scheduler::releaseResources(BlockingT Blocking) { // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, @@ -726,11 +706,11 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. 
if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || + !SyclEventImplPtr->isHost()) || SyclEventImplPtr->isNOP()) { return true; } - if (SyclEventImplPtr->is_host()) { + if (SyclEventImplPtr->isHost()) { return SyclEventImplPtr->isCompleted(); } // Cross-context dependencies can't be passed to the backend directly. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 09437928f1d32..6fa95cb4a4a54 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -450,10 +450,6 @@ class Scheduler { /// \return true if an instance of the scheduler object exists. static bool isInstanceAlive(); - QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; } - - const QueueImplPtr &getDefaultHostQueue() const { return DefaultHostQueue; } - static MemObjRecord *getMemObjRecord(const Requirement *const Req); void deferMemObjRelease(const std::shared_ptr &MemObj); @@ -468,8 +464,6 @@ class Scheduler { bool isInFusionMode(QueueIdT Queue); - Scheduler(); - ~Scheduler(); void releaseResources(BlockingT Blocking = BlockingT::BLOCKING); bool isDeferredMemObjectsEmpty(); @@ -966,8 +960,6 @@ class Scheduler { MAuxiliaryResources; std::mutex MAuxiliaryResourcesMutex; - QueueImplPtr DefaultHostQueue; - friend class Command; friend class DispatchHostTask; friend class queue_impl; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8223c9330814e..749ab6750df5e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -273,12 +273,6 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - if (MQueue->is_host()) { - MHostKernel->call(MNDRDesc, (NewEvent) - ? 
NewEvent->getHostProfilingInfo() - : nullptr); - Result = PI_SUCCESS; - } else { if (MQueue->getDeviceImplPtr()->getBackend() == backend::ext_intel_esimd_emulator) { // Capture the host timestamp for profiling (queue time) @@ -313,7 +307,6 @@ event handler::finalize() { MKernelName.c_str(), RawEvents, NewEvent, nullptr, MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); } - } #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { @@ -351,7 +344,7 @@ event handler::finalize() { if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); - else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) + else if (NewEvent->isHost() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); NewEvent->setEnqueued(); From 31a702c1c2ec81aa2430595230761edc75d52dce Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 23 May 2024 06:33:00 -0700 Subject: [PATCH 08/58] not-buildable: remove is_host from obvious places, part2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/backend_impl.hpp | 1 - sycl/source/detail/bindless_images.cpp | 4 - sycl/source/detail/context_impl.cpp | 10 --- sycl/source/detail/device_impl.hpp | 6 +- sycl/source/detail/filter_selector_impl.cpp | 3 - sycl/source/detail/helpers.cpp | 4 +- sycl/source/detail/kernel_impl.cpp | 4 +- sycl/source/detail/kernel_impl.hpp | 22 ------ sycl/source/detail/kernel_info.hpp | 73 ------------------- sycl/source/detail/platform_impl.cpp | 17 +---- sycl/source/detail/platform_impl.hpp | 12 --- sycl/source/detail/platform_info.hpp | 30 -------- sycl/source/detail/program_impl.cpp | 46 +++--------- sycl/source/detail/program_impl.hpp | 6 -- sycl/source/detail/queue_impl.cpp | 35 ++++----- sycl/source/detail/queue_impl.hpp | 61 +++++----------- sycl/source/detail/scheduler/commands.cpp | 20 +---- .../source/detail/scheduler/graph_builder.cpp | 2 +- 18 files changed, 56 insertions(+), 
300 deletions(-) diff --git a/sycl/source/detail/backend_impl.hpp b/sycl/source/detail/backend_impl.hpp index ca23ceb48815c..0c160ed1920c4 100644 --- a/sycl/source/detail/backend_impl.hpp +++ b/sycl/source/detail/backend_impl.hpp @@ -15,7 +15,6 @@ inline namespace _V1 { namespace detail { template backend getImplBackend(const T &Impl) { - assert(!Impl->is_host() && "Cannot get the backend for host."); return Impl->getContextImplPtr()->getBackend(); } diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index 174fe087ede4f..fbf90e692598e 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -746,10 +746,6 @@ __SYCL_EXPORT void *pitched_alloc_device(size_t *resultPitch, std::shared_ptr CtxImpl = sycl::detail::getSyclObjImpl(syclContext); - if (CtxImpl->is_host()) { - throw sycl::exception(sycl::make_error_code(sycl::errc::memory_allocation), - "Cannot allocate pitched memory on host!"); - } pi_context PiContext = CtxImpl->getHandleRef(); const sycl::detail::PluginPtr &Plugin = CtxImpl->getPlugin(); diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 87663c4e10775..0c79ed2f70462 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -162,8 +162,6 @@ const async_handler &context_impl::get_async_handler() const { template <> uint32_t context_impl::get_info() const { - if (is_host()) - return 0; return get_context_info(this->getHandleRef(), this->getPlugin()); } @@ -183,8 +181,6 @@ context_impl::get_info() sycl::memory_order::relaxed, sycl::memory_order::acquire, sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet< sycl::memory_order, info::device::atomic_memory_order_capabilities>( @@ -200,8 +196,6 @@ context_impl::get_info() sycl::memory_scope::work_item, sycl::memory_scope::sub_group, 
sycl::memory_scope::work_group, sycl::memory_scope::device, sycl::memory_scope::system}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet< sycl::memory_scope, info::device::atomic_memory_scope_capabilities>( @@ -216,8 +210,6 @@ context_impl::get_info() const { sycl::memory_order::relaxed, sycl::memory_order::acquire, sycl::memory_order::release, sycl::memory_order::acq_rel, sycl::memory_order::seq_cst}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet( @@ -232,8 +224,6 @@ context_impl::get_info() const { sycl::memory_scope::work_item, sycl::memory_scope::sub_group, sycl::memory_scope::work_group, sycl::memory_scope::device, sycl::memory_scope::system}; - if (is_host()) - return CapabilityList; GetCapabilitiesIntersectionSet( diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 2526647152892..efec017d372f5 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -80,18 +80,18 @@ class device_impl { /// Check if device is a CPU device /// /// \return true if SYCL device is a CPU device - bool is_cpu() const { return (!is_host() && (MType == PI_DEVICE_TYPE_CPU)); } + bool is_cpu() const { return MType == PI_DEVICE_TYPE_CPU; } /// Check if device is a GPU device /// /// \return true if SYCL device is a GPU device - bool is_gpu() const { return (!is_host() && (MType == PI_DEVICE_TYPE_GPU)); } + bool is_gpu() const { return MType == PI_DEVICE_TYPE_GPU; } /// Check if device is an accelerator device /// /// \return true if SYCL device is an accelerator device bool is_accelerator() const { - return (!is_host() && (MType == PI_DEVICE_TYPE_ACC)); + return MType == PI_DEVICE_TYPE_ACC; } /// Return device type diff --git a/sycl/source/detail/filter_selector_impl.cpp b/sycl/source/detail/filter_selector_impl.cpp index 4b5f8e836ee6d..0043622d62483 100644 --- a/sycl/source/detail/filter_selector_impl.cpp +++ b/sycl/source/detail/filter_selector_impl.cpp @@ 
-99,9 +99,6 @@ filter_selector_impl::filter_selector_impl(const std::string &Input) } int filter_selector_impl::operator()(const device &Dev) const { - assert(!sycl::detail::getSyclObjImpl(Dev)->is_host() && - "filter_selector_impl should not be used with host."); - int Score = REJECT_DEVICE_SCORE; for (auto &Filter : mFilters) { diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 1bdb2ddbd4697..75c6fd72b8fd0 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -32,7 +32,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || + !SyclEventImplPtr->isHost()) || SyclEventImplPtr->isNOP()) { continue; } @@ -41,7 +41,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { bool NoPiEvent = SyclEventImplPtr->MCommand && !static_cast(SyclEventImplPtr->MCommand)->producesPiEvent(); - if (SyclEventImplPtr->is_host() || + if (SyclEventImplPtr->isHost() || SyclEventImplPtr->getContextImpl() != Context || NoPiEvent) { // Call wait, because the command for the event might not have been // enqueued when kernel fusion is happening. 
diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 9c5a1851cd3b1..b4ab6b232eef9 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -76,9 +76,7 @@ kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) kernel_impl::~kernel_impl() { // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host()) { - getPlugin()->call(MKernel); - } + getPlugin()->call(MKernel); } bool kernel_impl::isCreatedFromSource() const { diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 1e56e6da4dc53..1a1542d0d409b 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -103,20 +103,10 @@ class kernel_impl { /// /// \return a valid cl_kernel instance cl_kernel get() const { - if (is_host()) { - throw invalid_object_error( - "This instance of kernel doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_KERNEL); - } getPlugin()->call(MKernel); return pi::cast(MKernel); } - /// Check if the associated SYCL context is a SYCL host context. - /// - /// \return true if this SYCL kernel is a host kernel. 
- bool is_host() const { return MContext->is_host(); } - const PluginPtr &getPlugin() const { return MContext->getPlugin(); } /// Query information from the kernel object using the info::kernel_info @@ -217,11 +207,6 @@ template inline typename Param::return_type kernel_impl::get_info() const { static_assert(is_kernel_info_desc::value, "Invalid kernel information descriptor"); - if (is_host()) { - // TODO implement - assert(0 && "Not implemented"); - } - if constexpr (std::is_same_v) checkIfValidForNumArgsInfoQuery(); @@ -248,9 +233,6 @@ kernel_impl::get_info(const device &Device) const { "is a built-in kernel."); } - if (is_host()) { - return get_kernel_device_specific_info_host(Device); - } return get_kernel_device_specific_info( this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), getPlugin()); @@ -260,10 +242,6 @@ template inline typename Param::return_type kernel_impl::get_info(const device &Device, const sycl::range<3> &WGSize) const { - if (is_host()) { - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_ERROR_INVALID_DEVICE); - } return get_kernel_device_specific_info_with_input( this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), WGSize, getPlugin()); diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 12256158eed49..79c0f73c952de 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -137,79 +137,6 @@ uint32_t get_kernel_device_specific_info_with_input( return Result; } -template -inline typename Param::return_type -get_kernel_device_specific_info_host(const sycl::device &Device) = delete; - -template <> -inline sycl::range<3> get_kernel_device_specific_info_host< - info::kernel_device_specific::global_work_size>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline size_t get_kernel_device_specific_info_host< - 
info::kernel_device_specific::work_group_size>(const sycl::device &Dev) { - return Dev.get_info(); -} - -template <> -inline sycl::range<3> get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_work_group_size>( - const sycl::device &) { - return {0, 0, 0}; -} - -template <> -inline size_t get_kernel_device_specific_info_host< - info::kernel_device_specific::preferred_work_group_size_multiple>( - const sycl::device &Dev) { - return get_kernel_device_specific_info_host< - info::kernel_device_specific::work_group_size>(Dev); -} - -template <> -inline size_t get_kernel_device_specific_info_host< - info::kernel_device_specific::private_mem_size>(const sycl::device &) { - return 0; -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::ext_codeplay_num_regs>(const sycl::device &) { - return 0; -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::max_num_sub_groups>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::max_sub_group_size>(const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_num_sub_groups>( - const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} - -template <> -inline uint32_t get_kernel_device_specific_info_host< - info::kernel_device_specific::compile_sub_group_size>( - const sycl::device &) { - throw invalid_object_error("This instance of kernel is a host instance", - PI_ERROR_INVALID_KERNEL); -} } // namespace detail } // namespace _V1 } // namespace sycl diff --git 
a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 9700fde466803..2caf958bb842b 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -79,9 +79,6 @@ static bool IsBannedPlatform(platform Platform) { // is disabled as well. // auto IsMatchingOpenCL = [](platform Platform, const std::string_view name) { - if (getSyclObjImpl(Platform)->is_host()) - return false; - const bool HasNameMatch = Platform.get_info().find( name) != std::string::npos; const auto Backend = detail::getSyclObjImpl(Platform)->getBackend(); @@ -466,15 +463,9 @@ platform_impl::get_devices(info::device_type DeviceType) const { ods_target_list *OdsTargetList = SYCLConfig::get(); - if (is_host() && (DeviceType == info::device_type::host || - DeviceType == info::device_type::all)) { - Res.push_back( - createSyclObjFromImpl(device_impl::getHostDeviceImpl())); - } - // If any DeviceType other than host was requested for host platform, // an empty vector will be returned. 
- if (is_host() || DeviceType == info::device_type::host) + if (DeviceType == info::device_type::host) return Res; pi_uint32 NumDevices = 0; @@ -556,9 +547,6 @@ platform_impl::get_devices(info::device_type DeviceType) const { } bool platform_impl::has_extension(const std::string &ExtensionName) const { - if (is_host()) - return false; - std::string AllExtensionNames = get_platform_info_string_impl( MPlatform, getPlugin(), detail::PiInfoCode::value); @@ -580,9 +568,6 @@ pi_native_handle platform_impl::getNative() const { template typename Param::return_type platform_impl::get_info() const { - if (is_host()) - return get_platform_info_host(); - return get_platform_info(this->getHandleRef(), getPlugin()); } diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 0bb8d1ab77e2f..e13bd0a3a1b31 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -89,9 +89,6 @@ class platform_impl { template typename Param::return_type get_backend_info() const; - /// \return true if this SYCL platform is a host platform. - bool is_host() const { return MHostPlatform; }; - /// Returns the backend of this platform. backend getBackend(void) const { return MBackend; } @@ -107,11 +104,6 @@ class platform_impl { /// \return an instance of OpenCL cl_platform_id. cl_platform_id get() const { - if (is_host()) { - throw invalid_object_error( - "This instance of platform doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_PLATFORM); - } return pi::cast(MPlatform); } @@ -123,10 +115,6 @@ class platform_impl { /// /// \return a raw plug-in platform handle. 
const sycl::detail::pi::PiPlatform &getHandleRef() const { - if (is_host()) - throw invalid_object_error("This instance of platform is a host instance", - PI_ERROR_INVALID_PLATFORM); - return MPlatform; } diff --git a/sycl/source/detail/platform_info.hpp b/sycl/source/detail/platform_info.hpp index 42c41b5063cf5..70bcd626024d9 100644 --- a/sycl/source/detail/platform_info.hpp +++ b/sycl/source/detail/platform_info.hpp @@ -59,36 +59,6 @@ get_platform_info(sycl::detail::pi::PiPlatform Plt, const PluginPtr &Plugin) { return split_string(Result, ' '); } -// Host platform information methods -template -inline typename Param::return_type get_platform_info_host() = delete; - -template <> -inline std::string get_platform_info_host() { - return "FULL PROFILE"; -} - -template <> -inline std::string get_platform_info_host() { - return "1.2"; -} - -template <> inline std::string get_platform_info_host() { - return "SYCL host platform"; -} - -template <> -inline std::string get_platform_info_host() { - return ""; -} - -template <> -inline std::vector -get_platform_info_host() { - // TODO update when appropriate - return {}; -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index d65f3163b961f..584b2487f5dee 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -72,9 +72,8 @@ program_impl::program_impl( } MDevices = ProgramList[0]->MDevices; std::vector DevicesSorted; - if (!is_host()) { - DevicesSorted = sort_devices_by_cl_device_id(MDevices); - } + DevicesSorted = sort_devices_by_cl_device_id(MDevices); + check_device_feature_support(MDevices); std::list> Locks; for (const auto &Prg : ProgramList) { @@ -85,18 +84,16 @@ program_impl::program_impl( "Not all programs are associated with the same context", PI_ERROR_INVALID_PROGRAM); } - if (!is_host()) { - std::vector PrgDevicesSorted = - sort_devices_by_cl_device_id(Prg->MDevices); - if 
(PrgDevicesSorted != DevicesSorted) { - throw invalid_object_error( - "Not all programs are associated with the same devices", - PI_ERROR_INVALID_PROGRAM); - } + + std::vector PrgDevicesSorted = + sort_devices_by_cl_device_id(Prg->MDevices); + if (PrgDevicesSorted != DevicesSorted) { + throw invalid_object_error( + "Not all programs are associated with the same devices", + PI_ERROR_INVALID_PROGRAM); } } - if (!is_host()) { std::vector Devices(get_pi_devices()); std::vector Programs; bool NonInterOpToLink = false; @@ -113,7 +110,6 @@ program_impl::program_impl( LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, nullptr, &MProgram); Plugin->checkPiResult(Err); - } } program_impl::program_impl(ContextImplPtr Context, @@ -208,7 +204,7 @@ program_impl::program_impl(ContextImplPtr Context, program_impl::~program_impl() { // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host() && MProgram != nullptr) { + if (MProgram != nullptr) { const PluginPtr &Plugin = getPlugin(); Plugin->call(MProgram); } @@ -216,11 +212,6 @@ program_impl::~program_impl() { cl_program program_impl::get() const { throw_if_state_is(program_state::none); - if (is_host()) { - throw invalid_object_error( - "This instance of program doesn't support OpenCL interoperability.", - PI_ERROR_INVALID_PROGRAM); - } getPlugin()->call(MProgram); return pi::cast(MProgram); } @@ -229,19 +220,16 @@ void program_impl::compile_with_kernel_name(std::string KernelName, std::string CompileOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::none); - if (!is_host()) { create_pi_program_with_kernel_name( KernelName, /*JITCompilationIsRequired=*/(!CompileOptions.empty())); compile(CompileOptions); - } MState = program_state::compiled; } void program_impl::link(std::string LinkOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::compiled); - if (!is_host()) { check_device_feature_support(MDevices); std::vector 
Devices(get_pi_devices()); const PluginPtr &Plugin = getPlugin(); @@ -263,16 +251,12 @@ void program_impl::link(std::string LinkOptions) { Plugin->checkPiResult(Err); MLinkOptions = LinkOptions; MBuildOptions = LinkOptions; - } MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, bool IsCreatedFromSource) const { throw_if_state_is(program_state::none); - if (is_host()) { - return !IsCreatedFromSource; - } std::vector Devices(get_pi_devices()); pi_uint64 function_ptr; @@ -299,14 +283,6 @@ kernel program_impl::get_kernel(std::string KernelName, std::shared_ptr PtrToSelf, bool IsCreatedFromSource) const { throw_if_state_is(program_state::none); - if (is_host()) { - if (IsCreatedFromSource) - throw invalid_object_error("This instance of program is a host instance", - PI_ERROR_INVALID_PROGRAM); - - return createSyclObjFromImpl( - std::make_shared(MContext, PtrToSelf)); - } auto [Kernel, ArgMask] = get_pi_kernel_arg_mask_pair(KernelName); return createSyclObjFromImpl(std::make_shared( Kernel, MContext, PtrToSelf, IsCreatedFromSource, nullptr, ArgMask)); @@ -314,8 +290,6 @@ kernel program_impl::get_kernel(std::string KernelName, std::vector> program_impl::get_binaries() const { throw_if_state_is(program_state::none); - if (is_host()) - return {}; std::vector> Result; const PluginPtr &Plugin = getPlugin(); diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index 32a0c7fd38bfe..1fa8767774961 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -134,9 +134,6 @@ class program_impl { /// not retained before return. const sycl::detail::pi::PiProgram &getHandleRef() const { return MProgram; } - /// \return true if this SYCL program is a host program. - bool is_host() const { return MContext->is_host(); } - /// Compiles the SYCL kernel function into the encapsulated raw program. /// /// The kernel function is defined by its name. 
This member function @@ -215,14 +212,11 @@ class program_impl { /// \return the SYCL context that this program was constructed with. context get_context() const { - if (is_host()) - return context(); return createSyclObjFromImpl(MContext); } /// \return the Plugin associated with the context of this program. const PluginPtr &getPlugin() const { - assert(!is_host() && "Plugin is not available for Host."); return MContext->getPlugin(); } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 05c579f78a405..2c7876ea14c08 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -42,10 +42,9 @@ getPIEvents(const std::vector &DepEvents) { template <> uint32_t queue_impl::get_info() const { sycl::detail::pi::PiResult result = PI_SUCCESS; - if (!is_host()) - getPlugin()->call( - MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, - nullptr); + getPlugin()->call( + MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, + nullptr); return result; } @@ -142,8 +141,7 @@ event queue_impl::memset(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); xpti::addMetadata(TEvent, "value_set", Value); xpti::addMetadata(TEvent, "memory_size", Count); @@ -190,8 +188,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 
0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); xpti::addMetadata(TEvent, "dest_memory_ptr", reinterpret_cast(Dest)); @@ -430,9 +427,7 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, if (WaitEvent) { device D = get_device(); std::string DevStr; - if (getSyclObjImpl(D)->is_host()) - DevStr = "HOST"; - else if (D.is_cpu()) + if (D.is_cpu()) DevStr = "CPU"; else if (D.is_gpu()) DevStr = "GPU"; @@ -588,14 +583,12 @@ bool queue_impl::ext_oneapi_empty() const { } // Check the status of the backend queue if this is not a host queue. - if (!is_host()) { - pi_bool IsReady = false; - getPlugin()->call( - MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, - nullptr); - if (!IsReady) - return false; - } + pi_bool IsReady = false; + getPlugin()->call( + MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, + nullptr); + if (!IsReady) + return false; // We may have events like host tasks which are not submitted to the backend // queue so we need to get their status separately. @@ -609,7 +602,7 @@ bool queue_impl::ext_oneapi_empty() const { EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt) if (std::shared_ptr EventImplSharedPtr = EventImplWeakPtrIt->lock()) - if (EventImplSharedPtr->is_host() && + if (EventImplSharedPtr->isHost() && EventImplSharedPtr ->get_info() != info::event_command_status::complete) @@ -641,7 +634,7 @@ void queue_impl::revisitUnenqueuedCommandsState( std::remove_if( Deps.UnenqueuedCmdEvents.begin(), Deps.UnenqueuedCmdEvents.end(), [](const EventImplPtr &CommandEvent) { - return (CommandEvent->is_host() ? CommandEvent->isCompleted() + return (CommandEvent->isHost() ? 
CommandEvent->isCompleted() : CommandEvent->isEnqueued()); }), Deps.UnenqueuedCmdEvents.end()); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index dff24ad1dfec1..c205b5916f302 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -106,13 +106,12 @@ class queue_impl { queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), - MPropList(PropList), MHostQueue(MDevice->is_host()), + MPropList(PropList), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -124,8 +123,7 @@ class queue_impl { if (MDevice->has(aspect::queue_profiling)) { // When piGetDeviceAndHostTimer is not supported, compute the // profiling time OpenCL version < 2.1 case - if (!getDeviceImplPtr()->is_host() && - !getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) + if (!getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) MFallbackProfiling = true; } else { throw sycl::exception(make_error_code(errc::feature_not_supported), @@ -154,7 +152,7 @@ class queue_impl { "Cannot enable fusion if device does not support fusion"); } if (!Context->isDeviceValid(Device)) { - if (!Context->is_host() && Context->getBackend() == backend::opencl) + if (Context->getBackend() == backend::opencl) throw sycl::invalid_object_error( "Queue cannot be constructed with the given context and device " "since the device is not a member of the context (descendants of " @@ -166,13 +164,12 @@ class queue_impl { "descendant of its member.", PI_ERROR_INVALID_DEVICE); } - if (!MHostQueue) { - const QueueOrder QOrder = - 
MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; - MQueues.push_back(createQueue(QOrder)); - // This section is the second part of the instrumentation that uses the - // tracepoint information and notifies - } + + const QueueOrder QOrder = + MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO; + MQueues.push_back(createQueue(QOrder)); + // This section is the second part of the instrumentation that uses the + // tracepoint information and notifies // We enable XPTI tracing events using the TLS mechanism; if the code // location data is available, then the tracing data will be rich. @@ -198,13 +195,11 @@ class queue_impl { MDevice->getDeviceName()); xpti::addMetadata( TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); - if (!MHostQueue) - xpti::addMetadata(TEvent, "queue_handle", + xpti::addMetadata(TEvent, "queue_handle", reinterpret_cast(getHandleRef())); }); // Also publish to TLS @@ -263,13 +258,11 @@ class queue_impl { MDevice->getDeviceName()); xpti::addMetadata( TEvent, "sycl_device", - reinterpret_cast( - MDevice->is_host() ? 0 : MDevice->getHandleRef())); + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); - if (!MHostQueue) - xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); + xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); }); // Also publish to TLS before notification xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); @@ -287,13 +280,12 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. 
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), + : MContext(Context), MAsyncHandler(AsyncHandler), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -309,13 +301,11 @@ class queue_impl { queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), - MHostQueue(false), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)) { + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)) { queue_impl_interop(PiQueue); } @@ -336,19 +326,12 @@ class queue_impl { } #endif throw_asynchronous(); - if (!MHostQueue) { - cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); - } + cleanup_fusion_cmd(); + getPlugin()->call(MQueues[0]); } /// \return an OpenCL interoperability queue handle. cl_command_queue get() { - if (MHostQueue) { - throw invalid_object_error( - "This instance of queue doesn't support OpenCL interoperability", - PI_ERROR_INVALID_QUEUE); - } getPlugin()->call(MQueues[0]); return pi::cast(MQueues[0]); } @@ -367,9 +350,6 @@ class queue_impl { /// \return an associated SYCL device. device get_device() const { return createSyclObjFromImpl(MDevice); } - /// \return true if this queue is a SYCL host queue. - bool is_host() const { return MHostQueue; } - /// \return true if this queue has discard_events support. 
bool supportsDiscardingPiEvents() const { return MSupportsDiscardingPiEvents; @@ -859,7 +839,7 @@ class queue_impl { "function objects should use the sycl::handler API instead."); } - handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue); Handler.saveCodeLoc(Loc); PreventSubmit = true; try { @@ -969,7 +949,6 @@ class queue_impl { /// Iterator through MQueues. size_t MNextQueueIdx = 0; - const bool MHostQueue = false; /// Indicates that a native out-of-order queue could not be created and we /// need to emulate it with multiple native in-order queues. bool MEmulateOOO = false; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0739ac77373b7..d6c41f39e9942 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2246,7 +2246,7 @@ void SetArgBasedOnType( const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_stream: @@ -2300,13 +2300,6 @@ void SetArgBasedOnType( break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - if (IsHost) { - throw sycl::exception( - sycl::make_error_code(sycl::errc::feature_not_supported), - "SYCL2020 specialization constants are not yet supported on host " - "device " + - codeToString(PI_ERROR_INVALID_OPERATION)); - } assert(DeviceImageImpl != nullptr); sycl::detail::pi::PiMem SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref(); @@ -2343,7 +2336,7 @@ static pi_result SetKernelParamsAndLaunch( auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Plugin, Kernel, 
DeviceImageImpl, getMemAllocationFunc, - Queue->get_context(), Queue->is_host(), Arg, + Queue->get_context(), Arg, NextTrueIndex); }; @@ -2940,8 +2933,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; std::vector &Args = ExecKernel->MArgs; - if (MQueue->is_host() || (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator)) { + if (MQueue->getDeviceImplPtr()->getBackend() == + backend::ext_intel_esimd_emulator) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -2954,10 +2947,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - if (MQueue->is_host()) { - ExecKernel->MHostKernel->call(NDRDesc, - getEvent()->getHostProfilingInfo()); - } else { assert(MQueue->getDeviceImplPtr()->getBackend() == backend::ext_intel_esimd_emulator); if (MEvent != nullptr) @@ -2967,7 +2956,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { reinterpret_cast(ExecKernel->MHostKernel->getPtr()), NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - } return PI_SUCCESS; } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 196232b95d734..d1b57182d78ff 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,7 +678,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (const char *HUMConfig = SYCLConfig::get()) { if (std::strcmp(HUMConfig, "0") == 0) - return Ctx->is_host(); + return false; if (std::strcmp(HUMConfig, "1") == 0) return true; } From fa08c2b3314604af314406fb73bcaf33e669f04a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 27 May 2024 02:12:53 -0700 Subject: [PATCH 09/58] non-buildable: remove is_host from obvious 
places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/context_impl.hpp | 7 +---- sycl/source/detail/device_impl.cpp | 8 ++--- sycl/source/detail/usm/usm_impl.cpp | 47 ----------------------------- 3 files changed, 3 insertions(+), 59 deletions(-) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index af20236fc4b23..203242ee40077 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -97,11 +97,6 @@ class context_impl { /// \return an instance of OpenCL cl_context. cl_context get() const; - /// Checks if this context is a host context. - /// - /// \return true if this context is a host context. - bool is_host() const; - /// Gets asynchronous exception handler. /// /// \return an instance of SYCL async_handler. @@ -182,7 +177,7 @@ class context_impl { // OpenCL does not support using descendants of context members within that // context yet. // TODO remove once this limitation is lifted - if (!is_host() && Device->getBackend() == backend::opencl) + if (Device->getBackend() == backend::opencl) return hasDevice(Device); while (!hasDevice(Device)) { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 2e87300425c20..c677b9165d71f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -327,8 +327,6 @@ bool device_impl::has(aspect Aspect) const { size_t return_size = 0; switch (Aspect) { - case aspect::host: - return is_host(); case aspect::cpu: return is_cpu(); case aspect::gpu: @@ -369,16 +367,14 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_mem_channel: return get_info(); case aspect::usm_atomic_host_allocations: - return is_host() || - (get_device_info_impl:: get(MPlatform->getDeviceImpl(MDevice)) & PI_USM_CONCURRENT_ATOMIC_ACCESS); case aspect::usm_shared_allocations: return get_info(); case aspect::usm_atomic_shared_allocations: - return is_host() || - (get_device_info_impl:: 
get(MPlatform->getDeviceImpl(MDevice)) & PI_USM_CONCURRENT_ATOMIC_ACCESS); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index ecf63bc63e427..753c27d5f678d 100755 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -73,20 +73,6 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, return nullptr; std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - if (CtxImpl->is_host()) { - if (!Alignment) { - // worst case default - Alignment = 128; - } - - aligned_allocator Alloc(Alignment); - try { - RetVal = Alloc.allocate(Size); - } catch (const std::bad_alloc &) { - // Conform with Specification behavior - RetVal = nullptr; - } - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); pi_result Error = PI_ERROR_INVALID_VALUE; @@ -128,7 +114,6 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, // The spec wants a nullptr returned, not an exception. if (Error != PI_SUCCESS) return nullptr; - } #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", reinterpret_cast(RetVal)); @@ -154,24 +139,6 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, if (Size == 0) return nullptr; - if (CtxImpl->is_host()) { - if (Kind == alloc::unknown) { - RetVal = nullptr; - } else { - if (!Alignment) { - // worst case default - Alignment = 128; - } - - aligned_allocator Alloc(Alignment); - try { - RetVal = Alloc.allocate(Size); - } catch (const std::bad_alloc &) { - // Conform with Specification behavior - RetVal = nullptr; - } - } - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); pi_result Error = PI_ERROR_INVALID_VALUE; @@ -245,7 +212,6 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, // The spec wants a nullptr returned, not an exception. 
if (Error != PI_SUCCESS) return nullptr; - } return RetVal; } @@ -284,14 +250,9 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, void freeInternal(void *Ptr, const context_impl *CtxImpl) { if (Ptr == nullptr) return; - if (CtxImpl->is_host()) { - // need to use alignedFree here for Windows - detail::OSUtil::alignedFree(Ptr); - } else { pi_context C = CtxImpl->getHandleRef(); const PluginPtr &Plugin = CtxImpl->getPlugin(); Plugin->call(C, Ptr); - } } void free(void *Ptr, const context &Ctxt, @@ -578,10 +539,6 @@ alloc get_pointer_type(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - // Everything on a host device is just system malloc so call it host - if (CtxImpl->is_host()) - return alloc::host; - pi_context PICtx = CtxImpl->getHandleRef(); pi_usm_type AllocTy; @@ -631,10 +588,6 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - // Just return the host device in the host context - if (CtxImpl->is_host()) - return Ctxt.get_devices()[0]; - // Check if ptr is a host allocation if (get_pointer_type(Ptr, Ctxt) == alloc::host) { auto Devs = CtxImpl->getDevices(); From d021de9af53da859390f6519730dd363b9b2d4bb Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 27 May 2024 06:03:56 -0700 Subject: [PATCH 10/58] not-buildable: remove is_host in simple places Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/buffer_impl.cpp | 3 --- sycl/source/detail/memory_manager.cpp | 27 +-------------------------- sycl/source/detail/memory_manager.hpp | 4 ---- sycl/source/detail/queue_impl.cpp | 11 ++++------- sycl/source/detail/sycl_mem_obj_t.cpp | 23 ++--------------------- 5 files changed, 7 insertions(+), 61 deletions(-) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 835c732a40bf9..d7d77205b162c 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ 
b/sycl/source/detail/buffer_impl.cpp @@ -25,9 +25,6 @@ void *buffer_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, bool HostPtrReadOnly = false; BaseT::determineHostPtr(Context, InitFromUserData, HostPtr, HostPtrReadOnly); - assert(!(nullptr == HostPtr && BaseT::useHostPtr() && Context->is_host()) && - "Internal error. Allocating memory on the host " - "while having use_host_ptr property"); return MemoryManager::allocateMemBuffer( std::move(Context), this, HostPtr, HostPtrReadOnly, BaseT::getSizeInBytes(), BaseT::MInteropEvent, BaseT::MInteropContext, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 840f95ea7a643..f4e42363cb6e1 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -266,11 +266,6 @@ void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, return; } - if (TargetContext->is_host()) { - MemObj->releaseHostMem(MemAllocation); - return; - } - const PluginPtr &Plugin = TargetContext->getPlugin(); memReleaseHelper(Plugin, pi::cast(MemAllocation)); } @@ -288,20 +283,6 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } -void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, - const sycl::property_list &) { - std::ignore = HostPtrReadOnly; - std::ignore = Size; - - // Can return user pointer directly if it is not a nullptr. 
- if (UserPtr) - return UserPtr; - - return MemObj->allocateHostMem(); - ; -} - void *MemoryManager::allocateInteropMemObject( ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, @@ -398,10 +379,7 @@ void *MemoryManager::allocateMemBuffer( const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; - if (TargetContext->is_host()) - MemPtr = - allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); - else if (UserPtr && InteropContext) + if (UserPtr && InteropContext) MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); @@ -420,9 +398,6 @@ void *MemoryManager::allocateMemImage( const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { - if (TargetContext->is_host()) - return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, - PropsList); if (UserPtr && InteropContext) return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 1d2800bf9dadc..7be17898bc0d9 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -85,10 +85,6 @@ class __SYCL_EXPORT MemoryManager { static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr); - static void *allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, - bool HostPtrReadOnly, size_t Size, - const sycl::property_list &PropsList); - static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2c7876ea14c08..bba423df61b60 100644 
--- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -283,12 +283,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. - if (is_host() || MEmulateOOO) + if (Event->isHost() || MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (is_host() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (Event->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); @@ -299,7 +299,7 @@ void queue_impl::addEvent(const event &Event) { /// but some events have no other owner. In this case, /// addSharedEvent will have the queue track the events via a shared pointer. void queue_impl::addSharedEvent(const event &Event) { - assert(is_host() || MEmulateOOO); + assert(MEmulateOOO); std::lock_guard Lock(MMutex); // Events stored in MEventsShared are not released anywhere else aside from // calls to queue::wait/wait_and_throw, which a user application might not @@ -369,9 +369,6 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), &EventImpl->getHandleRef(), EventImpl); - if (MContext->is_host()) - return MDiscardEvents ? createDiscardedEvent() : event(); - if (isInOrder()) { auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr : MExtGraphDeps.LastEventPtr; @@ -520,7 +517,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { // directly. 
Otherwise, only wait for unenqueued or host task events, starting // from the latest submitted task in order to minimize total amount of calls, // then handle the rest with piQueueFinish. - const bool SupportsPiFinish = !is_host() && !MEmulateOOO; + const bool SupportsPiFinish = !MEmulateOOO; for (auto EventImplWeakPtrIt = WeakEvents.rbegin(); EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) { if (std::shared_ptr EventImplSharedPtr = diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index bb4c5f4e1441d..87f005fe8ca78 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -33,12 +33,6 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - if (MInteropContext->is_host()) - throw sycl::invalid_parameter_error( - "Creation of interoperability memory object using host context is " - "not allowed", - PI_ERROR_INVALID_CONTEXT); - sycl::detail::pi::PiContext Context = nullptr; const PluginPtr &Plugin = getPlugin(); @@ -84,12 +78,6 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), MHostPtrProvided(true), MOwnNativeHandle(OwnNativeHandle) { - if (MInteropContext->is_host()) - throw sycl::invalid_parameter_error( - "Creation of interoperability memory object using host context is " - "not allowed", - PI_ERROR_INVALID_CONTEXT); - sycl::detail::pi::PiContext Context = nullptr; const PluginPtr &Plugin = getPlugin(); @@ -191,19 +179,12 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, // The data for the allocation can be provided via either the user pointer // (InitFromUserData, can be read-only) or a runtime-allocated read-write // HostPtr. 
We can have one of these scenarios: - // 1. The allocation is the first one and on host. InitFromUserData == true. - // 2. The allocation is the first one and isn't on host. InitFromUserData + // 1. The allocation is the first one and isn't on host. InitFromUserData // varies based on unified host memory support and whether or not the data can // be discarded. - // 3. The allocation is not the first one and is on host. InitFromUserData == - // false, HostPtr == nullptr. This can only happen if the allocation command - // is not linked since it would be a no-op otherwise. Attempt to reuse the - // user pointer if it's read-write, but do not copy its contents if it's not. - // 4. The allocation is not the first one and not on host. InitFromUserData == + // 2. The allocation is not the first one and not on host. InitFromUserData == // false, HostPtr is provided if the command is linked. The host pointer is // guaranteed to be reused in this case. - if (Context->is_host() && !MOpenCLInterop && !MHostPtrReadOnly) - InitFromUserData = true; if (InitFromUserData) { assert(!HostPtr && "Cannot init from user data and reuse host ptr provided " From 5b60b90c37d2bc388272eaed40f375403a148e80 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 28 May 2024 04:26:44 -0700 Subject: [PATCH 11/58] draft Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 27 ----------- sycl/source/detail/scheduler/commands.cpp | 21 ++++----- .../source/detail/scheduler/graph_builder.cpp | 46 +++++++++---------- sycl/source/detail/scheduler/scheduler.hpp | 32 +++++++++---- 4 files changed, 55 insertions(+), 71 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index f4e42363cb6e1..792c1c57bd3f1 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -921,9 +921,6 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, std::vector DepEvents, 
sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!SrcQueue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); - if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -962,9 +959,6 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_usm."); - if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -1000,9 +994,6 @@ void MemoryManager::prefetch_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in prefetch_usm."); - const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1024,9 +1015,6 @@ void MemoryManager::advise_usm( std::vector /*DepEvents*/, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in advise_usm."); - const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1049,9 +1037,6 @@ void MemoryManager::copy_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in copy_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1137,9 +1122,6 @@ void MemoryManager::fill_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { 
- assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1177,9 +1159,6 @@ void MemoryManager::memset_2d_usm( char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in fill_2d_usm."); - if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1714,8 +1693,6 @@ void MemoryManager::ext_oneapi_prefetch_usm_cmd_buffer( sycl::detail::pi::PiExtCommandBuffer CommandBuffer, void *Mem, size_t Length, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { - assert(!Context->is_host() && "Host queue not supported in prefetch_usm."); - const PluginPtr &Plugin = Context->getPlugin(); Plugin->call( CommandBuffer, Mem, Length, _pi_usm_migration_flags(0), Deps.size(), @@ -1728,8 +1705,6 @@ void MemoryManager::ext_oneapi_advise_usm_cmd_buffer( size_t Length, pi_mem_advice Advice, std::vector Deps, sycl::detail::pi::PiExtSyncPoint *OutSyncPoint) { - assert(!Context->is_host() && "Host queue not supported in advise_usm."); - const PluginPtr &Plugin = Context->getPlugin(); Plugin->call( CommandBuffer, Mem, Length, Advice, Deps.size(), Deps.data(), @@ -1748,8 +1723,6 @@ void MemoryManager::copy_image_bindless( const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(!Queue->getContextImplPtr()->is_host() && - "Host queue not supported in copy_image_bindless."); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d6c41f39e9942..0a25d7b3ee6c1 100644 --- a/sycl/source/detail/scheduler/commands.cpp 
+++ b/sycl/source/detail/scheduler/commands.cpp @@ -671,12 +671,9 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, const QueueImplPtr &WorkerQueue = getWorkerQueue(); const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr(); - // 1. Async work is not supported for host device. - // 2. Non-host events can be ignored if they are not fully initialized. - // 3. Some types of commands do not produce PI events after they are - // enqueued - // (e.g. alloca). Note that we can't check the pi event to make that - // distinction since the command might still be unenqueued at this point. + // 1. Non-host events can be ignored if they are not fully initialized. + // 2. Some types of commands do not produce PI events after they are + // enqueued (e.g. alloca). Note that we can't check the pi event to make that distinction since the command might still be unenqueued at this point. bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -692,11 +689,13 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext != WorkerContext && !WorkerContext->is_host()) { + if (DepEventContext == WorkerContext) + MPreparedDepsEvents.push_back(std::move(DepEvent)); + else + { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); - } else - MPreparedDepsEvents.push_back(std::move(DepEvent)); + } return ConnectionCmd; } @@ -3106,10 +3105,6 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { - if (MQueue->getDeviceImplPtr()->is_host()) { - // NOP for host device. 
- return PI_SUCCESS; - } const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d1b57182d78ff..bbb6d8de12f98 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -243,7 +243,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(), + MemObject->MRecord.reset(new MemObjRecord{Queue ? Queue->getContextImplPtr() : nullptr, LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -317,7 +317,7 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, assert(AllocaCmdSrc->MIsActive && "Expected source alloca command to be active"); - if (AllocaCmdSrc->getQueue()->is_host()) { + if (!AllocaCmdSrc->getQueue()) { UnMapMemObject *UnMapCmd = new UnMapMemObject( AllocaCmdDst, *AllocaCmdDst->getRequirement(), &AllocaCmdSrc->MMemAllocation, AllocaCmdDst->getQueue()); @@ -427,7 +427,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( Command *Scheduler::GraphBuilder::remapMemoryObject( MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, std::vector &ToEnqueue) { - assert(HostAllocaCmd->getQueue()->is_host() && + assert(!HostAllocaCmd->getQueue() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); @@ -525,16 +525,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, auto SYCLMemObj = static_cast(Req->MSYCLMemObj); SYCLMemObj->handleWriteAccessorCreation(); } - - const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); - - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); + // Host accessor is not attached to any queue so no 
QueueImplPtr object to be sent to getOrInsertMemObjRecord. + MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); if (MPrintOptionsArray[BeforeAddHostAcc]) printGraphAsDot("before_addHostAccessor"); markModifiedIfWrite(Record, Req); AllocaCommandBase *HostAllocaCmd = - getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue); + getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(), Record->MCurContext)) { @@ -682,6 +680,10 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (std::strcmp(HUMConfig, "1") == 0) return true; } + // Host tasks & host accessors are covered with no device context but provide the required support. + if (Ctx == nullptr) + return true; + for (const device &Device : Ctx->getDevices()) { if (!Device.get_info()) return false; @@ -696,9 +698,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - + auto Context = Queue != nullptr ? Queue->getContextImplPtr() : nullptr; AllocaCommandBase *AllocaCmd = findAllocaForReq( - Record, Req, Queue->getContextImplPtr(), /*AllowConst=*/false); + Record, Req, Context, /*AllowConst=*/false); if (!AllocaCmd) { std::vector ToCleanUp; @@ -729,7 +731,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // the user pointer is read-only is still not handled: it leads to // unnecessary copy on devices with unified host memory support.
const bool HostUnifiedMemory = - checkHostUnifiedMemory(Queue->getContextImplPtr()); + checkHostUnifiedMemory(Context); SYCLMemObjI *MemObj = Req->MSYCLMemObj; const bool InitFromUserData = Record->MAllocaCommands.empty() && (HostUnifiedMemory || MemObj->isInterop()); @@ -745,16 +747,14 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // There's no need to make a host allocation if the buffer is not // initialized with user data. if (MemObj->hasUserDataPtr()) { - QueueImplPtr DefaultHostQueue = - Scheduler::getInstance().getDefaultHostQueue(); AllocaCommand *HostAllocaCmd = new AllocaCommand( - DefaultHostQueue, FullReq, true /* InitFromUserData */, + nullptr, FullReq, true /* InitFromUserData */, nullptr /* LinkedAllocaCmd */, MemObj->isHostPointerReadOnly() /* IsConst */); Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->MCurContext = DefaultHostQueue->getContextImplPtr(); + Record->usedOnHost(); } } } else { @@ -766,7 +766,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if (Queue->is_host() != Record->MCurContext->is_host()) { + if ((Context != nullptr) + (Record->MCurContext != nullptr) == 1) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -778,7 +778,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( bool PinnedHostMemory = MemObj->usesPinnedHostMemory(); bool HostUnifiedMemoryOnNonHostDevice = - Queue->is_host() ? checkHostUnifiedMemory(Record->MCurContext) + Queue == nullptr ? 
checkHostUnifiedMemory(Record->MCurContext) : HostUnifiedMemory; if (PinnedHostMemory || HostUnifiedMemoryOnNonHostDevice) { AllocaCommandBase *LinkedAllocaCmdCand = findAllocaForReq( @@ -818,14 +818,14 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // construction, host allocation doesn't. So, device allocation should // always be active here. Also if the "follower" command is a device one // we have to change current context to the device one. - if (Queue->is_host()) { + if (Queue == nullptr) { AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, Context); for (Command *Dep : Deps) { Command *ConnCmd = AllocaCmd->addDep( DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp); @@ -1071,7 +1071,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. 
- if (Record->MCurContext->is_host() && + if (!Record->MCurContext && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1093,7 +1093,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (!Queue->is_host() && !Record->MCurContext->is_host()) + } else if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1714,12 +1714,12 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (!Queue->is_host() && !Record->MCurContext->is_host()) + if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue(), + nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 6fa95cb4a4a54..bcb930bc8194a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -199,12 +199,12 @@ using FusionMap = std::unordered_map; /// There must be a single MemObjRecord for each SYCL memory object. /// /// \ingroup sycl_graph -struct MemObjRecord { +class MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, - MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} - + MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx}, MCurHostAccess{ MCurContext == nullptr } {} +public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -214,16 +214,32 @@ struct MemObjRecord { // Contains latest write commands working with memory object. 
LeavesCollection MWriteLeaves; + // The flag indicates that the content of the memory object was/will be + // modified. Used while deciding if copy back needed. + bool MMemModified = false; + + void usedOnDevice(ContextImplPtr& NewContext) + { + MCurContext = NewContext; + MCurHostAccess = false; + } + + void usedOnHost() + { + MCurContext = nullptr; + MCurHostAccess = true; + } + + bool usedOnHost() { return MCurHostAccess; } +protected: // The context which has the latest state of the memory object. ContextImplPtr MCurContext; - // The mode this object can be accessed with from the host context. - // Valid only if the current context is host. + // The mode this object can be accessed with from the host (host_accessor). + // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - // The flag indicates that the content of the memory object was/will be - // modified. Used while deciding if copy back needed. - bool MMemModified = false; + bool MCurHostAccess = false; }; /// DPC++ graph scheduler class. 
From 21ed380f362dd560342f75f94a58b84da50edd9c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 29 May 2024 05:58:36 -0700 Subject: [PATCH 12/58] non-buildable: eliminate getDefaultHostQueue usage Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 14 +-- sycl/source/detail/scheduler/commands.cpp | 6 +- .../source/detail/scheduler/graph_builder.cpp | 96 +++++++++---------- sycl/source/detail/scheduler/scheduler.cpp | 4 +- sycl/source/detail/scheduler/scheduler.hpp | 18 ++-- 5 files changed, 65 insertions(+), 73 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 792c1c57bd3f1..3c0ad08e0763f 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -750,23 +750,23 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { - if (SrcQueue->is_host()) { - if (TgtQueue->is_host()) - copyH2H(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize, + if (!SrcQueue) { + if (!TgtQueue) + copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, - std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, + nullptr, DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); else - copyH2D(SYCLMemObj, (char *)SrcMem, std::move(SrcQueue), DimSrc, SrcSize, + copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, pi::cast(DstMem), std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); } else { - if (TgtQueue->is_host()) + if (!TgtQueue) copyD2H(SYCLMemObj, pi::cast(SrcMem), std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, - SrcElemSize, (char *)DstMem, std::move(TgtQueue), DimDst, DstSize, + SrcElemSize, 
(char *)DstMem, nullptr, DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); else diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0a25d7b3ee6c1..f0e3471a0f6f6 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2872,7 +2872,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, Copy->getDst(), - Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, + nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); @@ -2883,11 +2883,9 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Requirement *Req = (Requirement *)(Copy->getDst()); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - Scheduler::getInstance().getDefaultHostQueue(); - MemoryManager::copy( AllocaCmd->getSYCLMemObj(), Copy->getSrc(), - Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, + nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index bbb6d8de12f98..6c9244f9ecb2c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -49,15 +49,16 @@ static bool doOverlap(const Requirement *LHS, const Requirement *RHS) { LHS->MOffsetInBytes); } -static bool sameCtx(const ContextImplPtr &LHS, const ContextImplPtr &RHS) { - return LHS == RHS; -} - /// Checks if current requirement is requirement for sub buffer. 
static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } +static ContextImplPtr GetContext(const QueueImplPtr& Queue) +{ + return Queue ? Queue->getContextImplPtr() : nullptr; +} + /// Checks if the required access mode is allowed under the current one. static bool isAccessModeAllowed(access::mode Required, access::mode Current) { switch (Current) { @@ -243,7 +244,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{Queue ? Queue->getContextImplPtr() : nullptr, + MemObject->MRecord.reset(new MemObjRecord{GetContext(Queue), LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -282,8 +283,9 @@ void Scheduler::GraphBuilder::addNodeToLeaves( UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { + auto Context = GetContext(Queue); AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, Queue->getContextImplPtr()); + findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); UpdateHostRequirementCommand *UpdateCommand = new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData); @@ -292,7 +294,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( const Requirement *StoredReq = UpdateCommand->getRequirement(); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, Context); std::vector ToCleanUp; for (Command *Dep : Deps) { Command *ConnCmd = @@ -345,8 +347,9 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if (!AllocaCmdDst) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); + auto Context = GetContext(Queue); std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + 
findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); // Get parent allocation of sub buffer to perform full copy of whole buffer if (IsSuitableSubReq(Req)) { @@ -362,8 +365,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( // current context, need to find a parent alloca command for it (it must be // there) auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { - bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), - Record->MCurContext) && + bool Res = Record->isSameContext(AllocaCmd->getQueue()) && // Looking for a parent buffer alloca command AllocaCmd->getType() == Command::CommandType::ALLOCA; return Res; @@ -398,7 +400,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if ((Req->MAccessMode == access::mode::discard_write) || (Req->MAccessMode == access::mode::discard_read_write)) { - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); return nullptr; } else { // Full copy of buffer is needed to avoid loss of data that may be caused @@ -420,7 +422,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); for (Command *Cmd : ToCleanUp) cleanupCommand(Cmd); - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); return NewCmd; } @@ -474,7 +476,6 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( Command * Scheduler::GraphBuilder::addCopyBack(Requirement *Req, std::vector &ToEnqueue) { - QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue(); SYCLMemObjI *MemObj = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObj); if (Record && MPrintOptionsArray[BeforeAddCopyBack]) @@ -485,13 +486,13 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, return nullptr; std::set Deps = - findDepsForReq(Record, Req, HostQueue->getContextImplPtr()); + findDepsForReq(Record, Req, nullptr); AllocaCommandBase *SrcAllocaCmd = findAllocaForReq(Record, Req, Record->MCurContext); 
auto MemCpyCmdUniquePtr = std::make_unique( *SrcAllocaCmd->getRequirement(), SrcAllocaCmd, *Req, &Req->MData, - SrcAllocaCmd->getQueue(), std::move(HostQueue)); + SrcAllocaCmd->getQueue(), nullptr); if (!MemCpyCmdUniquePtr) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -534,8 +535,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, AllocaCommandBase *HostAllocaCmd = getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); - if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(), - Record->MCurContext)) { + if (Record->isSameContext(HostAllocaCmd->getQueue())) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer ? (static_cast( @@ -545,15 +545,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, ToEnqueue); } } else - insertMemoryMove(Record, Req, HostQueue, ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); Command *UpdateHostAccCmd = - insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue); + insertUpdateHostReqCmd(Record, Req, nullptr, ToEnqueue); // Need empty command to be blocked until host accessor is destructed EmptyCommand *EmptyCmd = - addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, - Command::BlockReason::HostAccessor, ToEnqueue); + addEmptyCmd(UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -564,14 +563,14 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, } Command *Scheduler::GraphBuilder::addCGUpdateHost( - std::unique_ptr CommandGroup, const QueueImplPtr &HostQueue, + std::unique_ptr CommandGroup, std::vector &ToEnqueue) { auto UpdateHost = static_cast(CommandGroup.get()); Requirement *Req = UpdateHost->getReqToUpdate(); - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); - return insertMemoryMove(Record, Req, HostQueue, ToEnqueue); + MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); + return 
insertMemoryMove(Record, Req, nullptr, ToEnqueue); } /// Start the search for the record from list of "leaf" commands and check if @@ -618,8 +617,10 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, // Going through copying memory between contexts is not supported. if (Dep.MDepCommand) - CanBypassDep &= - sameCtx(Context, Dep.MDepCommand->getQueue()->getContextImplPtr()); + { + auto DepQueue = Dep.MDepCommand->getQueue(); + CanBypassDep &= IsOnSameContext(Context, DepQueue); + } if (!CanBypassDep) { RetDeps.insert(DepCmd); @@ -658,7 +659,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( bool AllowConst) { auto IsSuitableAlloca = [&Context, Req, AllowConst](AllocaCommandBase *AllocaCmd) { - bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context); + bool Res = IsOnSameContext(Context, AllocaCmd->getQueue()); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; @@ -698,7 +699,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = Queue != nullptr ? Queue->getContextImplPtr() : nullptr; + auto Context = GetContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq( Record, Req, Context, /*AllowConst=*/false); @@ -754,7 +755,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->usedOnHost(); + Record->updateUsage(nullptr); } } } else { @@ -766,7 +767,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. 
// Can setup link between cl and host allocations only - if ((Context != nullptr) + (Record->MCurContext != nullptr) == 1) { + if ((Context != nullptr) + (Record->usedOnDevice()) == 1) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -822,7 +823,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->MCurContext = Queue->getContextImplPtr(); + Record->updateUsage(Context); std::set Deps = findDepsForReq(Record, Req, Context); @@ -865,10 +866,9 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue, Command::BlockReason Reason, + Command::BlockReason Reason, std::vector &ToEnqueue, const bool AddDepsToLeaves) { - EmptyCommand *EmptyCmd = - new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -878,9 +878,9 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( EmptyCmd->MBlockReason = Reason; for (Requirement *Req : Reqs) { - MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue); + MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req, ToEnqueue); AllocaCommandBase *AllocaCmd = - getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); + getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); } // addRequirement above call addDep that already will add EmptyCmd as user for @@ -1062,8 +1062,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue); - isSameCtx = - 
sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext); + isSameCtx = Record->isSameContext(QueueForAlloca); } // If there is alloca command we need to check if the latest memory is in @@ -1071,7 +1070,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. - if (!Record->MCurContext && + if (!Record->usedOnDevice() && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1089,21 +1088,20 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isInteropTask) { const detail::CGHostTask &HT = static_cast(CG); - if (HT.MQueue->getContextImplPtr() != Record->MCurContext) { + if (!(Record->isSameContext(HT.MQueue)) { NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (Queue && Record->MCurContext) + } else if (Queue && Record->usedOnDevice()) NeedMemMoveToHost = true; if (NeedMemMoveToHost) - insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue(), - ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } + std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, GetContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd) { @@ -1343,7 +1341,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(ConnectCG), Cmd->getQueue()); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1705,7 +1703,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); - isSameCtx = sameCtx(Queue->getContextImplPtr(), 
Record->MCurContext); + isSameCtx = Record->isSameContext(Queue); } if (!isSameCtx) { @@ -1714,7 +1712,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (Queue && Record->MCurContext) + if (Queue && Record->usedOnDevice()) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1724,7 +1722,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); + findDepsForReq(Record, Req, GetContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 0b061a86dbc62..7e5db05daf01a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -118,12 +118,12 @@ EventImplPtr Scheduler::addCG( switch (Type) { case CG::UpdateHost: NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + nullptr, AuxiliaryCmds); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index bcb930bc8194a..6a2bcc4e5004a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -203,7 +203,7 @@ class MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, - MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx}, MCurHostAccess{ MCurContext == nullptr } {} + MWriteLeaves{this, LeafLimit, 
AllocateDependency}, MCurContext{Ctx} {} public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -218,19 +218,19 @@ class MemObjRecord { // modified. Used while deciding if copy back needed. bool MMemModified = false; - void usedOnDevice(ContextImplPtr& NewContext) + void updateUsage(ContextImplPtr& NewContext) { MCurContext = NewContext; - MCurHostAccess = false; } - void usedOnHost() + bool isSameContext(const QueueImplPtr& Queue) const { - MCurContext = nullptr; - MCurHostAccess = true; + // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. + return LHS == (Queue ? Queue->getContextImplPtr() : nullptr); } - bool usedOnHost() { return MCurHostAccess; } + bool usedOnDevice( return MCurContext != nullptr; ) + protected: // The context which has the latest state of the memory object. ContextImplPtr MCurContext; @@ -238,8 +238,6 @@ class MemObjRecord { // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - - bool MCurHostAccess = false; }; /// DPC++ graph scheduler class. @@ -621,7 +619,6 @@ class Scheduler { /// /// \return a command that represents command group execution. Command *addCGUpdateHost(std::unique_ptr CommandGroup, - const QueueImplPtr &HostQueue, std::vector &ToEnqueue); /// Enqueues a command to update memory to the latest state. 
@@ -759,7 +756,6 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue, Command::BlockReason Reason, std::vector &ToEnqueue, const bool AddDepsToLeaves = true); From c533af788609ed1b86dd27307eb48045f05c7565 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 4 Jun 2024 03:41:44 -0700 Subject: [PATCH 13/58] non-buildable: cleanup queue usages Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 3 +- sycl/source/detail/scheduler/commands.cpp | 208 +++++++++------------- 2 files changed, 88 insertions(+), 123 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index bba423df61b60..c1c1d3835a54d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -26,7 +26,8 @@ namespace sycl { inline namespace _V1 { namespace detail { -std::atomic queue_impl::MNextAvailableQueueID = 0; +// Treat 0 as reserved for "host" queue +std::atomic queue_impl::MNextAvailableQueueID = 1; static std::vector getPIEvents(const std::vector &DepEvents) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f0e3471a0f6f6..f7962bb7a5d66 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -56,7 +56,7 @@ namespace detail { // Global graph for the application extern xpti::trace_event_data_t *GSYCLGraphEvent; -bool CurrentCodeLocationValid() { +static bool CurrentCodeLocationValid() { detail::tls_code_loc_t Tls; auto CodeLoc = Tls.query(); auto FileName = CodeLoc.fileName(); @@ -65,7 +65,7 @@ bool CurrentCodeLocationValid() { (FunctionName && FunctionName[0] != '\0'); } -void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, +static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr) { if (!(xptiCheckTraceEnabled(StreamID, Type) && 
TraceEvent)) @@ -74,6 +74,17 @@ void emitInstrumentationGeneral, xptiNotifySubscribers(StreamID, Type, detail::GSYCLGraphEvent, static_cast(TraceEvent), InstanceID, Addr); } + +static void addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) +{ + xpti::addMetadata(TraceEvent, "sycl_device", + Queue ? deviceToID(Queue->get_device()) : nullptr); + xpti::addMetadata(TraceEvent, "sycl_device_type", + Queue ? deviceToString(Queue->get_device()) : "host"); + if (Queue) + xpti::addMetadata(TraceEvent, "sycl_device_name", + getSyclObjImpl(Queue->get_device())->getDeviceName()); +} #endif #ifdef __SYCL_ENABLE_GNU_DEMANGLING struct DemangleHandle { char *p; @@ -236,9 +247,7 @@ Command::getPiEvents(const std::vector &EventImpls) const { // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. const QueueImplPtr &WorkerQueue = getWorkerQueue(); - // MWorkerQueue in command is always not null. So check if - // EventImpl->getWorkerQueue != nullptr is implicit. - if (EventImpl->getWorkerQueue() == WorkerQueue && + if (WorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && WorkerQueue->isInOrder() && !isHostTask()) continue; @@ -278,9 +287,7 @@ std::vector Command::getPiEventsBlocking( // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. const QueueImplPtr &WorkerQueue = getWorkerQueue(); - // MWorkerQueue in command is always not null. So check if - // EventImpl->getWorkerQueue != nullptr is implicit. 
- if (EventImpl->getWorkerQueue() == WorkerQueue && + if (MWorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && WorkerQueue->isInOrder() && !isHostTask()) continue; @@ -337,12 +344,10 @@ class DispatchHostTask { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); } catch (const sycl::exception &E) { - CGHostTask &HostTask = static_cast(MThisCmd->getCG()); - HostTask.MQueue->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); return (pi_result)E.get_cl_code(); } catch (...) { - CGHostTask &HostTask = static_cast(MThisCmd->getCG()); - HostTask.MQueue->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); return PI_ERROR_UNKNOWN; } } @@ -383,7 +388,7 @@ class DispatchHostTask { std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error( std::string("Couldn't wait for host-task's dependencies"), WaitResult)); - HostTask.MQueue->reportAsyncException(EPtr); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); @@ -394,7 +399,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { interop_handle IH{MReqToMem, HostTask.MQueue, - HostTask.MQueue->getDeviceImplPtr(), + // HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); @@ -419,7 +424,7 @@ class DispatchHostTask { } } #endif - HostTask.MQueue->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } HostTask.MHostTask.reset(); @@ -436,7 +441,7 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) 
{ auto CurrentException = std::current_exception(); - HostTask.MQueue->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } } }; @@ -449,6 +454,7 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { + assert(Queue && "Device queue is expected here"); if (!EventImpls.empty()) { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) @@ -484,7 +490,7 @@ Command::Command( MEvent->setWorkerQueue(MWorkerQueue); MEvent->setSubmittedQueue(MWorkerQueue); MEvent->setCommand(this); - MEvent->setContextImpl(MQueue->getContextImplPtr()); + MEvent->setContextImpl(MQueue ? MQueue->getContextImplPtr(): nullptr); MEvent->setStateIncomplete(); MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; @@ -669,7 +675,7 @@ void Command::makeTraceEventEpilog() { Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector &ToCleanUp) { const QueueImplPtr &WorkerQueue = getWorkerQueue(); - const ContextImplPtr &WorkerContext = WorkerQueue->getContextImplPtr(); + const ContextImplPtr &WorkerContext = WorkerQueue ? WorkerQueue->getContextImplPtr() : nullptr; // 1. Non-host events can be ignored if they are not fully initialized. // 2. 
Some types of commands do not produce PI events after they are @@ -701,7 +707,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, } const ContextImplPtr &Command::getWorkerContext() const { - return MQueue->getContextImplPtr(); + assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); + return MWorkerQueue->getContextImplPtr(); } const QueueImplPtr &Command::getWorkerQueue() const { @@ -963,16 +970,12 @@ void AllocaCommandBase::emitInstrumentationData() { // Set the relevant meta data properties for this command if (MTraceEvent && MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); } #endif } @@ -1022,7 +1025,7 @@ pi_int32 AllocaCommand::enqueueImp() { void *HostPtr = nullptr; if (!MIsLeaderAlloca) { - if (MQueue->is_host()) { + if (!MQueue) { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, Event); @@ -1033,7 +1036,7 @@ pi_int32 AllocaCommand::enqueueImp() { // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. MMemAllocation = MemoryManager::allocate( - MQueue->getContextImplPtr(), getSYCLMemObj(), MInitFromUserData, HostPtr, + MQueue ? 
MQueue->getContextImplPtr() : nullptr, getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); return PI_SUCCESS; @@ -1043,7 +1046,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "ALLOCA ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1092,7 +1095,7 @@ void AllocaSubBufCommand::emitInstrumentationData() { xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1102,7 +1105,7 @@ void *AllocaSubBufCommand::getMemAllocation() const { // In some cases parent`s memory allocation might change (e.g., after // map/unmap operations). If parent`s memory allocation changes, sub-buffer // memory allocation should be changed as well. - if (MQueue->is_host()) { + if (!MQueue) { return static_cast( static_cast(MParentAlloca->getMemAllocation()) + MRequirement.MOffsetInBytes); @@ -1116,7 +1119,7 @@ pi_int32 AllocaSubBufCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( - MQueue->getContextImplPtr(), MParentAlloca->getMemAllocation(), + MQueue ? 
MQueue->getContextImplPtr() : nullptr, MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); @@ -1129,7 +1132,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << deviceToString(MQueue->get_device()) + Stream << "ALLOCA SUB BUF ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; @@ -1163,17 +1166,13 @@ void ReleaseCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1187,9 +1186,9 @@ pi_int32 ReleaseCommand::enqueueImp() { // On host side we only allocate memory for full buffers. // Thus, deallocating sub buffers leads to double memory freeing. 
- SkipRelease |= MQueue->is_host() && MAllocaCmd->getType() == ALLOCA_SUB_BUF; + SkipRelease |= !MQueue && MAllocaCmd->getType() == ALLOCA_SUB_BUF; - const bool CurAllocaIsHost = MAllocaCmd->getQueue()->is_host(); + const bool CurAllocaIsHost = !MAllocaCmd->getQueue(); bool NeedUnmap = false; if (MAllocaCmd->MLinkedAllocaCmd) { @@ -1213,7 +1212,7 @@ pi_int32 ReleaseCommand::enqueueImp() { : MAllocaCmd->getQueue(); EventImplPtr UnmapEventImpl(new event_impl(Queue)); - UnmapEventImpl->setContextImpl(Queue->getContextImplPtr()); + UnmapEventImpl->setContextImpl(Queue ? Queue->getContextImplPtr() : nullptr); UnmapEventImpl->setStateIncomplete(); sycl::detail::pi::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); @@ -1237,7 +1236,7 @@ pi_int32 ReleaseCommand::enqueueImp() { Command::waitForEvents(MQueue, EventImpls, Event); else { MemoryManager::release( - MQueue->getContextImplPtr(), MAllocaCmd->getSYCLMemObj(), + MQueue ? MQueue->getContextImplPtr() : nullptr, MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); } return PI_SUCCESS; @@ -1247,7 +1246,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "RELEASE ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1287,16 +1286,12 @@ void MapMemObject::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1321,7 +1316,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "MAP ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1352,16 +1347,12 @@ void UnMapMemObject::emitInstrumentationData() { if (MFirstInstance) { xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "sycl_device", deviceToID(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(TE, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(TE, MQueue); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1383,9 +1374,9 @@ bool UnMapMemObject::producesPiEvent() const { // an event waitlist and Level Zero plugin attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue->getDeviceImplPtr()->getBackend() != + return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr; + MEvent->getHandleRef() != nullptr); } pi_int32 UnMapMemObject::enqueueImp() { @@ -1406,7 +1397,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "UNMAP ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1428,11 +1419,11 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) { - if (!MSrcQueue->is_host()) { + if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } - MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + MWorkerQueue = !MQueue ? MSrcQueue : MQueue; MEvent->setWorkerQueue(MWorkerQueue); emitInstrumentationDataProxy(); @@ -1449,24 +1440,19 @@ void MemCpyCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get())); + MSrcQueue ? reinterpret_cast( + getSyclObjImpl(MSrcQueue->get_device()).get()) : nullptr); xpti::addMetadata( CmdTraceEvent, "copy_to", - reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); + MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()): nullptr); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? 
MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1492,7 +1478,7 @@ bool MemCpyCommand::producesPiEvent() const { // an event waitlist and Level Zero plugin attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue->is_host() || + return !MQueue || MQueue->getDeviceImplPtr()->getBackend() != backend::ext_oneapi_level_zero || MEvent->getHandleRef() != nullptr; @@ -1521,10 +1507,10 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << deviceToString(MQueue->get_device()) << "\\n"; - Stream << "From: " << MSrcAllocaCmd << " is host: " << MSrcQueue->is_host() + Stream << "MEMCPY ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; - Stream << "To: " << MDstAllocaCmd << " is host: " << MQueue->is_host() + Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; Stream << "\"];" << std::endl; @@ -1579,7 +1565,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "UPDATE REQ ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? 
"Buffer" : "Image") << "\\n"; @@ -1606,11 +1592,11 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, : Command(CommandType::COPY_MEMORY, std::move(DstQueue)), MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { - if (!MSrcQueue->is_host()) { + if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } - MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue; + MWorkerQueue = !MQueue ? MSrcQueue : MQueue; MEvent->setWorkerQueue(MWorkerQueue); emitInstrumentationDataProxy(); @@ -1627,24 +1613,19 @@ void MemCpyCommandHost::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get())); + MSrcQueue ? getSyclObjImpl(MSrcQueue->get_device()).get()) : "nullptr"); xpti::addMetadata( CmdTraceEvent, "copy_to", - reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); + MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()) : "nullptr"); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? 
MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1726,18 +1707,13 @@ void EmptyCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1766,7 +1742,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "MEMCPY HOST ON " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n"; Stream << "\"];" << std::endl; @@ -1799,18 +1775,13 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { if (MFirstInstance) { xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, MQueue); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1960,6 +1931,7 @@ void instrumentationAddExtraKernelMetadata( if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); } else { + assert(Queue && "Queue with submitted kernel could not be on host"); std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); @@ -2024,12 +1996,7 @@ void instrumentationFillCommonData(const std::string &KernelName, if (CGKernelInstanceNo > 1) return; - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(Queue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(Queue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(Queue->get_device())->getDeviceName()); + addDeviceMetadata(CmdTraceEvent, Queue); if (!KernelName.empty()) { xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName); } @@ -2080,7 +2047,7 @@ std::pair 
emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue->getQueueID()); + Queue ? Queue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2126,7 +2093,7 @@ void ExecCGCommand::emitInstrumentationData() { if (CmdTraceEvent) { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2149,7 +2116,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << deviceToString(MQueue->get_device()) << "\\n"; + Stream << "EXEC CG ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2330,6 +2297,7 @@ static pi_result SetKernelParamsAndLaunch( const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, bool IsCooperative) { + assert(Queue && "Queue with submitted kernel could not be on host"); const PluginPtr &Plugin = Queue->getPlugin(); auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, @@ -2521,7 +2489,7 @@ pi_int32 enqueueImpKernel( const std::function &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative) { - + assert(Queue && "Queue with submitted kernel could not be on host"); // Run OpenCL kernel auto ContextImpl = Queue->getContextImplPtr(); auto DeviceImpl = Queue->getDeviceImplPtr(); @@ -2636,6 +2604,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector &RawEvents, const detail::EventImplPtr 
&OutEventImpl, bool read) { + assert(Queue && "Queue with submitted read write host pipe could not be on host"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -3309,19 +3278,14 @@ void KernelFusionCommand::emitInstrumentationData() { // This function is called in the constructor of the command. At this point // the kernel fusion list is still empty, so we don't have a terrible lot of // information we could attach to this node here. - if (MFirstInstance && CmdTraceEvent) { - xpti::addMetadata(CmdTraceEvent, "sycl_device", - deviceToID(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_type", - deviceToString(MQueue->get_device())); - xpti::addMetadata(CmdTraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); - } + if (MFirstInstance && CmdTraceEvent) + addDeviceMetadata(CmdTraceEvent, MQueue); + if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue->getQueueID()); + MQueue ? MQueue->getQueueID() : 0); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3335,7 +3299,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << deviceToString(MQueue->get_device()) << "\\n" + Stream << "KERNEL FUSION on " << MQueue ? 
deviceToString(MQueue->get_device()) : "host" << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { From f0868f5ecb17b2886e999e4891725e1695e22c36 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 04:31:26 -0700 Subject: [PATCH 14/58] handle nullptr Queue in commands.* Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 ++-- sycl/source/detail/scheduler/commands.cpp | 39 ++++++++++++++++------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c205b5916f302..15e19f143f29d 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -670,9 +670,9 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - ThreadPool &getThreadPool() { - return GlobalHandler::instance().getHostTaskThreadPool(); - } + // ThreadPool &getThreadPool() { + // return GlobalHandler::instance().getHostTaskThreadPool(); + // } /// Gets the native handle of the SYCL queue. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f7962bb7a5d66..55b29ac7dd426 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -87,6 +87,13 @@ static addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) } #endif +static ContextImplPtr getContext(const QueueImplPtr& Queue) +{ + if (Queue) + return Queue->getContextImplPtr(); + return nullptr; +} + #ifdef __SYCL_ENABLE_GNU_DEMANGLING struct DemangleHandle { char *p; @@ -490,7 +497,8 @@ Command::Command( MEvent->setWorkerQueue(MWorkerQueue); MEvent->setSubmittedQueue(MWorkerQueue); MEvent->setCommand(this); - MEvent->setContextImpl(MQueue ? 
MQueue->getContextImplPtr(): nullptr); + if (MQueue) + MEvent->setContextImpl(MQueue->getContextImplPtr()); MEvent->setStateIncomplete(); MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; @@ -707,12 +715,12 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, } const ContextImplPtr &Command::getWorkerContext() const { - assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } const QueueImplPtr &Command::getWorkerQueue() const { - assert(MWorkerQueue && "MWorkerQueue must not be nullptr"); return MWorkerQueue; } @@ -1036,7 +1044,7 @@ pi_int32 AllocaCommand::enqueueImp() { // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. MMemAllocation = MemoryManager::allocate( - MQueue ? MQueue->getContextImplPtr() : nullptr, getSYCLMemObj(), MInitFromUserData, HostPtr, + getContext(MQueue), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); return PI_SUCCESS; @@ -1119,7 +1127,7 @@ pi_int32 AllocaSubBufCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( - MQueue ? MQueue->getContextImplPtr() : nullptr, MParentAlloca->getMemAllocation(), + getContext(MQueue), MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); @@ -1212,7 +1220,7 @@ pi_int32 ReleaseCommand::enqueueImp() { : MAllocaCmd->getQueue(); EventImplPtr UnmapEventImpl(new event_impl(Queue)); - UnmapEventImpl->setContextImpl(Queue ? 
Queue->getContextImplPtr() : nullptr); + UnmapEventImpl->setContextImpl(getContext(Queue)); UnmapEventImpl->setStateIncomplete(); sycl::detail::pi::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); @@ -1236,7 +1244,7 @@ pi_int32 ReleaseCommand::enqueueImp() { Command::waitForEvents(MQueue, EventImpls, Event); else { MemoryManager::release( - MQueue ? MQueue->getContextImplPtr() : nullptr, MAllocaCmd->getSYCLMemObj(), + getContext(MQueue), MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); } return PI_SUCCESS; @@ -2654,6 +2662,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, } pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { + assert(MQueue && "Device queue is required for command buffer enqueue"); // Wait on host command dependencies waitForPreparedHostEvents(); @@ -2819,8 +2828,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - bool DiscardPiEvent = (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0); + bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && + (MCommandGroup->getRequirements().size() == 0); sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? 
nullptr : MEvent; @@ -2894,6 +2903,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Kernel: { + assert(MQueue && "Device queue must be present for kernel command"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); NDRDescT &NDRDesc = ExecKernel->MNDRDesc; @@ -3039,8 +3049,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Req->MSYCLMemObj->MRecord->MAllocaCommands; for (AllocaCommandBase *AllocaCmd : AllocaCmds) - if (HostTask->MQueue->getContextImplPtr() == - AllocaCmd->getQueue()->getContextImplPtr()) { + if (getContext(HostTask->MQueue) == + getContext(AllocaCmd->getQueue()) { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); @@ -3064,7 +3074,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { // submitted to report exception origin properly. copySubmissionCodeLocation(); - MQueue->getThreadPool().submit( + getThreadPool().submit( DispatchHostTask(this, std::move(ReqToMem))); MShouldCompleteEventIfPossible = false; @@ -3072,6 +3082,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { + assert(MQueue && "Device queue must be present for barrier command"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); @@ -3081,6 +3092,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { + assert(MQueue && "Device queue must be present for barrier with wait list command"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3132,6 +3144,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { + assert(MQueue && "Device queue must be present for command buffer enqueue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ 
-3155,6 +3168,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { + assert(MQueue && "Device queue must be present for semaphore wait command"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3165,6 +3179,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreSignal: { + assert(MQueue && "Device queue must be present for semaphore signal command"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); From 3d044e896cc6ff1d851c56268dfeb2dc623b55e9 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 06:04:41 -0700 Subject: [PATCH 15/58] non-buildable: handle nullptr queue in memory_manager.cpp Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 12 +++++++----- sycl/source/detail/memory_manager.cpp | 22 ++++++++++++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 28bb37200392a..be32787c0aa4d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -149,15 +149,16 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, } event_impl::event_impl(const QueueImplPtr &Queue) { - this->setContextImpl(Queue->getContextImplPtr()); + // Queue == nullptr means that it is a host task event + this->setContextImpl(getContext(Queue)); this->associateWithQueue(Queue); } void event_impl::associateWithQueue(const QueueImplPtr &Queue) { MQueue = Queue; - MIsProfilingEnabled = Queue->MIsProfilingEnabled; + MIsProfilingEnabled = Queue && Queue->MIsProfilingEnabled; MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback(); - MState.store(HES_Complete); + MState.store(Queue ? 
HES_Complete : HES_NotComplete); } void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID, @@ -402,8 +403,9 @@ event_impl::get_backend_info() const { ->get_platform() .get_info(); } - return ""; // If the queue has been released, no platform will be associated - // so return empty string + // If the queue has been released, no platform will be associated + // so return empty string. + return ""; } template <> diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3c0ad08e0763f..30827adb15e8f 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -482,6 +482,7 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, const detail::EventImplPtr &OutEventImpl) { (void)SrcAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(TgtQueue && "Destination mem object queue must be not nullptr"); const sycl::detail::pi::PiQueue Queue = TgtQueue->getHandleRef(); const PluginPtr &Plugin = TgtQueue->getPlugin(); @@ -560,6 +561,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, const detail::EventImplPtr &OutEventImpl) { (void)DstAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(SrcQueue && "Source mem object queue is expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -641,6 +643,7 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(SrcQueue && TgtQueue && "Source mem object and target mem object queues are expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -804,6 +807,7 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr 
Queue, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + assert(Queue && "Fill should be called only with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); @@ -861,7 +865,7 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, unsigned int ElementSize, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent) { - if (Queue->is_host()) { + if (!Queue) { throw runtime_error("Not supported configuration of map requested", PI_ERROR_INVALID_OPERATION); } @@ -907,6 +911,10 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, sycl::detail::pi::PiEvent &OutEvent) { // Host queue is not supported here. + if (!Queue) { + throw runtime_error("Not supported configuration of unmap requested", + PI_ERROR_INVALID_OPERATION); + } // All DepEvents are to the same Context. // Using the plugin of the Queue. @@ -921,6 +929,7 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(SrcQueue && "USM copy must be called with a valid device queue"); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -959,6 +968,7 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM fill must be called with a valid device queue"); if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { if (OutEventImpl != nullptr) @@ -994,6 +1004,7 @@ void MemoryManager::prefetch_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM prefetch must be called with a valid device queue"); const PluginPtr &Plugin = 
Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1015,6 +1026,7 @@ void MemoryManager::advise_usm( std::vector /*DepEvents*/, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM advise must be called with a valid device queue"); const PluginPtr &Plugin = Queue->getPlugin(); if (OutEventImpl != nullptr) OutEventImpl->setHostEnqueueTime(); @@ -1037,6 +1049,7 @@ void MemoryManager::copy_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM copy 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1122,6 +1135,7 @@ void MemoryManager::fill_2d_usm( std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM fill 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1159,6 +1173,7 @@ void MemoryManager::memset_2d_usm( char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "USM memset 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { @@ -1198,6 +1213,7 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { + assert(Queue && "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. 
DeviceGlobalUSMMem &DeviceGlobalUSM = DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue); @@ -1299,6 +1315,7 @@ static void memcpyToDeviceGlobalDirect( size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { + assert(Queue && "Direct copy to device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1313,6 +1330,7 @@ static void memcpyFromDeviceGlobalDirect( size_t NumBytes, size_t Offset, void *Dest, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { + assert(Queue && "Direct copy from device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1722,7 +1740,7 @@ void MemoryManager::copy_image_bindless( sycl::detail::pi::PiImageRegion CopyExtent, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - + assert(Queue && "Copy image bindless must be called with a valid device queue"); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) From b3161e8bf8b978600e6910e7e8953a530ac26d23 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 06:55:19 -0700 Subject: [PATCH 16/58] non-buildable: build enabling Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 ++--- .../source/detail/scheduler/graph_builder.cpp | 6 +++++ sycl/source/detail/scheduler/scheduler.hpp | 23 ++++++++----------- sycl/source/handler.cpp | 9 ++++---- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 15e19f143f29d..a3463225a54d1 100644 --- a/sycl/source/detail/queue_impl.hpp +++ 
b/sycl/source/detail/queue_impl.hpp @@ -111,7 +111,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -285,7 +285,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)), + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -305,7 +305,7 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder)) { + MSupportsDiscardingPiEvents(MDiscardEvents && MIsInorder) { queue_impl_interop(PiQueue); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6c9244f9ecb2c..d9614e9ca9d51 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -59,6 +59,12 @@ static ContextImplPtr GetContext(const QueueImplPtr& Queue) return Queue ? Queue->getContextImplPtr() : nullptr; } +bool MemObjRecord::isSameContext(const QueueImplPtr& Queue) const +{ + // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. + return MCurContext == (Queue ? Queue->getContextImplPtr() : nullptr); +} + /// Checks if the required access mode is allowed under the current one. 
static bool isAccessModeAllowed(access::mode Required, access::mode Current) { switch (Current) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 6a2bcc4e5004a..61f01863c477b 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -218,26 +218,21 @@ class MemObjRecord { // modified. Used while deciding if copy back needed. bool MMemModified = false; - void updateUsage(ContextImplPtr& NewContext) - { - MCurContext = NewContext; - } - - bool isSameContext(const QueueImplPtr& Queue) const - { - // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. - return LHS == (Queue ? Queue->getContextImplPtr() : nullptr); - } - - bool usedOnDevice( return MCurContext != nullptr; ) - -protected: // The context which has the latest state of the memory object. ContextImplPtr MCurContext; // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; + + void updateUsage(ContextImplPtr& NewContext) + { + MCurContext = NewContext; + } + + bool isSameContext(const QueueImplPtr& Queue) const; + + bool usedOnDevice() { return MCurContext != nullptr; } }; /// DPC++ graph scheduler class. 
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 749ab6750df5e..c0e0438d9cd2f 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,16 +80,15 @@ void *getValueFromDynamicParameter( } // namespace detail -handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, Queue, nullptr, IsHost) {} +handler::handler(std::shared_ptr Queue) + : handler(Queue, Queue, nullptr) {} handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool IsHost) + std::shared_ptr SecondaryQueue) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue))), - MQueue(std::move(Queue)), MIsHost(IsHost) {} + MQueue(std::move(Queue)) {} handler::handler( std::shared_ptr Graph) From 2258a1cbb812161a21af5dbb9a38c170a41badc8 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 08:07:45 -0700 Subject: [PATCH 17/58] not-buildable: build enabling 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/buffer_impl.cpp | 9 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/memory_manager.cpp | 4 +- sycl/source/detail/queue_impl.cpp | 4 +- sycl/source/detail/queue_impl.hpp | 5 + sycl/source/detail/scheduler/commands.cpp | 136 +++++++++--------- sycl/source/detail/scheduler/commands.hpp | 12 +- .../source/detail/scheduler/graph_builder.cpp | 51 +++---- sycl/source/detail/scheduler/scheduler.hpp | 12 +- 9 files changed, 108 insertions(+), 127 deletions(-) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index d7d77205b162c..f13444107e9eb 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -68,10 +68,13 @@ buffer_impl::getNativeVector(backend BackendName) const { sycl::detail::pi::PiMem NativeMem = pi::cast(Cmd->getMemAllocation()); auto Ctx = Cmd->getWorkerContext(); - auto Platform = Ctx->getPlatformImpl(); // If Host Shared Memory is not supported then there is alloca for 
host that - // doesn't have platform - if (!Platform || (Platform->getBackend() != BackendName)) + // doesn't have context and platform + if (!Ctx) + continue; + PlatformImplPtr Platform = Ctx->getPlatformImpl(); + assert(Platform && "Platform must be present for device context"); + if (Platform->getBackend() != BackendName) continue; auto Plugin = Platform->getPlugin(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index be32787c0aa4d..e34597aa008d1 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -150,7 +150,7 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, event_impl::event_impl(const QueueImplPtr &Queue) { // Queue == nullptr means that it is a host task event - this->setContextImpl(getContext(Queue)); + this->setContextImpl(queue_impl::getContext(Queue)); this->associateWithQueue(Queue); } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 30827adb15e8f..e2c22f794f587 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -413,7 +413,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, waitForEvents(DepEvents); OutEvent = nullptr; - if (TargetContext->is_host()) + if (!TargetContext) return static_cast(static_cast(ParentMemObj) + Offset); size_t SizeInBytes = ElemSize; @@ -643,7 +643,7 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && TgtQueue && "Source mem object and target mem object queues are expected to be not nullptr"); + assert(SrcQueue && "Source mem object and target mem object queues are expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); diff --git a/sycl/source/detail/queue_impl.cpp 
b/sycl/source/detail/queue_impl.cpp index c1c1d3835a54d..ce4dd462eef32 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -284,12 +284,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. - if (Event->isHost() || MEmulateOOO) + if (EImpl->isHost() || MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (Event->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (EImpl->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a3463225a54d1..61f34c35c7baf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -750,6 +750,11 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); + + static ContextImplPtr getContext(const QueueImplPtr& Queue) + { + return Queue ? 
Queue->getContextImplPtr() : nullptr; + } protected: event discard_or_return(const event &Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 55b29ac7dd426..05873f23f45a9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -75,16 +75,32 @@ static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, static_cast(TraceEvent), InstanceID, Addr); } -static addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) +static size_t deviceToID(const device &Device) { + return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); +} + +static std::string deviceToString(device Device) { + if (Device.is_cpu()) + return "CPU"; + else if (Device.is_gpu()) + return "GPU"; + else if (Device.is_accelerator()) + return "ACCELERATOR"; + else + return "UNKNOWN"; +} + +static void addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) { xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(MQueue->get_device()) : nullptr); + Queue ? deviceToID(Queue->get_device()) : 0); xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(MQueue->get_device()) : "host"); + Queue ? 
deviceToString(Queue->get_device()) : "host"); if (Queue) xpti::addMetadata(TraceEvent, "sycl_device_name", - getSyclObjImpl(MQueue->get_device())->getDeviceName()); + getSyclObjImpl(Queue->get_device())->getDeviceName()); } + #endif static ContextImplPtr getContext(const QueueImplPtr& Queue) @@ -113,17 +129,6 @@ static std::string demangleKernelName(std::string Name) { static std::string demangleKernelName(std::string Name) { return Name; } #endif -static std::string deviceToString(device Device) { - if (Device.is_cpu()) - return "CPU"; - else if (Device.is_gpu()) - return "GPU"; - else if (Device.is_accelerator()) - return "ACCELERATOR"; - else - return "UNKNOWN"; -} - void applyFuncOnFilteredArgs( const KernelArgMask *EliminatedArgMask, std::vector &Args, std::function Func) { @@ -158,12 +163,6 @@ void applyFuncOnFilteredArgs( } } -#ifdef XPTI_ENABLE_INSTRUMENTATION -static size_t deviceToID(const device &Device) { - return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); -} -#endif - static std::string accessModeToString(access::mode Mode) { switch (Mode) { case access::mode::read: @@ -253,9 +252,8 @@ Command::getPiEvents(const std::vector &EventImpls) const { // At this stage dependency is definitely pi task and need to check if // current one is a host task. In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. - const QueueImplPtr &WorkerQueue = getWorkerQueue(); - if (WorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && - WorkerQueue->isInOrder() && !isHostTask()) + if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && + MWorkerQueue->isInOrder() && !isHostTask()) continue; RetPiEvents.push_back(EventImpl->getHandleRef()); @@ -293,9 +291,8 @@ std::vector Command::getPiEventsBlocking( // At this stage dependency is definitely pi task and need to check if // current one is a host task. 
In this case we should not skip pi event due // to different sync mechanisms for different task types on in-order queue. - const QueueImplPtr &WorkerQueue = getWorkerQueue(); - if (MWorkerQueue && EventImpl->getWorkerQueue() == WorkerQueue && - WorkerQueue->isInOrder() && !isHostTask()) + if (MWorkerQueue && EventImpl->getWorkerQueue() == MWorkerQueue && + MWorkerQueue->isInOrder() && !isHostTask()) continue; RetPiEvents.push_back(EventImpl->getHandleRef()); @@ -431,7 +428,7 @@ class DispatchHostTask { } } #endif - MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } HostTask.MHostTask.reset(); @@ -448,7 +445,7 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) { auto CurrentException = std::current_exception(); - MthisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); } } }; @@ -471,7 +468,7 @@ void Command::waitForEvents(QueueImplPtr Queue, std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); const PluginPtr &Plugin = Queue->getPlugin(); if (MEvent != nullptr) @@ -682,8 +679,7 @@ void Command::makeTraceEventEpilog() { Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, std::vector &ToCleanUp) { - const QueueImplPtr &WorkerQueue = getWorkerQueue(); - const ContextImplPtr &WorkerContext = WorkerQueue ? WorkerQueue->getContextImplPtr() : nullptr; + const ContextImplPtr &WorkerContext = getWorkerContext(); // 1. Non-host events can be ignored if they are not fully initialized. // 2. 
Some types of commands do not produce PI events after they are @@ -714,14 +710,10 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, return ConnectionCmd; } -const ContextImplPtr &Command::getWorkerContext() const { - if (!MWorkerQueue) +ContextImplPtr Command::getWorkerContext() const { + if (!MQueue) return nullptr; - return MWorkerQueue->getContextImplPtr(); -} - -const QueueImplPtr &Command::getWorkerQueue() const { - return MWorkerQueue; + return MQueue->getContextImplPtr(); } bool Command::producesPiEvent() const { return true; } @@ -1054,7 +1046,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "ALLOCA ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1140,7 +1132,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" + Stream << "ALLOCA SUB BUF ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; @@ -1254,7 +1246,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "RELEASE ON " << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1309,7 +1301,7 @@ pi_int32 MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( @@ -1324,7 +1316,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1391,7 +1383,7 @@ pi_int32 UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), @@ -1405,7 +1397,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "UNMAP ON " << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1452,11 +1444,10 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? reinterpret_cast( - getSyclObjImpl(MSrcQueue->get_device()).get()) : nullptr); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata( CmdTraceEvent, "copy_to", - MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()): nullptr); + MQueue ? deviceToID(MQueue->get_device()): 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, @@ -1466,8 +1457,9 @@ void MemCpyCommand::emitInstrumentationData() { #endif } -const ContextImplPtr &MemCpyCommand::getWorkerContext() const { - return getWorkerQueue()->getContextImplPtr(); +ContextImplPtr MemCpyCommand::getWorkerContext() const { + assert(MWorkerQueue && "Worker queue for mem cpy command must be not nullptr"); + return MWorkerQueue->getContextImplPtr(); } bool MemCpyCommand::producesPiEvent() const { @@ -1499,7 +1491,7 @@ pi_int32 MemCpyCommand::enqueueImp() { sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef(); auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), @@ -1515,7 +1507,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MEMCPY ON " << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue @@ -1573,7 +1565,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "UPDATE REQ ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1625,11 +1617,10 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - reinterpret_cast( - MSrcQueue ? getSyclObjImpl(MSrcQueue->get_device()).get()) : "nullptr"); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata( CmdTraceEvent, "copy_to", - MQueue ? reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get()) : "nullptr"); + MQueue ? 
deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, @@ -1639,12 +1630,13 @@ void MemCpyCommandHost::emitInstrumentationData() { #endif } -const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const { - return getWorkerQueue()->getContextImplPtr(); +ContextImplPtr MemCpyCommandHost::getWorkerContext() const { + assert(MWorkerQueue && "Worker queue for mem cpy host command must be not nullptr"); + return MWorkerQueue->getContextImplPtr(); } pi_int32 MemCpyCommandHost::enqueueImp() { - const QueueImplPtr &Queue = getWorkerQueue(); + const QueueImplPtr &Queue = MWorkerQueue; waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); @@ -1660,7 +1652,7 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, @@ -1671,8 +1663,8 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } -EmptyCommand::EmptyCommand(QueueImplPtr Queue) - : Command(CommandType::EMPTY_TASK, std::move(Queue)) { +EmptyCommand::EmptyCommand() + : Command(CommandType::EMPTY_TASK, nullptr) { emitInstrumentationDataProxy(); } @@ -1750,7 +1742,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "MEMCPY HOST ON " << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -2055,7 +2047,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + Queue ? Queue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2124,7 +2116,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n"; + Stream << "EXEC CG ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2670,7 +2662,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { // submissions of the command buffer itself will not receive dependencies on // them, e.g. 
initial copies from host to device std::vector EventImpls = MPreparedDepsEvents; - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); std::vector RawEvents = getPiEvents(EventImpls); if (!RawEvents.empty()) { const PluginPtr &Plugin = MQueue->getPlugin(); @@ -2826,7 +2818,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && (MCommandGroup->getRequirements().size() == 0); @@ -3050,7 +3042,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { for (AllocaCommandBase *AllocaCmd : AllocaCmds) if (getContext(HostTask->MQueue) == - getContext(AllocaCmd->getQueue()) { + getContext(AllocaCmd->getQueue())) { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); @@ -3294,7 +3286,7 @@ void KernelFusionCommand::emitInstrumentationData() { // the kernel fusion list is still empty, so we don't have a terrible lot of // information we could attach to this node here. if (MFirstInstance && CmdTraceEvent) - addDeviceMetadata(CmdTraceEVent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue); if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -3314,7 +3306,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << MQueue ? deviceToString(MQueue->get_device()) : "host" << "\\n" + Stream << "KERNEL FUSION on " << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { @@ -3354,7 +3346,7 @@ pi_int32 UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + flushCrossQueueDeps(EventImpls, MWorkerQueue); for (auto &Node : MNodes) { auto CG = static_cast(Node->MCommandGroup.get()); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 89cabd134a7e1..ea2ba3ea72118 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -223,11 +223,7 @@ class Command { /// Get the context of the queue this command will be submitted to. Could /// differ from the context of MQueue for memory copy commands. - virtual const ContextImplPtr &getWorkerContext() const; - - /// Get the queue this command will be submitted to. Could differ from MQueue - /// for memory copy commands. - const QueueImplPtr &getWorkerQueue() const; + virtual ContextImplPtr getWorkerContext() const; /// Returns true iff the command produces a PI event on non-host devices. virtual bool producesPiEvent() const; @@ -414,7 +410,7 @@ class Command { /// implement lock in the graph, or to merge several nodes into one. 
class EmptyCommand : public Command { public: - EmptyCommand(QueueImplPtr Queue); + EmptyCommand(); void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MRequirements[0]; } @@ -586,7 +582,7 @@ class MemCpyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - const ContextImplPtr &getWorkerContext() const final; + ContextImplPtr getWorkerContext() const final; bool producesPiEvent() const final; private: @@ -610,7 +606,7 @@ class MemCpyCommandHost : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData() final; - const ContextImplPtr &getWorkerContext() const final; + ContextImplPtr getWorkerContext() const final; private: pi_int32 enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d9614e9ca9d51..8778ad6927c3e 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -54,15 +54,10 @@ static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } -static ContextImplPtr GetContext(const QueueImplPtr& Queue) -{ - return Queue ? Queue->getContextImplPtr() : nullptr; -} - -bool MemObjRecord::isSameContext(const QueueImplPtr& Queue) const +static bool isOnSameContext(const ContextImplPtr Context, const QueueImplPtr& Queue) { // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. - return MCurContext == (Queue ? Queue->getContextImplPtr() : nullptr); + return Context == queue_impl::getContext(Queue); } /// Checks if the required access mode is allowed under the current one. 
@@ -250,7 +245,7 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else - MemObject->MRecord.reset(new MemObjRecord{GetContext(Queue), + MemObject->MRecord.reset(new MemObjRecord{queue_impl::getContext(Queue), LeafLimit, AllocateDependency}); MMemObjs.push_back(MemObject); @@ -289,7 +284,7 @@ void Scheduler::GraphBuilder::addNodeToLeaves( UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); @@ -353,7 +348,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if (!AllocaCmdDst) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); std::set Deps = findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); @@ -371,7 +366,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( // current context, need to find a parent alloca command for it (it must be // there) auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { - bool Res = Record->isSameContext(AllocaCmd->getQueue()) && + bool Res = isOnSameContext(Record->MCurContext, AllocaCmd->getQueue()) && // Looking for a parent buffer alloca command AllocaCmd->getType() == Command::CommandType::ALLOCA; return Res; @@ -406,7 +401,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( if ((Req->MAccessMode == access::mode::discard_write) || (Req->MAccessMode == access::mode::discard_read_write)) { - Record->updateUsage(Context); + Record->MCurContext = Context; return nullptr; } else { // Full copy of buffer is needed to avoid loss of data that may be caused @@ -428,7 
+423,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); for (Command *Cmd : ToCleanUp) cleanupCommand(Cmd); - Record->updateUsage(Context); + Record->MCurContext = Context; return NewCmd; } @@ -541,7 +536,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, AllocaCommandBase *HostAllocaCmd = getOrCreateAllocaForReq(Record, Req, nullptr, ToEnqueue); - if (Record->isSameContext(HostAllocaCmd->getQueue())) { + if (isOnSameContext(Record->MCurContext, HostAllocaCmd->getQueue())) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer ? (static_cast( @@ -625,7 +620,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, if (Dep.MDepCommand) { auto DepQueue = Dep.MDepCommand->getQueue(); - CanBypassDep &= IsOnSameContext(Context, DepQueue); + CanBypassDep &= isOnSameContext(Context, DepQueue); } if (!CanBypassDep) { @@ -665,7 +660,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( bool AllowConst) { auto IsSuitableAlloca = [&Context, Req, AllowConst](AllocaCommandBase *AllocaCmd) { - bool Res = IsOnSameContext(Context, AllocaCmd->getQueue()); + bool Res = isOnSameContext(Context, AllocaCmd->getQueue()); if (IsSuitableSubReq(Req)) { const Requirement *TmpReq = AllocaCmd->getRequirement(); Res &= AllocaCmd->getType() == Command::CommandType::ALLOCA_SUB_BUF; @@ -705,7 +700,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { - auto Context = GetContext(Queue); + auto Context = queue_impl::getContext(Queue); AllocaCommandBase *AllocaCmd = findAllocaForReq( Record, Req, Context, /*AllowConst=*/false); @@ -761,7 +756,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( 
Record->MAllocaCommands.push_back(HostAllocaCmd); Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); - Record->updateUsage(nullptr); + Record->MCurContext = nullptr; } } } else { @@ -773,7 +768,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if ((Context != nullptr) + (Record->usedOnDevice()) == 1) { + if ((Context != nullptr) != (Record->MCurContext != nullptr)) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the @@ -829,7 +824,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->updateUsage(Context); + Record->MCurContext =Context; std::set Deps = findDepsForReq(Record, Req, Context); @@ -1068,7 +1063,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue); - isSameCtx = Record->isSameContext(QueueForAlloca); + isSameCtx = isOnSameContext(Record->MCurContext, QueueForAlloca); } // If there is alloca command we need to check if the latest memory is in @@ -1076,7 +1071,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isSameCtx) { // If the memory is already in the required host context, check if the // required access mode is valid, remap if not. 
- if (!Record->usedOnDevice() && + if (!Record->MCurContext && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) { remapMemoryObject(Record, Req, Req->MIsSubBuffer @@ -1094,11 +1089,11 @@ void Scheduler::GraphBuilder::createGraphForCommand( if (isInteropTask) { const detail::CGHostTask &HT = static_cast(CG); - if (!(Record->isSameContext(HT.MQueue)) { + if (!isOnSameContext(Record->MCurContext, HT.MQueue)) { NeedMemMoveToHost = true; MemMoveTargetQueue = HT.MQueue; } - } else if (Queue && Record->usedOnDevice()) + } else if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1107,7 +1102,7 @@ void Scheduler::GraphBuilder::createGraphForCommand( } std::set Deps = - findDepsForReq(Record, Req, GetContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd) { @@ -1709,7 +1704,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); - isSameCtx = Record->isSameContext(Queue); + isSameCtx = isOnSameContext(Record->MCurContext, Queue); } if (!isSameCtx) { @@ -1718,7 +1713,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( bool NeedMemMoveToHost = false; auto MemMoveTargetQueue = Queue; - if (Queue && Record->usedOnDevice()) + if (Queue && Record->MCurContext) NeedMemMoveToHost = true; if (NeedMemMoveToHost) @@ -1728,7 +1723,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, GetContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 61f01863c477b..d3462872c9ddf 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -199,12 +199,11 
@@ using FusionMap = std::unordered_map; /// There must be a single MemObjRecord for each SYCL memory object. /// /// \ingroup sycl_graph -class MemObjRecord { +struct MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} -public: // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -224,15 +223,6 @@ class MemObjRecord { // The mode this object can be accessed with from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; - - void updateUsage(ContextImplPtr& NewContext) - { - MCurContext = NewContext; - } - - bool isSameContext(const QueueImplPtr& Queue) const; - - bool usedOnDevice() { return MCurContext != nullptr; } }; /// DPC++ graph scheduler class. From df27615254aff2efd52952930673920c521fd3fb Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 08:49:20 -0700 Subject: [PATCH 18/58] almost buildable: build enabling 3 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 6 +++--- sycl/source/detail/scheduler/commands.cpp | 6 +++--- sycl/source/detail/scheduler/commands.hpp | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 61f34c35c7baf..3bd7b6ea7ec0a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -670,9 +670,9 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - // ThreadPool &getThreadPool() { - // return GlobalHandler::instance().getHostTaskThreadPool(); - // } + static ThreadPool &getThreadPool() { + return GlobalHandler::instance().getHostTaskThreadPool(); + } /// Gets the native handle of the SYCL queue. 
/// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 05873f23f45a9..d0a790ed97059 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -65,7 +65,7 @@ static bool CurrentCodeLocationValid() { (FunctionName && FunctionName[0] != '\0'); } -static void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, +void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr) { if (!(xptiCheckTraceEnabled(StreamID, Type) && TraceEvent)) @@ -2424,7 +2424,7 @@ pi_int32 enqueueImpCommandBufferKernel( &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { sycl::detail::SetArgBasedOnType(Plugin, PiKernel, DeviceImageImpl, - getMemAllocationFunc, Ctx, false, Arg, + getMemAllocationFunc, Ctx, Arg, NextTrueIndex); }; // Copy args for modification @@ -3066,7 +3066,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { // submitted to report exception origin properly. 
copySubmissionCodeLocation(); - getThreadPool().submit( + queue_impl::getThreadPool().submit( DispatchHostTask(this, std::move(ReqToMem))); MShouldCompleteEventIfPossible = false; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ea2ba3ea72118..628ccdf2593da 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -33,7 +33,6 @@ class node_impl; namespace detail { #ifdef XPTI_ENABLE_INSTRUMENTATION -bool CurrentCodeLocationValid(); void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr); @@ -793,7 +792,7 @@ void SetArgBasedOnType( const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, bool IsHost, detail::ArgDesc &Arg, + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex); void applyFuncOnFilteredArgs( From eebc51933df59666baad0bb50100cb02dce4e485 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Jun 2024 09:34:20 -0700 Subject: [PATCH 19/58] almost almost buildable: enable build 4 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 3 ++- sycl/source/handler.cpp | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 3bd7b6ea7ec0a..1315d32ecaa4f 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -844,7 +844,7 @@ class queue_impl { "function objects should use the sycl::handler API instead."); } - handler Handler(Self, PrimaryQueue, SecondaryQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue, false); Handler.saveCodeLoc(Loc); PreventSubmit = true; try { diff --git a/sycl/source/detail/scheduler/commands.cpp 
b/sycl/source/detail/scheduler/commands.cpp index d0a790ed97059..1683b874fba5d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -402,8 +402,9 @@ class DispatchHostTask { try { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { + assert(HostTask.MQueue && "Submitted queue for host task must be device queue"); interop_handle IH{MReqToMem, HostTask.MQueue, - // HostTask.MQueue->getDeviceImplPtr(), + HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c0e0438d9cd2f..015d690d67e7d 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,12 +80,12 @@ void *getValueFromDynamicParameter( } // namespace detail -handler::handler(std::shared_ptr Queue) - : handler(Queue, Queue, nullptr) {} +handler::handler(std::shared_ptr Queue, bool) + : handler(Queue, Queue, nullptr, false) {} handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue) + std::shared_ptr SecondaryQueue, bool) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue))), MQueue(std::move(Queue)) {} From c6fe5c8098daadcde4dd19241be937e146bf9a17 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 6 Jun 2024 10:12:13 -0700 Subject: [PATCH 20/58] buildable Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 7 ------- sycl/source/detail/device_impl.hpp | 5 ----- sycl/source/detail/stream_impl.cpp | 14 +++++--------- sycl/source/detail/stream_impl.hpp | 4 ---- 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index c677b9165d71f..ae3b04486d1ea 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -716,13 +716,6 @@ bool 
device_impl::has(aspect Aspect) const { PI_ERROR_INVALID_DEVICE); } -std::shared_ptr device_impl::getHostDeviceImpl() { - static std::shared_ptr HostImpl = - std::make_shared(); - - return HostImpl; -} - bool device_impl::isAssertFailSupported() const { return MIsAssertFailSupported; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index efec017d372f5..9249bbba59fe8 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -217,11 +217,6 @@ class device_impl { /// \return true if the SYCL device has the given feature. bool has(aspect Aspect) const; - /// Gets the single instance of the Host Device - /// - /// \return the host device_impl singleton - static std::shared_ptr getHostDeviceImpl(); - bool isAssertFailSupported() const; bool isRootDevice() const { return MRootDevice == nullptr; } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 4550b5cc26629..7268293433e82 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -94,12 +94,12 @@ void stream_impl::initStreamHost(QueueImplPtr Queue) { } void stream_impl::flush(const EventImplPtr &LeadEvent) { + assert(LeadEvent && "LeadEvent is expected to be not nullptr"); // We don't want stream flushing to be blocking operation that is why submit a // host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. 
- auto Q = detail::createSyclObjFromImpl( - sycl::detail::Scheduler::getInstance().getDefaultHostQueue()); - event Event = Q.submit([&](handler &cgh) { + auto Q = LeadEvent->getSubmittedQueue(); + event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { auto BufHostAcc = Buf_.get_access( cgh, range<1>(BufferSize_), id<1>(OffsetSize)); @@ -131,14 +131,10 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { fflush(stdout); }); }); - if (LeadEvent) { - LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - LeadEvent->getSubmittedQueue()->registerStreamServiceEvent( - detail::getSyclObjImpl(Event)); - } + LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); + Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); } -void stream_impl::flush() { flush(nullptr); } } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 823653016c162..cd3d503b4b894 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -49,10 +49,6 @@ class __SYCL_EXPORT stream_impl { // LeadEvent as well as in queue LeadEvent associated with. 
void flush(const EventImplPtr &LeadEvent); - // Enqueue task to copy stream buffer to the host and print the contents - // Remove during next ABI breaking window - void flush(); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; From 24669e2a82d3765cc08800d4e8691e0c2bc5b28b Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 6 Jun 2024 10:52:53 -0700 Subject: [PATCH 21/58] RT-buildable: enabling UT build Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/scheduler/AllocaLinking.cpp | 13 +++---------- .../scheduler/CommandsWaitForEvents.cpp | 10 ++-------- .../scheduler/EnqueueWithDependsOnDeps.cpp | 3 +-- sycl/unittests/scheduler/GraphCleanup.cpp | 11 +++-------- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 11 +++-------- sycl/unittests/scheduler/LeafLimit.cpp | 2 -- .../scheduler/LeafLimitDiffContexts.cpp | 2 +- sycl/unittests/scheduler/LeavesCollection.cpp | 9 ++++----- .../scheduler/LinkedAllocaDependencies.cpp | 14 ++++---------- .../scheduler/NoHostUnifiedMemory.cpp | 19 +++++++------------ sycl/unittests/scheduler/QueueFlushing.cpp | 10 +++------- .../scheduler/SchedulerTestUtils.hpp | 3 +-- .../scheduler/StreamInitDependencyOnHost.cpp | 9 +++------ 13 files changed, 35 insertions(+), 81 deletions(-) diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp index a77995a203da3..e15cf24761ee1 100644 --- a/sycl/unittests/scheduler/AllocaLinking.cpp +++ b/sycl/unittests/scheduler/AllocaLinking.cpp @@ -47,13 +47,6 @@ static pi_result redefinedDeviceGetInfoAfter(pi_device Device, TEST_F(SchedulerTest, AllocaLinking) { HostUnifiedMemory = false; - // This host device constructor should be placed before Mock.redefine - // because it overrides the real implementation of get_device_info - // which is needed when creating a host device. 
- device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; sycl::unittest::PiMock Mock; sycl::queue Q{Mock.getPlatform().get_devices()[0]}; @@ -73,7 +66,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_FALSE(HostAllocaCmd->MLinkedAllocaCmd); EXPECT_FALSE(NonHostAllocaCmd->MLinkedAllocaCmd); @@ -90,7 +83,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); @@ -107,7 +100,7 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index d893c33f5cc26..499a45d0fe70f 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -219,13 +219,7 @@ TEST_F(SchedulerTest, CommandsWaitForEvents) { 
std::shared_ptr E2( new detail::event_impl(TestContext->EventCtx2, Q2.get_context())); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue(new detail::queue_impl( - detail::getSyclObjImpl(HostDevice), /*AsyncHandler=*/{}, - /*PropList=*/{})); - - MockCommand Cmd(DefaultHostQueue); + MockCommand Cmd(nullptr); std::vector> Events; Events.push_back(E1); @@ -233,7 +227,7 @@ TEST_F(SchedulerTest, CommandsWaitForEvents) { pi_event EventResult = nullptr; - Cmd.waitForEventsCall(DefaultHostQueue, Events, EventResult); + Cmd.waitForEventsCall(nullptr, Events, EventResult); ASSERT_TRUE(TestContext->EventCtx1WasWaited && TestContext->EventCtx2WasWaited) diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index fc816d1a4f3af..bd7531c964716 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -83,7 +83,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? MS.getDefaultHostQueue() : QueueDevImpl, + Type == TestCGType::HOST_TASK ? 
nullptr : QueueDevImpl, ToEnqueue); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; @@ -167,7 +167,6 @@ class DependsOnTests : public ::testing::Test { TEST_F(DependsOnTests, EnqueueNoMemObjTwoHostTasks) { // Checks enqueue of two dependent host tasks - detail::QueueImplPtr QueueHostImpl = MS.getDefaultHostQueue(); std::vector Events; detail::Command *Cmd1 = diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 3389769569e5e..e0ec582db065c 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -172,7 +172,7 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, } static void checkCleanupOnLeafUpdate( - MockScheduler &MS, detail::QueueImplPtr &QueueImpl, buffer &Buf, + MockScheduler &MS, detail::QueueImplPtr QueueImpl, buffer &Buf, detail::Requirement &MockReq, std::function SchedulerCall) { bool CommandDeleted = false; @@ -247,15 +247,10 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { checkCleanupOnLeafUpdate( MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) { detail::Command *Leaf = *Record->MWriteLeaves.begin(); - MS.addEmptyCmd(Leaf, {&MockReq}, QueueImpl, - detail::Command::BlockReason::HostTask, ToEnqueue); + MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, ToEnqueue); }); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - detail::QueueImplPtr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; checkCleanupOnLeafUpdate( - MS, DefaultHostQueue, Buf, MockReq, [&](detail::MemObjRecord *Record) { + MS, nullptr, Buf, MockReq, [&](detail::MemObjRecord *Record) { MS.getOrCreateAllocaForReq(Record, &MockReq, QueueImpl, ToEnqueue); }); // Check cleanup on exceeding leaf limit. 
diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 337ef2ef3d403..c19b494f9c484 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -77,11 +77,6 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { sycl::detail::QueueImplPtr InOrderQueueImpl = detail::getSyclObjImpl(InOrderQueue); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; - MockScheduler MS; int val; @@ -92,18 +87,18 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl, AuxCmds); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. 
detail::Command *Cmd = - MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); + MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); + Cmd = MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); } diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 36d8f459a324a..f3417b297bc31 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -36,8 +36,6 @@ TEST_F(SchedulerTest, LeafLimit) { unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", detail::SYCLConfig::reset}; - sycl::queue HQueue(detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl())); MockScheduler MS; std::vector> LeavesToAdd; std::unique_ptr MockDepCmd; diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp index 38d9ac784c09f..1af882a423af8 100644 --- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -61,7 +61,7 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) { AllocaCmd = MS.getOrCreateAllocaForReq( Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue); std::ignore = MS.getOrCreateAllocaForReq( - Rec, &MockReq, MS.getDefaultHostQueue(), ToEnqueue); + Rec, &MockReq, nullptr, ToEnqueue); DepCmd = std::make_unique(detail::getSyclObjImpl(Queue), MockReq); } diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index ea883041add66..39146ffaa95e8 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ 
b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -37,9 +37,8 @@ createGenericCommand(const std::shared_ptr &Q) { } std::shared_ptr -createEmptyCommand(const std::shared_ptr &Q, - const Requirement &Req) { - EmptyCommand *Cmd = new EmptyCommand(Q); +createEmptyCommand(const Requirement &Req) { + EmptyCommand *Cmd = new EmptyCommand(); Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); Cmd->MBlockReason = Command::BlockReason::HostAccessor; return std::shared_ptr{Cmd}; @@ -97,7 +96,7 @@ TEST_F(LeavesCollectionTest, PushBack) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createEmptyCommand(MockReq); Cmds.push_back(Cmd); LE.push_back(Cmds.back().get(), ToEnqueue); @@ -137,7 +136,7 @@ TEST_F(LeavesCollectionTest, Remove) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) { auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q)) - : createEmptyCommand(getSyclObjImpl(Q), MockReq); + : createEmptyCommand(MockReq); Cmds.push_back(Cmd); if (LE.push_back(Cmds.back().get(), ToEnqueue)) diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 5ab9cfbb43f5a..6ae6b9bfc2344 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -64,28 +64,22 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { sycl::queue Queue1{Dev}; sycl::detail::QueueImplPtr Q1 = sycl::detail::getSyclObjImpl(Queue1); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue(new detail::queue_impl( - detail::getSyclObjImpl(HostDevice), /*AsyncHandler=*/{}, - /*PropList=*/{})); - auto AllocaDep = [](sycl::detail::Command *, sycl::detail::Command *, sycl::detail::MemObjRecord *, std::vector &) {}; std::shared_ptr Record{ 
- new sycl::detail::MemObjRecord(DefaultHostQueue->getContextImplPtr(), 10, + new sycl::detail::MemObjRecord(nullptr, 10, AllocaDep)}; MemObjMock MemObj(Record); Req.MSYCLMemObj = &MemObj; - sycl::detail::AllocaCommand AllocaCmd1(DefaultHostQueue, Req, false); + sycl::detail::AllocaCommand AllocaCmd1(nullptr, Req, false); Record->MAllocaCommands.push_back(&AllocaCmd1); - MockCommand DepCmd(DefaultHostQueue, Req); - MockCommand DepDepCmd(DefaultHostQueue, Req); + MockCommand DepCmd(nullptr, Req); + MockCommand DepDepCmd(nullptr, Req); DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1}); DepDepCmd.MUsers.insert(&DepCmd); std::vector ToEnqueue; diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 635a8e9c3389c..20cf879d53daf 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -91,11 +91,6 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { redefinedMemCreateWithNativeHandle); sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - std::shared_ptr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; - MockScheduler MS; // Check non-host alloca with non-discard access mode { @@ -113,10 +108,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // order to perform a memory move. 
EXPECT_EQ(Record->MAllocaCommands.size(), 2U); detail::AllocaCommandBase *HostAllocaCmd = Record->MAllocaCommands[0]; - EXPECT_TRUE(HostAllocaCmd->getQueue()->is_host()); + EXPECT_TRUE(HostAllocaCmd->getQueue() == nullptr); EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); - EXPECT_TRUE(Record->MCurContext->is_host()); + EXPECT_TRUE(Record->MCurContext == nullptr); detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds); @@ -162,9 +157,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // another and the transfer is done via a write operation. std::vector AuxCmds; detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req, AuxCmds); + MS.getOrInsertMemObjRecord(nullptr, &Req, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); @@ -190,14 +185,14 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); // Memory movement operations should be omitted for discard access modes. detail::Command *MemoryMove = - MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue, AuxCmds); + MS.insertMemoryMove(Record, &DiscardReq, nullptr, AuxCmds); EXPECT_TRUE(MemoryMove == nullptr); // The current context for the record should still be modified. - EXPECT_EQ(Record->MCurContext, DefaultHostQueue->getContextImplPtr()); + EXPECT_EQ(Record->MCurContext, nullptr); } // Check that interoperability memory objects are initialized. 
{ diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index c97428b9d55c6..330ff7e0f02d2 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -122,21 +122,17 @@ TEST_F(SchedulerTest, QueueFlushing) { QueueImplA}; testCommandEnqueue(&UnmapCmd, QueueImplB, MockReq); - device HostDevice = detail::createSyclObjFromImpl( - detail::device_impl::getHostDeviceImpl()); - detail::QueueImplPtr DefaultHostQueue{ - new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; detail::AllocaCommand HostAllocaCmd = - detail::AllocaCommand(DefaultHostQueue, MockReq); + detail::AllocaCommand(nullptr, MockReq); detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, MockReq, &HostAllocaCmd, - QueueImplA, DefaultHostQueue}; + QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, &MockHostPtr, - QueueImplA, DefaultHostQueue}; + QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); std::unique_ptr CG{ diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 88ced1f25904a..20f82f9165c01 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -189,10 +189,9 @@ class MockScheduler : public sycl::detail::Scheduler { sycl::detail::EmptyCommand * addEmptyCmd(sycl::detail::Command *Cmd, const std::vector &Reqs, - const sycl::detail::QueueImplPtr &Queue, sycl::detail::Command::BlockReason Reason, std::vector &ToEnqueue) { - return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); + return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Reason, ToEnqueue); } sycl::detail::Command * diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 18c0b3e1a8070..838b60809472c 100644 --- 
a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -80,12 +80,9 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { unittest::ScopedEnvVar DisabledCleanup{ DisableCleanupName, "1", detail::SYCLConfig::reset}; - std::shared_ptr HQueueImpl(new detail::queue_impl( - detail::device_impl::getHostDeviceImpl(), /*AsyncHandler=*/{}, - /*PropList=*/{})); // Emulating processing of command group function - MockHandlerStreamInit MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(nullptr, true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -114,11 +111,11 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { static_cast(MainCG.get())->getStreams(); ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - Streams[0]->initStreamHost(HQueueImpl); + Streams[0]->initStreamHost(nullptr); MockScheduler MS; std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds); + detail::Command *NewCmd = MS.addCG(std::move(MainCG), nullptr, AuxCmds); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; From fcc7748699821b8a53db059de50b94dff5f96232 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 03:42:25 -0700 Subject: [PATCH 22/58] RT-buildable: restore incorrectly deleted code Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/memory_manager.cpp | 28 ++++++++++++++-- sycl/source/detail/memory_manager.hpp | 3 ++ sycl/source/detail/scheduler/commands.cpp | 41 +++++++++++++++++++---- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index e2c22f794f587..461cf8b85915c 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -266,6 +266,11 @@ void 
MemoryManager::releaseMemObj(ContextImplPtr TargetContext, return; } + if (!TargetContext) { + MemObj->releaseHostMem(MemAllocation); + return; + } + const PluginPtr &Plugin = TargetContext->getPlugin(); memReleaseHelper(Plugin, pi::cast(MemAllocation)); } @@ -283,6 +288,19 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } +void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, + const sycl::property_list &) { + std::ignore = HostPtrReadOnly; + std::ignore = Size; + + // Can return user pointer directly if it is not a nullptr. + if (UserPtr) + return UserPtr; + + return MemObj->allocateHostMem(); +} + void *MemoryManager::allocateInteropMemObject( ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, @@ -379,9 +397,10 @@ void *MemoryManager::allocateMemBuffer( const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; - if (UserPtr && InteropContext) - MemPtr = - allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, + if (!TargetContext) + MemPtr = allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); + else if (UserPtr && InteropContext) + MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); else MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size, @@ -398,6 +417,9 @@ void *MemoryManager::allocateMemImage( const EventImplPtr &InteropEvent, const ContextImplPtr &InteropContext, const sycl::property_list &PropsList, sycl::detail::pi::PiEvent &OutEventToWait) { + if (!TargetContext) + return allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, + PropsList); if (UserPtr && InteropContext) return allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); diff --git 
a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 7be17898bc0d9..deefda9ccd8ff 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -85,6 +85,9 @@ class __SYCL_EXPORT MemoryManager { static void releaseMemObj(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, void *UserPtr); + static void *allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, + bool HostPtrReadOnly, size_t Size, + const sycl::property_list &PropsList); static void * allocateInteropMemObject(ContextImplPtr TargetContext, void *UserPtr, const EventImplPtr &InteropEvent, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1683b874fba5d..b1713473f2de3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -459,8 +459,38 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { - assert(Queue && "Device queue is expected here"); if (!EventImpls.empty()) { + if (!Queue) { + // Host queue can wait for events from different contexts, i.e. it may + // contain events with different contexts in its MPreparedDepsEvents. + // OpenCL 2.1 spec says that clWaitForEvents will return + // CL_INVALID_CONTEXT if events specified in the list do not belong to + // the same context. Thus we split all the events into per-context map. + // An example. We have two queues for the same CPU device: Q1, Q2. Thus + // we will have two different contexts for the same CPU device: C1, C2. + // Also we have default host queue. This queue is accessible via + // Scheduler. Now, let's assume we have three different events: E1(C1), + // E2(C1), E3(C2). The command's MPreparedDepsEvents will contain all + // three events (E1, E2, E3). 
Now, if piEventsWait is called for all + // three events we'll experience failure with CL_INVALID_CONTEXT 'cause + // these events refer to different contexts. + std::map> + RequiredEventsPerContext; + + for (const EventImplPtr &Event : EventImpls) { + ContextImplPtr Context = Event->getContextImpl(); + assert(Context.get() && + "Only non-host events are expected to be waited for here"); + RequiredEventsPerContext[Context.get()].push_back(Event); + } + + for (auto &CtxWithEvents : RequiredEventsPerContext) { + std::vector RawEvents = + getPiEvents(CtxWithEvents.second); + CtxWithEvents.first->getPlugin()->call( + RawEvents.size(), RawEvents.data()); + } + } else { #ifndef NDEBUG for (const EventImplPtr &Event : EventImpls) assert(!Event->isHost() && @@ -477,6 +507,7 @@ void Command::waitForEvents(QueueImplPtr Queue, Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); } + } } /// It is safe to bind MPreparedDepsEvents and MPreparedHostDepsEvents @@ -700,13 +731,11 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext == WorkerContext) - MPreparedDepsEvents.push_back(std::move(DepEvent)); - else - { + if (DepEventContext != WorkerContext && WorkerContext){ Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); - } + } else + MPreparedDepsEvents.push_back(std::move(DepEvent)); return ConnectionCmd; } From 7aa76d9f1e51eb430909125e9c4acc54518c7e81 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 05:59:28 -0700 Subject: [PATCH 23/58] RT buildable: check-sycl-AccessorTests passed Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_impl.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 17 +++++++++-------- 
sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e34597aa008d1..e38c15e04879a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -566,7 +566,7 @@ void event_impl::setCommand(void *Cmd) { MCommand = Cmd; auto TypedCommand = static_cast(Cmd); if (TypedCommand) - MIsHostTask = TypedCommand->isHostTask(); + MIsHostEvent = TypedCommand->getWorkerContext() == nullptr; } } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 7c1eb99e3b286..237939ea37bd8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -337,7 +337,7 @@ class event_impl { void setEnqueued() { MIsEnqueued = true; } - bool isHost() { return MIsHostTask; } + bool isHost() { return MIsHostEvent; } protected: // When instrumentation is enabled emits trace event for event wait begin and @@ -406,7 +406,7 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; - bool MIsHostTask{false}; + bool MIsHostEvent{false}; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b1713473f2de3..f7b9805ff17ec 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -459,6 +459,11 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { + #ifndef NDEBUG + for (const EventImplPtr &Event : EventImpls) + assert(!Event->isHost() && + "Only non-host events are expected to be waited for here"); +#endif if (!EventImpls.empty()) { if (!Queue) { // Host queue can wait for events from different contexts, i.e. 
it may @@ -491,12 +496,6 @@ void Command::waitForEvents(QueueImplPtr Queue, RawEvents.size(), RawEvents.data()); } } else { -#ifndef NDEBUG - for (const EventImplPtr &Event : EventImpls) - assert(!Event->isHost() && - "Only non-host events are expected to be waited for here"); -#endif - std::vector RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, MWorkerQueue); @@ -1488,7 +1487,8 @@ void MemCpyCommand::emitInstrumentationData() { } ContextImplPtr MemCpyCommand::getWorkerContext() const { - assert(MWorkerQueue && "Worker queue for mem cpy command must be not nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } @@ -1661,7 +1661,8 @@ void MemCpyCommandHost::emitInstrumentationData() { } ContextImplPtr MemCpyCommandHost::getWorkerContext() const { - assert(MWorkerQueue && "Worker queue for mem cpy host command must be not nullptr"); + if (!MWorkerQueue) + return nullptr; return MWorkerQueue->getContextImplPtr(); } diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 87f005fe8ca78..a95b9b43d7f5c 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -209,7 +209,7 @@ void SYCLMemObjT::detachMemoryObject( !MOwnNativeHandle || (MInteropContext && !MInteropContext->isOwnedByRuntime()); - if (MRecord && MRecord->MCurContext->isOwnedByRuntime() && + if (MRecord && MRecord->MCurContext && MRecord->MCurContext->isOwnedByRuntime() && !InteropObjectsUsed && (!MHostPtrProvided || MIsInternal)) Scheduler::getInstance().deferMemObjRelease(Self); } From dc4a94ea111456a188ec60eaeef7ff9a053bf3bd Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 06:28:04 -0700 Subject: [PATCH 24/58] RT-buildable: enable unittests 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 3 ++- sycl/source/detail/scheduler/scheduler.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git 
a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e38c15e04879a..8f676a97f187d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -81,7 +81,7 @@ void event_impl::waitInternal(bool *Success) { } void event_impl::setComplete() { - if (!MEvent) { + if (MIsHostEvent || !MEvent) { { std::unique_lock lock(MMutex); #ifndef NDEBUG @@ -126,6 +126,7 @@ const PluginPtr &event_impl::getPlugin() { void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { + MIsHostEvent = Context == nullptr; MContext = Context; MIsContextInitialized = true; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7e5db05daf01a..d3fe7b523e689 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -459,7 +459,8 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd) { std::vector ToCleanUp; auto CmdEvent = Cmd->getEvent(); - auto QueueImpl = Cmd->getQueue(); + auto QueueImpl = CmdEvent->getSubmittedQueue(); + assert(QueueImpl && "Submitted queue for host task must not be null"); { ReadLockT Lock = acquireReadLock(); From 8c57888b2a5a733d248322287e599d0f08855444 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Jun 2024 08:52:24 -0700 Subject: [PATCH 25/58] RT-buildable: unittests enabling 3 Signed-off-by: Tikhomirova, Kseniya --- .../source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/stream_impl.cpp | 70 +++++++++---------- .../scheduler/StreamInitDependencyOnHost.cpp | 10 ++- 3 files changed, 44 insertions(+), 38 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 8778ad6927c3e..6d3fbdd157618 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1342,7 +1342,7 @@ Command 
*Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Cmd->getQueue()); + std::move(ConnectCG), nullptr); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7268293433e82..cb46510551a30 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -98,41 +98,41 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { // We don't want stream flushing to be blocking operation that is why submit a // host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. - auto Q = LeadEvent->getSubmittedQueue(); - event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { - auto BufHostAcc = - Buf_.get_access( - cgh, range<1>(BufferSize_), id<1>(OffsetSize)); - // Create accessor to the flush buffer even if not using it yet. Otherwise - // kernel will be a leaf for the flush buffer and scheduler will not be able - // to cleanup the kernel. TODO: get rid of finalize method by using host - // accessor to the flush buffer. - auto FlushBufHostAcc = - FlushBuf_ - .get_access( - cgh); - cgh.host_task([=] { - if (!BufHostAcc.empty()) { - // SYCL 2020, 4.16: - // > If the totalBufferSize or workItemBufferSize limits are exceeded, - // > it is implementation-defined whether the streamed characters - // > exceeding the limit are output, or silently ignored/discarded, and - // > if output it is implementation-defined whether those extra - // > characters exceeding the workItemBufferSize limit count toward the - // > totalBufferSize limit. Regardless of this implementation defined - // > behavior of output exceeding the limits, no undefined or erroneous - // > behavior is permitted of an implementation when the limits are - // > exceeded. 
- // - // Defend against zero-sized buffers (although they'd have no practical - // use). - printf("%s", &(BufHostAcc[0])); - } - fflush(stdout); - }); - }); - LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); + // auto Q = LeadEvent->getSubmittedQueue(); + // event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { + // auto BufHostAcc = + // Buf_.get_access( + // cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + // // Create accessor to the flush buffer even if not using it yet. Otherwise + // // kernel will be a leaf for the flush buffer and scheduler will not be able + // // to cleanup the kernel. TODO: get rid of finalize method by using host + // // accessor to the flush buffer. + // auto FlushBufHostAcc = + // FlushBuf_ + // .get_access( + // cgh); + // cgh.host_task([=] { + // if (!BufHostAcc.empty()) { + // // SYCL 2020, 4.16: + // // > If the totalBufferSize or workItemBufferSize limits are exceeded, + // // > it is implementation-defined whether the streamed characters + // // > exceeding the limit are output, or silently ignored/discarded, and + // // > if output it is implementation-defined whether those extra + // // > characters exceeding the workItemBufferSize limit count toward the + // // > totalBufferSize limit. Regardless of this implementation defined + // // > behavior of output exceeding the limits, no undefined or erroneous + // // > behavior is permitted of an implementation when the limits are + // // > exceeded. + // // + // // Defend against zero-sized buffers (although they'd have no practical + // // use). 
+ // printf("%s", &(BufHostAcc[0])); + // } + // fflush(stdout); + // }); + // }); + // LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); + // Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); } } // namespace detail diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 838b60809472c..4b34a1f4d6828 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace sycl; @@ -81,8 +82,13 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { DisableCleanupName, "1", detail::SYCLConfig::reset}; + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + sycl::queue Q(Plt.get_devices()[0]); + std::shared_ptr QImpl = detail::getSyclObjImpl(Q); + // Emulating processing of command group function - MockHandlerStreamInit MockCGH(nullptr, true); + MockHandlerStreamInit MockCGH(QImpl, true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -111,7 +117,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { static_cast(MainCG.get())->getStreams(); ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - Streams[0]->initStreamHost(nullptr); + Streams[0]->initStreamHost(QImpl); MockScheduler MS; std::vector AuxCmds; From abfc5bfbdf48b8bfe48cfb17e68d9a91bb64ba9e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 17 Jun 2024 07:49:32 -0700 Subject: [PATCH 26/58] tiny cleanup Signed-off-by: Tikhomirova, Kseniya --- .../source/detail/scheduler/graph_builder.cpp | 22 +++++++++---------- sycl/source/detail/scheduler/scheduler.hpp | 3 +-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 6d3fbdd157618..1932f18d697ac 100644 --- 
a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -868,7 +868,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, Command::BlockReason Reason, - std::vector &ToEnqueue, const bool AddDepsToLeaves) { + std::vector &ToEnqueue) { EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) @@ -889,19 +889,17 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( if (!Reqs.size()) Cmd->addUser(EmptyCmd); - if (AddDepsToLeaves) { - const std::vector &Deps = Cmd->MDeps; - std::vector ToCleanUp; - for (const DepDesc &Dep : Deps) { - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + const std::vector &Deps = Cmd->MDeps; + std::vector ToCleanUp; + for (const DepDesc &Dep : Deps) { + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); - } - for (Command *Cmd : ToCleanUp) - cleanupCommand(Cmd); + updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); } + for (Command *Cmd : ToCleanUp) + cleanupCommand(Cmd); return EmptyCmd; } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d3462872c9ddf..4e0bf465d59fd 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -742,8 +742,7 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, Command::BlockReason Reason, - std::vector &ToEnqueue, - const bool AddDepsToLeaves = true); + std::vector &ToEnqueue); void createGraphForCommand(Command *NewCmd, CG &CG, bool isInteropTask, std::vector &Reqs, From 
75f6eab8dd7a8f5b008d1b955bad3c3fc36914ba Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 17 Jun 2024 07:21:30 -0700 Subject: [PATCH 27/58] move stream_impl flush Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 19 ++++- sycl/source/detail/queue_impl.hpp | 3 +- sycl/source/detail/scheduler/scheduler.cpp | 11 --- sycl/source/detail/stream_impl.cpp | 83 ++++++------------- sycl/source/detail/stream_impl.hpp | 10 +-- .../scheduler/CommandsWaitForEvents.cpp | 2 +- .../scheduler/StreamInitDependencyOnHost.cpp | 62 -------------- 7 files changed, 49 insertions(+), 141 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 298d4078cc922..af7af19ede120 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -361,8 +361,10 @@ event queue_impl::submit_impl(const std::function &CGF, // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. const CG::CGTYPE Type = Handler.getType(); - event Event = detail::createSyclObjFromImpl( - std::make_shared()); + event Event = detail::createSyclObjFromImpl(std::make_shared()); + std::vector Streams; + if (Type == CG::Kernel) + Streams = std::move(Handler.MStreamStorage); if (PostProcess) { bool IsKernel = Type == CG::Kernel; @@ -380,6 +382,19 @@ event queue_impl::submit_impl(const std::function &CGF, finalizeHandler(Handler, Event); addEvent(Event); + + auto EventImpl = detail::getSyclObjImpl(Event); + for (auto &Stream : Streams) { + // We don't want stream flushing to be blocking operation that is why submit a + // host task to print stream buffer. It will fire up as soon as the kernel + // finishes execution. 
+ event FlushEvent = submit_impl([&](handler &ServiceCGH) { + Stream->generateFlushCommand(ServiceCGH); + }, Self, PrimaryQueue, SecondaryQueue, Loc, {}); + EventImpl->attachEventToComplete(detail::getSyclObjImpl(FlushEvent)); + registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); + } + return Event; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c3d0c4c5752f8..e72ded829a798 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -13,10 +13,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -26,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d3fe7b523e689..52eb59b225004 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -99,13 +99,6 @@ EventImplPtr Scheduler::addCG( EventImplPtr NewEvent = nullptr; const CG::CGTYPE Type = CommandGroup->getType(); std::vector AuxiliaryCmds; - std::vector Streams; - - if (Type == CG::Kernel) { - auto *CGExecKernelPtr = static_cast(CommandGroup.get()); - Streams = CGExecKernelPtr->getStreams(); - CGExecKernelPtr->clearStreams(); - } std::vector> AuxiliaryResources; AuxiliaryResources = CommandGroup->getAuxiliaryResources(); CommandGroup->clearAuxiliaryResources(); @@ -143,10 +136,6 @@ EventImplPtr Scheduler::addCG( if (ShouldEnqueue) { enqueueCommandForCG(NewEvent, AuxiliaryCmds); - - for (const auto &StreamImplPtr : Streams) { - StreamImplPtr->flush(NewEvent); - } } if (!AuxiliaryResources.empty()) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index cb46510551a30..7d926fbdb83dd 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -76,65 +76,36 @@ size_t stream_impl::get_size() const { return BufferSize_; } size_t 
stream_impl::get_max_statement_size() const { return MaxStatementSize_; } -void stream_impl::initStreamHost(QueueImplPtr Queue) { - // Real size of full flush buffer is saved only in buffer_impl field of - // FlushBuf object. - size_t FlushBufSize = getSyclObjImpl(FlushBuf_)->size(); - - auto Q = createSyclObjFromImpl(Queue); - Q.submit([&](handler &cgh) { - auto FlushBufAcc = FlushBuf_.get_access( - cgh, range<1>(1), id<1>(0)); - cgh.host_task([=] { - char *FlushBufPtr = FlushBufAcc.get_pointer(); - std::memset(FlushBufPtr, 0, FlushBufSize); - }); +void stream_impl::generateFlushCommand(handler& cgh) +{ + // Create accessor to the flush buffer even if not using it yet. Otherwise + // kernel will be a leaf for the flush buffer and scheduler will not be able + // to cleanup the kernel. TODO: get rid of finalize method by using host + // accessor to the flush buffer. + host_accessor FlushBuffHostAcc(FlushBuf_, cgh); + host_accessor BufHostAcc (Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + + cgh.host_task([=] { + if (!BufHostAcc.empty()) { + // SYCL 2020, 4.16: + // > If the totalBufferSize or workItemBufferSize limits are exceeded, + // > it is implementation-defined whether the streamed characters + // > exceeding the limit are output, or silently ignored/discarded, and + // > if output it is implementation-defined whether those extra + // > characters exceeding the workItemBufferSize limit count toward the + // > totalBufferSize limit. Regardless of this implementation defined + // > behavior of output exceeding the limits, no undefined or erroneous + // > behavior is permitted of an implementation when the limits are + // > exceeded. + // + // Defend against zero-sized buffers (although they'd have no practical + // use). 
+ printf("%s", &(BufHostAcc[0])); + } + fflush(stdout); }); } -void stream_impl::flush(const EventImplPtr &LeadEvent) { - assert(LeadEvent && "LeadEvent is expected to be not nullptr"); - // We don't want stream flushing to be blocking operation that is why submit a - // host task to print stream buffer. It will fire up as soon as the kernel - // finishes execution. - // auto Q = LeadEvent->getSubmittedQueue(); - // event Event = detail::createSyclObjFromImpl(Q).submit([&](handler &cgh) { - // auto BufHostAcc = - // Buf_.get_access( - // cgh, range<1>(BufferSize_), id<1>(OffsetSize)); - // // Create accessor to the flush buffer even if not using it yet. Otherwise - // // kernel will be a leaf for the flush buffer and scheduler will not be able - // // to cleanup the kernel. TODO: get rid of finalize method by using host - // // accessor to the flush buffer. - // auto FlushBufHostAcc = - // FlushBuf_ - // .get_access( - // cgh); - // cgh.host_task([=] { - // if (!BufHostAcc.empty()) { - // // SYCL 2020, 4.16: - // // > If the totalBufferSize or workItemBufferSize limits are exceeded, - // // > it is implementation-defined whether the streamed characters - // // > exceeding the limit are output, or silently ignored/discarded, and - // // > if output it is implementation-defined whether those extra - // // > characters exceeding the workItemBufferSize limit count toward the - // // > totalBufferSize limit. Regardless of this implementation defined - // // > behavior of output exceeding the limits, no undefined or erroneous - // // > behavior is permitted of an implementation when the limits are - // // > exceeded. - // // - // // Defend against zero-sized buffers (although they'd have no practical - // // use). 
- // printf("%s", &(BufHostAcc[0])); - // } - // fflush(stdout); - // }); - // }); - // LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); - // Q->registerStreamServiceEvent(detail::getSyclObjImpl(Event)); -} - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index cd3d503b4b894..aacb495537943 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -41,14 +41,6 @@ class __SYCL_EXPORT stream_impl { // buffer and offset in the flush buffer GlobalOffsetAccessorT accessGlobalOffset(handler &CGH); - // Initialize flush buffers on host. - void initStreamHost(QueueImplPtr Queue); - - // Enqueue task to copy stream buffer to the host and print the contents - // The host task event is then registered for post processing in the - // LeadEvent as well as in queue LeadEvent associated with. - void flush(const EventImplPtr &LeadEvent); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; @@ -67,6 +59,8 @@ class __SYCL_EXPORT stream_impl { return PropList_.get_property(); } + void generateFlushCommand(handler& cgh); + private: // Size of the stream buffer size_t BufferSize_; diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 499a45d0fe70f..43aa7a88775d7 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -163,7 +163,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { auto EventImplProxy = std::static_pointer_cast(EventImpl); - ASSERT_TRUE(EventImplProxy->MPostCompleteEvents.size() == 1) + ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1) << "Expected 1 post complete event"; Q.wait(); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 4b34a1f4d6828..d1e7f22aa9485 100644 --- 
a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -74,65 +74,3 @@ static bool ValidateDepCommandsTree(const detail::Command *Cmd, return false; } - -TEST_F(SchedulerTest, StreamInitDependencyOnHost) { - // Disable post enqueue cleanup so that it doesn't interfere with dependency - // checks. - unittest::ScopedEnvVar DisabledCleanup{ - DisableCleanupName, "1", - detail::SYCLConfig::reset}; - - sycl::unittest::PiMock Mock; - sycl::platform Plt = Mock.getPlatform(); - sycl::queue Q(Plt.get_devices()[0]); - std::shared_ptr QImpl = detail::getSyclObjImpl(Q); - - // Emulating processing of command group function - MockHandlerStreamInit MockCGH(QImpl, true); - MockCGH.setType(detail::CG::Kernel); - - auto EmptyKernel = [](sycl::nd_item<1>) {}; - MockCGH - .setHostKernel, 1, class Empty>( - EmptyKernel); - MockCGH.setNDRangeDesc( - sycl::nd_range<1>{sycl::range<1>{1}, sycl::range<1>{1}}); - - // Emulating construction of stream object inside command group - detail::StreamImplPtr StreamImpl = - std::make_shared(1024, 200, MockCGH); - detail::GlobalBufAccessorT FlushBufAcc = - StreamImpl->accessGlobalFlushBuf(MockCGH); - MockCGH.addStream(StreamImpl); - - detail::SYCLMemObjI *FlushBufMemObjPtr = - detail::getSyclObjImpl(FlushBufAcc)->MSYCLMemObj; - ASSERT_TRUE(!!FlushBufMemObjPtr) - << "Memory object for stream flush buffer not initialized"; - - std::unique_ptr MainCG = MockCGH.finalize(); - - // Emulate call of Scheduler::addCG - std::vector Streams = - static_cast(MainCG.get())->getStreams(); - ASSERT_EQ(Streams.size(), 1u) << "Invalid number of stream objects"; - - Streams[0]->initStreamHost(QImpl); - - MockScheduler MS; - std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), nullptr, AuxCmds); - ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; - ASSERT_GT(NewCmd->MDeps.size(), 0u) - << "No deps appeared in the new exec kernel command"; - - // Searching in 
dependencies for CG execution command that initializes flush - // buffer of a stream that is supposed to be used inside NewCmd's CG. - // Tree of dependencies should look like: - // [MAIN_CG] -> [EMPTY_NODE {FlushBufMemObj}] -> [FILL_CG {FlushBufMemObj}] -> - // [[ALLOC_TASK {FlushBufMemObj}] - std::vector DepCmdsTypes({CmdTypeTy::RUN_CG, // FILL_CG - CmdTypeTy::ALLOCA}); - ASSERT_TRUE(ValidateDepCommandsTree(NewCmd, DepCmdsTypes, FlushBufMemObjPtr)) - << "Dependency on stream flush buffer initialization not found"; -} From be12c01ecc837de0ff5f7f3c2f17ca34b03d921d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 04:44:06 -0700 Subject: [PATCH 28/58] test fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/device_impl.cpp | 3 ++ sycl/source/detail/event_impl.cpp | 30 +++++++++---------- sycl/source/detail/image_impl.cpp | 2 ++ .../scheduler/CommandsWaitForEvents.cpp | 2 +- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 846972254f7d9..e24b6f6f2510e 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -327,6 +327,9 @@ bool device_impl::has(aspect Aspect) const { size_t return_size = 0; switch (Aspect) { + case aspect::host: + //Deprecated + return false; case aspect::cpu: return is_cpu(); case aspect::gpu: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0d2976e7ec271..93dc4b7fca1b1 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -155,15 +155,13 @@ event_impl::event_impl(const QueueImplPtr &Queue) MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { if (Queue) this->setContextImpl(Queue->getContextImplPtr()); - if (!Queue) { + else { MState.store(HES_NotComplete); - if (Queue->has_property()) { - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw 
sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Out of host memory " + - codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); - } + MHostProfilingInfo.reset(new HostProfilingInfo()); + if (!MHostProfilingInfo) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Out of host memory " + + codeToString(PI_ERROR_OUT_OF_HOST_MEMORY)); return; } MState.store(HES_Complete); @@ -381,13 +379,15 @@ event_impl::get_info() { if (MState == HES_Discarded) return info::event_command_status::ext_oneapi_unknown; - // Command is enqueued and PiEvent is ready - if (MEvent) - return get_event_info( - this->getHandleRef(), this->getPlugin()); - // Command is blocked and not enqueued, PiEvent is not assigned yet - else if (MCommand) - return sycl::info::event_command_status::submitted; + if (!MIsHostEvent) { + // Command is enqueued and PiEvent is ready + if (MEvent) + return get_event_info( + this->getHandleRef(), this->getPlugin()); + // Command is blocked and not enqueued, PiEvent is not assigned yet + else if (MCommand) + return sycl::info::event_command_status::submitted; + } return MState.load() != HES_Complete ? 
sycl::info::event_command_status::submitted diff --git a/sycl/source/detail/image_impl.cpp b/sycl/source/detail/image_impl.cpp index 0b512ae1aedbe..e5bacd33fc70d 100644 --- a/sycl/source/detail/image_impl.cpp +++ b/sycl/source/detail/image_impl.cpp @@ -471,6 +471,8 @@ bool image_impl::checkImageFormat( } std::vector image_impl::getDevices(const ContextImplPtr Context) { + if (!Context) + return {}; return Context->get_info(); } diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 43aa7a88775d7..daf8599947ad2 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -163,7 +163,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) { auto EventImplProxy = std::static_pointer_cast(EventImpl); - ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1) + ASSERT_EQ(EventImplProxy->MPostCompleteEvents.size(), 1u) << "Expected 1 post complete event"; Q.wait(); From e043ee01f185cecac5c0cbd2648853ac0ff4c6db Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:35:10 -0700 Subject: [PATCH 29/58] restore & update ABI - not breaking Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 9 +++++++++ sycl/source/detail/stream_impl.hpp | 9 +++++++++ sycl/test/abi/sycl_symbols_linux.dump | 17 +++++++++-------- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7d926fbdb83dd..75c80745ec71c 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -106,6 +106,15 @@ void stream_impl::generateFlushCommand(handler& cgh) }); } + // ABI break: remove + void stream_impl::initStreamHost(QueueImplPtr ){}; + + // ABI break: remove + void stream_impl::flush(const EventImplPtr &) {}; + + // ABI break: remove + void stream_impl::flush() {}; + } // namespace detail } // namespace _V1 } // namespace 
sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index aacb495537943..4fc1f4b1d5a8a 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -41,6 +41,15 @@ class __SYCL_EXPORT stream_impl { // buffer and offset in the flush buffer GlobalOffsetAccessorT accessGlobalOffset(handler &CGH); + // ABI break: remove + void initStreamHost(QueueImplPtr); + + // ABI break: remove + void flush(const EventImplPtr &); + + // ABI break: remove + void flush(); + size_t size() const noexcept; size_t get_work_item_buffer_size() const; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 0edaaa25b4ba1..c60fdb1318905 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3119,6 +3119,7 @@ _ZN4sycl3_V15queue10mem_adviseEPKvmiRKSt6vectorINS0_5eventESaIS5_EERKNS0_6detail _ZN4sycl3_V15queue10wait_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZN4sycl3_V15queue17discard_or_returnERKNS0_5eventE _ZN4sycl3_V15queue18throw_asynchronousEv _ZN4sycl3_V15queue20memcpyToDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE @@ -3230,6 +3231,7 @@ _ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_impl _ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl20generateFlushCommandERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE _ZN4sycl3_V16detail11stream_impl5flushEv _ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE @@ -3621,6 +3623,7 @@ 
_ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm _ZN4sycl3_V17handler28setStateExplicitKernelBundleEv _ZN4sycl3_V17handler30memcpyFromHostOnlyDeviceGlobalEPvPKvbmm _ZN4sycl3_V17handler30verifyUsedKernelBundleInternalENS0_6detail11string_viewE +_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17handler34ext_oneapi_wait_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler36ext_oneapi_signal_external_semaphoreENS0_3ext6oneapi12experimental24interop_semaphore_handleE _ZN4sycl3_V17handler6memcpyEPvPKvm @@ -3633,7 +3636,6 @@ _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb -_ZN4sycl3_V17handler32verifyDeviceHasProgressGuaranteeENS0_3ext6oneapi12experimental26forward_progress_guaranteeENS4_15execution_scopeES6_ _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V17samplerC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE @@ -3748,7 +3750,6 @@ _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue4cuda18use_default_streamEEEbv _ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv _ZNK4sycl3_V15queue16ext_oneapi_emptyEv -_ZN4sycl3_V15queue15ext_oneapi_prodEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv _ZNK4sycl3_V15queue16get_backend_infoINS0_4info6device7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv 
_ZNK4sycl3_V15queue16get_backend_infoINS0_4info8platform7versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv @@ -3973,6 +3974,12 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22m _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device22max_image_linear_widthEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device23max_image_linear_heightEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device26max_image_linear_row_pitchEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv 
+_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device15supports_fusionEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext8codeplay12experimental4info6device28max_registers_per_work_groupEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device10extensionsEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv @@ -4084,12 +4091,6 @@ _ZNK4sycl3_V16device13get_info_implINS0_4info6device7versionEEENS0_6detail11ABIN _ZNK4sycl3_V16device13get_info_implINS0_4info6device8atomic64EEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device8platformEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_4info6device9vendor_idEEENS0_6detail11ABINeutralTINS6_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device32work_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31sub_group_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv 
-_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE2EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE3EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv -_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device31work_item_progress_capabilitiesILNS5_15execution_scopeE1EEEEENS0_6detail11ABINeutralTINSB_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13has_extensionERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK4sycl3_V16device14is_acceleratorEv _ZNK4sycl3_V16device16get_backend_infoINS0_4info6device15backend_versionEEENS0_6detail20is_backend_info_descIT_E11return_typeEv From cea7c7271f0172ea8b45db2b3b221d4d5cb11937 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:48:29 -0700 Subject: [PATCH 30/58] clang git-clang-format run on changed files Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 4 +- sycl/source/detail/context_impl.cpp | 3 +- sycl/source/detail/device_impl.cpp | 5 +- sycl/source/detail/device_impl.hpp | 12 +- sycl/source/detail/event_impl.cpp | 18 +- sycl/source/detail/memory_manager.cpp | 28 +-- sycl/source/detail/platform_impl.hpp | 8 +- sycl/source/detail/program_impl.cpp | 22 +-- sycl/source/detail/program_impl.hpp | 4 +- sycl/source/detail/queue_impl.cpp | 15 +- sycl/source/detail/queue_impl.hpp | 19 +- sycl/source/detail/scheduler/commands.cpp | 165 +++++++++--------- sycl/source/detail/scheduler/commands.hpp | 12 +- .../source/detail/scheduler/graph_builder.cpp | 59 +++---- sycl/source/detail/scheduler/scheduler.cpp | 8 +- sycl/source/detail/stream_impl.cpp | 21 +-- sycl/source/detail/stream_impl.hpp | 2 +- sycl/source/detail/sycl_mem_obj_t.cpp | 5 +- 
sycl/source/detail/usm/usm_impl.cpp | 48 ++--- .../scheduler/EnqueueWithDependsOnDeps.cpp | 3 +- sycl/unittests/scheduler/GraphCleanup.cpp | 3 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 3 +- .../scheduler/LeafLimitDiffContexts.cpp | 4 +- sycl/unittests/scheduler/LeavesCollection.cpp | 3 +- .../scheduler/LinkedAllocaDependencies.cpp | 3 +- .../scheduler/NoHostUnifiedMemory.cpp | 3 +- sycl/unittests/scheduler/QueueFlushing.cpp | 10 +- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 28 files changed, 239 insertions(+), 253 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 70b12836fc297..1261096b82047 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -56,13 +56,13 @@ context::context(const std::vector &DeviceList, throw invalid_parameter_error("DeviceList is empty.", PI_ERROR_INVALID_VALUE); } - + const auto &RefPlatform = detail::getSyclObjImpl(DeviceList[0].get_platform())->getHandleRef(); if (std::any_of(DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { return (detail::getSyclObjImpl(CurrentDevice.get_platform()) - ->getHandleRef() != RefPlatform); + ->getHandleRef() != RefPlatform); })) throw invalid_parameter_error( "Can't add devices across platforms to a single context.", diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0c79ed2f70462..8ae13b345b250 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -33,8 +33,7 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(1, Device), MContext(nullptr), MPlatform(detail::getSyclObjImpl(Device.get_platform())), - MPropList(PropList), - MSupportBufferLocationByDevices(NotChecked) { + MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { MKernelProgramCache.setContextPtr(this); } diff --git a/sycl/source/detail/device_impl.cpp 
b/sycl/source/detail/device_impl.cpp index e24b6f6f2510e..ebad36158cfc6 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -34,8 +34,7 @@ device_impl::device_impl(sycl::detail::pi::PiDevice Device, device_impl::device_impl(pi_native_handle InteropDeviceHandle, sycl::detail::pi::PiDevice Device, PlatformImplPtr Platform, const PluginPtr &Plugin) - : MDevice(Device), - MDeviceHostBaseTime(std::make_pair(0, 0)) { + : MDevice(Device), MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -328,7 +327,7 @@ bool device_impl::has(aspect Aspect) const { switch (Aspect) { case aspect::host: - //Deprecated + // Deprecated return false; case aspect::cpu: return is_cpu(); diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 9249bbba59fe8..a3344ecdd3870 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -64,18 +64,14 @@ class device_impl { /// For host device an exception is thrown /// /// \return non-constant reference to PI device - sycl::detail::pi::PiDevice &getHandleRef() { - return MDevice; - } + sycl::detail::pi::PiDevice &getHandleRef() { return MDevice; } /// Get constant reference to PI device /// /// For host device an exception is thrown /// /// \return constant reference to PI device - const sycl::detail::pi::PiDevice &getHandleRef() const { - return MDevice; - } + const sycl::detail::pi::PiDevice &getHandleRef() const { return MDevice; } /// Check if device is a CPU device /// @@ -90,9 +86,7 @@ class device_impl { /// Check if device is an accelerator device /// /// \return true if SYCL device is an accelerator device - bool is_accelerator() const { - return MType == PI_DEVICE_TYPE_ACC; - } + bool is_accelerator() const { return MType == PI_DEVICE_TYPE_ACC; } /// Return device type /// diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 
93dc4b7fca1b1..7d91129f25b51 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -38,8 +38,8 @@ void event_impl::ensureContextInitialized() { return; const device SyclDevice; - this->setContextImpl(detail::queue_impl::getDefaultOrNew( - detail::getSyclObjImpl(SyclDevice))); + this->setContextImpl( + detail::queue_impl::getDefaultOrNew(detail::getSyclObjImpl(SyclDevice))); } event_impl::~event_impl() { @@ -134,8 +134,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), - MIsFlushed(true), MState(HES_Complete) { + MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), + MState(HES_Complete) { sycl::detail::pi::PiContext TempContext; getPlugin()->call( @@ -150,9 +150,9 @@ event_impl::event_impl(sycl::detail::pi::PiEvent Event, } event_impl::event_impl(const QueueImplPtr &Queue) - : MQueue{Queue}, - MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, - MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { + : MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, + MFallbackProfiling{MIsProfilingEnabled && Queue && + Queue->isProfilingFallback()} { if (Queue) this->setContextImpl(Queue->getContextImplPtr()); else { @@ -412,7 +412,7 @@ event_impl::get_backend_info() const { } // If the queue has been released, no platform will be associated // so return empty string. 
- return ""; + return ""; } template <> @@ -571,7 +571,7 @@ bool event_impl::isCompleted() { void event_impl::setCommand(void *Cmd) { MCommand = Cmd; - auto TypedCommand = static_cast(Cmd); + auto TypedCommand = static_cast(Cmd); if (TypedCommand) MIsHostEvent = TypedCommand->getWorkerContext() == nullptr; } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 461cf8b85915c..6f30ceef8eb51 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -398,9 +398,11 @@ void *MemoryManager::allocateMemBuffer( sycl::detail::pi::PiEvent &OutEventToWait) { void *MemPtr; if (!TargetContext) - MemPtr = allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); + MemPtr = + allocateHostMemory(MemObj, UserPtr, HostPtrReadOnly, Size, PropsList); else if (UserPtr && InteropContext) - MemPtr = allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, + MemPtr = + allocateInteropMemObject(TargetContext, UserPtr, InteropEvent, InteropContext, PropsList, OutEventToWait); else MemPtr = allocateBufferObject(TargetContext, UserPtr, HostPtrReadOnly, Size, @@ -665,7 +667,8 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, sycl::detail::pi::PiMem SrcMem, sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && "Source mem object and target mem object queues are expected to be not nullptr"); + assert(SrcQueue && "Source mem object and target mem object queues are " + "expected to be not nullptr"); const sycl::detail::pi::PiQueue Queue = SrcQueue->getHandleRef(); const PluginPtr &Plugin = SrcQueue->getPlugin(); @@ -778,9 +781,9 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, if (!SrcQueue) { if (!TgtQueue) copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, - SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, - nullptr, DimDst, DstSize, DstAccessRange, DstOffset, - 
DstElemSize, std::move(DepEvents), OutEvent, OutEventImpl); + SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, nullptr, + DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, + std::move(DepEvents), OutEvent, OutEventImpl); else copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, @@ -1235,7 +1238,8 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl) { - assert(Queue && "Copy to device global USM must be called with a valid device queue"); + assert(Queue && + "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. DeviceGlobalUSMMem &DeviceGlobalUSM = DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue); @@ -1337,7 +1341,9 @@ static void memcpyToDeviceGlobalDirect( size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Direct copy to device global must be called with a valid device queue"); + assert( + Queue && + "Direct copy to device global must be called with a valid device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1352,7 +1358,8 @@ static void memcpyFromDeviceGlobalDirect( size_t NumBytes, size_t Offset, void *Dest, const std::vector &DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Direct copy from device global must be called with a valid device queue"); + assert(Queue && "Direct copy from device global must be called with a valid " + "device queue"); sycl::detail::pi::PiProgram Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); const PluginPtr &Plugin = Queue->getPlugin(); @@ -1762,7 +1769,8 @@ void MemoryManager::copy_image_bindless( sycl::detail::pi::PiImageRegion CopyExtent, const std::vector 
&DepEvents, sycl::detail::pi::PiEvent *OutEvent) { - assert(Queue && "Copy image bindless must be called with a valid device queue"); + assert(Queue && + "Copy image bindless must be called with a valid device queue"); assert((Flags == (sycl::detail::pi::PiImageCopyFlags) ext::oneapi::experimental::image_copy_flags::HtoD || Flags == (sycl::detail::pi::PiImageCopyFlags) diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index e13bd0a3a1b31..bc6278d54f32c 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -103,9 +103,7 @@ class platform_impl { } /// \return an instance of OpenCL cl_platform_id. - cl_platform_id get() const { - return pi::cast(MPlatform); - } + cl_platform_id get() const { return pi::cast(MPlatform); } /// Returns raw underlying plug-in platform handle. /// @@ -114,9 +112,7 @@ class platform_impl { /// is in use. /// /// \return a raw plug-in platform handle. - const sycl::detail::pi::PiPlatform &getHandleRef() const { - return MPlatform; - } + const sycl::detail::pi::PiPlatform &getHandleRef() const { return MPlatform; } /// Returns all available SYCL platforms in the system. 
/// diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 584b2487f5dee..df95614d872c3 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -220,22 +220,22 @@ void program_impl::compile_with_kernel_name(std::string KernelName, std::string CompileOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::none); - create_pi_program_with_kernel_name( - KernelName, - /*JITCompilationIsRequired=*/(!CompileOptions.empty())); - compile(CompileOptions); + create_pi_program_with_kernel_name( + KernelName, + /*JITCompilationIsRequired=*/(!CompileOptions.empty())); + compile(CompileOptions); MState = program_state::compiled; } void program_impl::link(std::string LinkOptions) { std::lock_guard Lock(MMutex); throw_if_state_is_not(program_state::compiled); - check_device_feature_support(MDevices); - std::vector Devices(get_pi_devices()); - const PluginPtr &Plugin = getPlugin(); - const char *LinkOpts = SYCLConfig::get(); - if (!LinkOpts) { - LinkOpts = LinkOptions.c_str(); + check_device_feature_support(MDevices); + std::vector Devices(get_pi_devices()); + const PluginPtr &Plugin = getPlugin(); + const char *LinkOpts = SYCLConfig::get(); + if (!LinkOpts) { + LinkOpts = LinkOptions.c_str(); } // Plugin resets MProgram with a new pi_program as a result of the call to @@ -251,7 +251,7 @@ void program_impl::link(std::string LinkOptions) { Plugin->checkPiResult(Err); MLinkOptions = LinkOptions; MBuildOptions = LinkOptions; - MState = program_state::linked; + MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index 1fa8767774961..67c02e95734ab 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -216,9 +216,7 @@ class program_impl { } /// \return the Plugin associated with the context of this program. 
- const PluginPtr &getPlugin() const { - return MContext->getPlugin(); - } + const PluginPtr &getPlugin() const { return MContext->getPlugin(); } ContextImplPtr getContextImplPtr() const { return MContext; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index af7af19ede120..83f33688ed0b1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -361,7 +361,8 @@ event queue_impl::submit_impl(const std::function &CGF, // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. const CG::CGTYPE Type = Handler.getType(); - event Event = detail::createSyclObjFromImpl(std::make_shared()); + event Event = detail::createSyclObjFromImpl( + std::make_shared()); std::vector Streams; if (Type == CG::Kernel) Streams = std::move(Handler.MStreamStorage); @@ -385,12 +386,12 @@ event queue_impl::submit_impl(const std::function &CGF, auto EventImpl = detail::getSyclObjImpl(Event); for (auto &Stream : Streams) { - // We don't want stream flushing to be blocking operation that is why submit a - // host task to print stream buffer. It will fire up as soon as the kernel + // We don't want stream flushing to be blocking operation that is why submit + // a host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. 
- event FlushEvent = submit_impl([&](handler &ServiceCGH) { - Stream->generateFlushCommand(ServiceCGH); - }, Self, PrimaryQueue, SecondaryQueue, Loc, {}); + event FlushEvent = submit_impl( + [&](handler &ServiceCGH) { Stream->generateFlushCommand(ServiceCGH); }, + Self, PrimaryQueue, SecondaryQueue, Loc, {}); EventImpl->attachEventToComplete(detail::getSyclObjImpl(FlushEvent)); registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); } @@ -707,7 +708,7 @@ void queue_impl::revisitUnenqueuedCommandsState( Deps.UnenqueuedCmdEvents.begin(), Deps.UnenqueuedCmdEvents.end(), [](const EventImplPtr &CommandEvent) { return (CommandEvent->isHost() ? CommandEvent->isCompleted() - : CommandEvent->isEnqueued()); + : CommandEvent->isEnqueued()); }), Deps.UnenqueuedCmdEvents.end()); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index e72ded829a798..d0a74cc80c793 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -194,14 +194,13 @@ class queue_impl { if (MDevice) { xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName()); - xpti::addMetadata( - TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); xpti::addMetadata(TEvent, "queue_handle", - reinterpret_cast(getHandleRef())); + reinterpret_cast(getHandleRef())); }); // Also publish to TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); @@ -257,9 +256,8 @@ class queue_impl { if (MDevice) { xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName()); - xpti::addMetadata( - TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + xpti::addMetadata(TEvent, "sycl_device", + 
reinterpret_cast(MDevice->getHandleRef())); } xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); @@ -751,9 +749,8 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); - - static ContextImplPtr getContext(const QueueImplPtr& Queue) - { + + static ContextImplPtr getContext(const QueueImplPtr &Queue) { return Queue ? Queue->getContextImplPtr() : nullptr; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c751cf7438ae7..3d51fe7a1c12f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -90,21 +90,19 @@ static std::string deviceToString(device Device) { return "UNKNOWN"; } -static void addDeviceMetadata(xpti_td* TraceEvent, const QueueImplPtr& Queue) -{ - xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(Queue->get_device()) : 0); - xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(Queue->get_device()) : "host"); - if (Queue) - xpti::addMetadata(TraceEvent, "sycl_device_name", +static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { + xpti::addMetadata(TraceEvent, "sycl_device", + Queue ? deviceToID(Queue->get_device()) : 0); + xpti::addMetadata(TraceEvent, "sycl_device_type", + Queue ? 
deviceToString(Queue->get_device()) : "host"); + if (Queue) + xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); } #endif -static ContextImplPtr getContext(const QueueImplPtr& Queue) -{ +static ContextImplPtr getContext(const QueueImplPtr &Queue) { if (Queue) return Queue->getContextImplPtr(); return nullptr; @@ -350,10 +348,12 @@ class DispatchHostTask { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); } catch (const sycl::exception &E) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + std::current_exception()); return (pi_result)E.get_cl_code(); } catch (...) { - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(std::current_exception()); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + std::current_exception()); return PI_ERROR_UNKNOWN; } } @@ -404,7 +404,8 @@ class DispatchHostTask { try { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { - assert(HostTask.MQueue && "Submitted queue for host task must be device queue"); + assert(HostTask.MQueue && + "Submitted queue for host task must be device queue"); interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; @@ -431,7 +432,8 @@ class DispatchHostTask { } } #endif - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + CurrentException); } HostTask.MHostTask.reset(); @@ -448,7 +450,8 @@ class DispatchHostTask { Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd); } catch (...) 
{ auto CurrentException = std::current_exception(); - MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException(CurrentException); + MThisCmd->MEvent->getSubmittedQueue()->reportAsyncException( + CurrentException); } } }; @@ -461,13 +464,13 @@ void Command::waitForPreparedHostEvents() const { void Command::waitForEvents(QueueImplPtr Queue, std::vector &EventImpls, sycl::detail::pi::PiEvent &Event) { - #ifndef NDEBUG - for (const EventImplPtr &Event : EventImpls) - assert(!Event->isHost() && - "Only non-host events are expected to be waited for here"); +#ifndef NDEBUG + for (const EventImplPtr &Event : EventImpls) + assert(!Event->isHost() && + "Only non-host events are expected to be waited for here"); #endif if (!EventImpls.empty()) { - if (!Queue) { + if (!Queue) { // Host queue can wait for events from different contexts, i.e. it may // contain events with different contexts in its MPreparedDepsEvents. // OpenCL 2.1 spec says that clWaitForEvents will return @@ -507,7 +510,7 @@ void Command::waitForEvents(QueueImplPtr Queue, MEvent->setHostEnqueueTime(); Plugin->call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); - } + } } } @@ -716,7 +719,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // 1. Non-host events can be ignored if they are not fully initialized. // 2. Some types of commands do not produce PI events after they are - // enqueued (e.g. alloca). Note that we can't check the pi event to make that distinction since the command might still be unenqueued at this point. + // enqueued (e.g. alloca). Note that we can't check the pi event to make that + // distinction since the command might still be unenqueued at this point. 
bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -732,7 +736,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task - if (DepEventContext != WorkerContext && WorkerContext){ + if (DepEventContext != WorkerContext && WorkerContext) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep, ToCleanUp); } else @@ -1006,7 +1010,7 @@ void AllocaCommandBase::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); } #endif } @@ -1066,9 +1070,9 @@ pi_int32 AllocaCommand::enqueueImp() { } // TODO: Check if it is correct to use std::move on stack variable and // delete it RawEvents below. - MMemAllocation = MemoryManager::allocate( - getContext(MQueue), getSYCLMemObj(), MInitFromUserData, HostPtr, - std::move(EventImpls), Event); + MMemAllocation = MemoryManager::allocate(getContext(MQueue), getSYCLMemObj(), + MInitFromUserData, HostPtr, + std::move(EventImpls), Event); return PI_SUCCESS; } @@ -1077,7 +1081,8 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA ON " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1163,8 +1168,8 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") - << "\\n"; + Stream << "ALLOCA SUB BUF ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1266,9 +1271,9 @@ pi_int32 ReleaseCommand::enqueueImp() { if (SkipRelease) Command::waitForEvents(MQueue, EventImpls, Event); else { - MemoryManager::release( - getContext(MQueue), MAllocaCmd->getSYCLMemObj(), - MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); + MemoryManager::release(getContext(MQueue), MAllocaCmd->getSYCLMemObj(), + MAllocaCmd->getMemAllocation(), + std::move(EventImpls), Event); } return PI_SUCCESS; } @@ -1277,7 +1282,8 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "RELEASE ON " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1347,7 +1353,8 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MAP ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1406,8 +1413,8 @@ bool UnMapMemObject::producesPiEvent() const { // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != - backend::ext_oneapi_level_zero || - MEvent->getHandleRef() != nullptr); + backend::ext_oneapi_level_zero || + MEvent->getHandleRef() != nullptr); } pi_int32 UnMapMemObject::enqueueImp() { @@ -1428,7 +1435,8 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UNMAP ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1476,13 +1484,12 @@ void MemCpyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata( - CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()): 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? 
deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1539,11 +1546,10 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; - Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue - << "\\n"; - Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue - << "\\n"; + Stream << "MEMCPY ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; + Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; Stream << "\"];" << std::endl; @@ -1597,7 +1603,8 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UPDATE REQ ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1649,14 +1656,13 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata( - CmdTraceEvent, "copy_to", - MQueue ? 
deviceToID(MQueue->get_device()) : 0); + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1696,8 +1702,7 @@ pi_int32 MemCpyCommandHost::enqueueImp() { return PI_SUCCESS; } -EmptyCommand::EmptyCommand() - : Command(CommandType::EMPTY_TASK, nullptr) { +EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { emitInstrumentationDataProxy(); } @@ -1746,7 +1751,7 @@ void EmptyCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -1775,7 +1780,8 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY HOST ON " + << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1814,7 +1820,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? 
MQueue->getQueueID() : 0); makeTraceEventEpilog(); } #endif @@ -2082,7 +2088,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue ? Queue->getQueueID() : 0); + Queue ? Queue->getQueueID() : 0); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, @@ -2128,7 +2134,7 @@ void ExecCGCommand::emitInstrumentationData() { if (CmdTraceEvent) { xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2151,7 +2157,8 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "EXEC CG ON " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2247,8 +2254,7 @@ void SetArgBasedOnType( const PluginPtr &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, detail::ArgDesc &Arg, - size_t NextTrueIndex) { + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_stream: break; @@ -2338,8 +2344,7 @@ static pi_result SetKernelParamsAndLaunch( auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Plugin, Kernel, DeviceImageImpl, getMemAllocationFunc, - Queue->get_context(), Arg, - NextTrueIndex); + Queue->get_context(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); @@ -2639,7 +2644,8 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, bool blocking, void *ptr, size_t size, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read) { - assert(Queue && "Queue with submitted read write host pipe could not be on host"); + assert(Queue && + "Queue with submitted read write host pipe could not be on host"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -2856,7 +2862,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { flushCrossQueueDeps(EventImpls, MWorkerQueue); bool DiscardPiEvent = MQueue && MQueue->supportsDiscardingPiEvents() && - (MCommandGroup->getRequirements().size() == 0); + (MCommandGroup->getRequirements().size() == 0); sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? 
nullptr : MEvent; @@ -2876,10 +2882,9 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { MemoryManager::copy( AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, - Req->MElemSize, Copy->getDst(), - nullptr, Req->MDims, - Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, - Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); + Req->MElemSize, Copy->getDst(), nullptr, Req->MDims, Req->MAccessRange, + Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, + std::move(RawEvents), MEvent->getHandleRef(), MEvent); return PI_SUCCESS; } @@ -2889,8 +2894,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); MemoryManager::copy( - AllocaCmd->getSYCLMemObj(), Copy->getSrc(), - nullptr, Req->MDims, + AllocaCmd->getSYCLMemObj(), Copy->getSrc(), nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, @@ -2937,7 +2941,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { std::vector &Args = ExecKernel->MArgs; if (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator) { + backend::ext_intel_esimd_emulator) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -2959,7 +2963,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { reinterpret_cast(ExecKernel->MHostKernel->getPtr()), NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - return PI_SUCCESS; + return PI_SUCCESS; } auto getMemAllocationFunc = [this](Requirement *Req) { @@ -3119,7 +3123,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { - assert(MQueue && "Device queue must be present for barrier with wait list command"); 
+ assert(MQueue && + "Device queue must be present for barrier with wait list command"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3224,7 +3229,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreSignal: { - assert(MQueue && "Device queue must be present for semaphore signal command"); + assert(MQueue && + "Device queue must be present for semaphore signal command"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3348,7 +3354,7 @@ void KernelFusionCommand::emitInstrumentationData() { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + MQueue ? MQueue->getQueueID() : 0); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3362,7 +3368,8 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n" + Stream << "KERNEL FUSION on " + << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 628ccdf2593da..63fb4853d88e4 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -373,10 +373,11 @@ class Command { std::string MSubmissionFunctionName; // This flag allows to control whether event should be set complete - // after successfull enqueue of command. Event is considered as "host" event if - // there is no backend representation of event (i.e. getHandleRef() return reference to nullptr value). - // By default the flag is set to true due to most of host operations are - // synchronous. The only asynchronous operation currently is host-task. + // after successfull enqueue of command. Event is considered as "host" event + // if there is no backend representation of event (i.e. getHandleRef() return + // reference to nullptr value). By default the flag is set to true due to most + // of host operations are synchronous. The only asynchronous operation + // currently is host-task. bool MShouldCompleteEventIfPossible = true; /// Indicates that the node will be freed by graph cleanup. 
Such nodes should @@ -792,8 +793,7 @@ void SetArgBasedOnType( const detail::plugin &Plugin, sycl::detail::pi::PiKernel Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const sycl::context &Context, detail::ArgDesc &Arg, - size_t NextTrueIndex); + const sycl::context &Context, detail::ArgDesc &Arg, size_t NextTrueIndex); void applyFuncOnFilteredArgs( const KernelArgMask *EliminatedArgMask, std::vector &Args, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 2919932c4e788..2ac97baefb543 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -54,9 +54,10 @@ static bool IsSuitableSubReq(const Requirement *Req) { return Req->MIsSubBuffer; } -static bool isOnSameContext(const ContextImplPtr Context, const QueueImplPtr& Queue) -{ - // Covers case for host usage (nullptr == nullptr) and existing device contexts comparison. +static bool isOnSameContext(const ContextImplPtr Context, + const QueueImplPtr &Queue) { + // Covers case for host usage (nullptr == nullptr) and existing device + // contexts comparison. return Context == queue_impl::getContext(Queue); } @@ -289,8 +290,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { auto Context = queue_impl::getContext(Queue); - AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, Context); + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context); assert(AllocaCmd && "There must be alloca for requirement!"); UpdateHostRequirementCommand *UpdateCommand = new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData); @@ -298,8 +298,7 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( // dependencies become invalid if requirement is stored by pointer. 
const Requirement *StoredReq = UpdateCommand->getRequirement(); - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); std::vector ToCleanUp; for (Command *Dep : Deps) { Command *ConnCmd = @@ -353,8 +352,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); auto Context = queue_impl::getContext(Queue); - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); Deps.insert(AllocaCmdDst); // Get parent allocation of sub buffer to perform full copy of whole buffer if (IsSuitableSubReq(Req)) { @@ -434,8 +432,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove( Command *Scheduler::GraphBuilder::remapMemoryObject( MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, std::vector &ToEnqueue) { - assert(!HostAllocaCmd->getQueue() && - "Host alloca command expected"); + assert(!HostAllocaCmd->getQueue() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); AllocaCommandBase *LinkedAllocaCmd = HostAllocaCmd->MLinkedAllocaCmd; @@ -490,8 +487,7 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, if (nullptr == Record || !Record->MMemModified) return nullptr; - std::set Deps = - findDepsForReq(Record, Req, nullptr); + std::set Deps = findDepsForReq(Record, Req, nullptr); AllocaCommandBase *SrcAllocaCmd = findAllocaForReq(Record, Req, Record->MCurContext); @@ -531,7 +527,8 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, auto SYCLMemObj = static_cast(Req->MSYCLMemObj); SYCLMemObj->handleWriteAccessorCreation(); } - // Host accessor is not attached to any queue so no QueueImplPtr object to be sent to getOrInsertMemObjRecord. + // Host accessor is not attached to any queue so no QueueImplPtr object to be + // sent to getOrInsertMemObjRecord. 
MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req); if (MPrintOptionsArray[BeforeAddHostAcc]) printGraphAsDot("before_addHostAccessor"); @@ -556,8 +553,8 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, insertUpdateHostReqCmd(Record, Req, nullptr, ToEnqueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = - addEmptyCmd(UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); + EmptyCommand *EmptyCmd = addEmptyCmd( + UpdateHostAccCmd, {Req}, Command::BlockReason::HostAccessor, ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -621,8 +618,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, CanBypassDep |= !doOverlap(Dep.MDepRequirement, Req); // Going through copying memory between contexts is not supported. - if (Dep.MDepCommand) - { + if (Dep.MDepCommand) { auto DepQueue = Dep.MDepCommand->getQueue(); CanBypassDep &= isOnSameContext(Context, DepQueue); } @@ -686,7 +682,8 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (std::strcmp(HUMConfig, "1") == 0) return true; } - // host task & host accessor is covered with no device context but provide required support. + // host task & host accessor is covered with no device context but provide + // required support. 
if (Ctx == nullptr) return true; @@ -705,8 +702,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { auto Context = queue_impl::getContext(Queue); - AllocaCommandBase *AllocaCmd = findAllocaForReq( - Record, Req, Context, /*AllowConst=*/false); + AllocaCommandBase *AllocaCmd = + findAllocaForReq(Record, Req, Context, /*AllowConst=*/false); if (!AllocaCmd) { std::vector ToCleanUp; @@ -736,8 +733,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // TODO the case where the first alloca is made with a discard mode and // the user pointer is read-only is still not handled: it leads to // unnecessary copy on devices with unified host memory support. - const bool HostUnifiedMemory = - checkHostUnifiedMemory(Context); + const bool HostUnifiedMemory = checkHostUnifiedMemory(Context); SYCLMemObjI *MemObj = Req->MSYCLMemObj; const bool InitFromUserData = Record->MAllocaCommands.empty() && (HostUnifiedMemory || MemObj->isInterop()); @@ -828,10 +824,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( AllocaCmd->MIsActive = false; } else { LinkedAllocaCmd->MIsActive = false; - Record->MCurContext =Context; + Record->MCurContext = Context; - std::set Deps = - findDepsForReq(Record, Req, Context); + std::set Deps = findDepsForReq(Record, Req, Context); for (Command *Dep : Deps) { Command *ConnCmd = AllocaCmd->addDep( DepDesc{Dep, Req, LinkedAllocaCmd}, ToCleanUp); @@ -871,8 +866,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, - Command::BlockReason Reason, - std::vector &ToEnqueue) { + Command::BlockReason Reason, std::vector &ToEnqueue) { EmptyCommand *EmptyCmd = new EmptyCommand(); if (!EmptyCmd) @@ -1343,8 +1337,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* DepEvents = */ {DepEvent}), 
CG::CodeplayHostTask, /* Payload */ {})); - ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), nullptr); + ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1719,13 +1712,11 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate( NeedMemMoveToHost = true; if (NeedMemMoveToHost) - insertMemoryMove(Record, Req, - nullptr, - ToEnqueue); + insertMemoryMove(Record, Req, nullptr, ToEnqueue); insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = - findDepsForReq(Record, Req, queue_impl::getContext(Queue)); + findDepsForReq(Record, Req, queue_impl::getContext(Queue)); for (Command *Dep : Deps) { if (Dep != NewCmd.get()) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 52eb59b225004..4d26c2a822457 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -110,13 +110,13 @@ EventImplPtr Scheduler::addCG( Command *NewCmd = nullptr; switch (Type) { case CG::UpdateHost: - NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - AuxiliaryCmds); + NewCmd = + MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), AuxiliaryCmds); NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - nullptr, AuxiliaryCmds); + auto Result = + MGraphBuilder.addCG(std::move(CommandGroup), nullptr, AuxiliaryCmds); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 75c80745ec71c..7e81e964bdc17 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -76,14 +76,15 @@ size_t stream_impl::get_size() const { return BufferSize_; } size_t stream_impl::get_max_statement_size() const { return 
MaxStatementSize_; } -void stream_impl::generateFlushCommand(handler& cgh) -{ +void stream_impl::generateFlushCommand(handler &cgh) { // Create accessor to the flush buffer even if not using it yet. Otherwise // kernel will be a leaf for the flush buffer and scheduler will not be able // to cleanup the kernel. TODO: get rid of finalize method by using host // accessor to the flush buffer. - host_accessor FlushBuffHostAcc(FlushBuf_, cgh); - host_accessor BufHostAcc (Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); + host_accessor FlushBuffHostAcc(FlushBuf_, + cgh); + host_accessor BufHostAcc( + Buf_, cgh, range<1>(BufferSize_), id<1>(OffsetSize)); cgh.host_task([=] { if (!BufHostAcc.empty()) { @@ -106,14 +107,14 @@ void stream_impl::generateFlushCommand(handler& cgh) }); } - // ABI break: remove - void stream_impl::initStreamHost(QueueImplPtr ){}; +// ABI break: remove +void stream_impl::initStreamHost(QueueImplPtr){}; - // ABI break: remove - void stream_impl::flush(const EventImplPtr &) {}; +// ABI break: remove +void stream_impl::flush(const EventImplPtr &) {}; - // ABI break: remove - void stream_impl::flush() {}; +// ABI break: remove +void stream_impl::flush() {}; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 4fc1f4b1d5a8a..670931c815185 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -68,7 +68,7 @@ class __SYCL_EXPORT stream_impl { return PropList_.get_property(); } - void generateFlushCommand(handler& cgh); + void generateFlushCommand(handler &cgh); private: // Size of the stream buffer diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 7440a3b816ce2..68207bec67d53 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -209,8 +209,9 @@ void SYCLMemObjT::detachMemoryObject( !MOwnNativeHandle || (MInteropContext && 
!MInteropContext->isOwnedByRuntime()); - if (MRecord && MRecord->MCurContext && MRecord->MCurContext->isOwnedByRuntime() && - !InteropObjectsUsed && (!MHostPtrProvided || MIsInternal)) { + if (MRecord && MRecord->MCurContext && + MRecord->MCurContext->isOwnedByRuntime() && !InteropObjectsUsed && + (!MHostPtrProvided || MIsInternal)) { bool okToDefer = GlobalHandler::instance().isOkToDefer(); if (okToDefer) Scheduler::getInstance().deferMemObjRelease(Self); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 753c27d5f678d..57c54275069e6 100755 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -73,33 +73,33 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, return nullptr; std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; - - switch (Kind) { - case alloc::host: { - std::array Props; - auto PropsIter = Props.begin(); - - if (PropList.has_property() && - Ctxt.get_platform().has_extension( - "cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + pi_result Error = PI_ERROR_INVALID_VALUE; + + switch (Kind) { + case alloc::host: { + std::array Props; + auto PropsIter = Props.begin(); + + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + Ctxt.get_platform().has_extension( + "cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); + } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list + assert(PropsIter >= 
Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list - Error = Plugin->call_nocheck( - &RetVal, C, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck( + &RetVal, C, Props.data(), Size, Alignment); - break; + break; } case alloc::device: case alloc::shared: diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 1947e31b7daaa..e1bc8c894f311 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -83,8 +83,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, - ToEnqueue); + Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, ToEnqueue); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 437f98b1579a6..c3681bfc07a3b 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -245,7 +245,8 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) { checkCleanupOnLeafUpdate( MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) { detail::Command *Leaf = *Record->MWriteLeaves.begin(); - MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, ToEnqueue); + MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask, + ToEnqueue); }); checkCleanupOnLeafUpdate( MS, nullptr, Buf, MockReq, [&](detail::MemObjRecord *Record) { diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index bffdf6af4afe2..9ce9a1f944349 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -91,8 +91,7 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { // Check that sequential memory movements submitted to the 
same in-order // queue do not depend on each other. - detail::Command *Cmd = - MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); + detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp index 71f30f91117a0..565c3b2a2314c 100644 --- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp +++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp @@ -60,8 +60,8 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) { std::vector ToEnqueue; AllocaCmd = MS.getOrCreateAllocaForReq( Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue); - std::ignore = MS.getOrCreateAllocaForReq( - Rec, &MockReq, nullptr, ToEnqueue); + std::ignore = + MS.getOrCreateAllocaForReq(Rec, &MockReq, nullptr, ToEnqueue); DepCmd = std::make_unique(detail::getSyclObjImpl(Queue), MockReq); } diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index 39146ffaa95e8..e0732926537b0 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -36,8 +36,7 @@ createGenericCommand(const std::shared_ptr &Q) { return std::shared_ptr{new MockCommand(Q, Command::RUN_CG)}; } -std::shared_ptr -createEmptyCommand(const Requirement &Req) { +std::shared_ptr createEmptyCommand(const Requirement &Req) { EmptyCommand *Cmd = new EmptyCommand(); Cmd->addRequirement(/* DepCmd = */ nullptr, /* AllocaCmd = */ nullptr, &Req); Cmd->MBlockReason = Command::BlockReason::HostAccessor; diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 6ae6b9bfc2344..b08b211d1e2dc 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ 
b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -69,8 +69,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { std::vector &) {}; std::shared_ptr Record{ - new sycl::detail::MemObjRecord(nullptr, 10, - AllocaDep)}; + new sycl::detail::MemObjRecord(nullptr, 10, AllocaDep)}; MemObjMock MemObj(Record); Req.MSYCLMemObj = &MemObj; diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 83a0702861141..24a19977844fb 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -152,8 +152,7 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No special handling required: alloca commands are created one after // another and the transfer is done via a write operation. - detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(nullptr, &Req); + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(nullptr, &Req); std::vector AuxCmds; detail::AllocaCommandBase *HostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds); diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index 330ff7e0f02d2..c90db25fc019a 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -125,14 +125,12 @@ TEST_F(SchedulerTest, QueueFlushing) { detail::AllocaCommand HostAllocaCmd = detail::AllocaCommand(nullptr, MockReq); - detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, - MockReq, &HostAllocaCmd, - QueueImplA, nullptr}; + detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, MockReq, + &HostAllocaCmd, QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); - detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, - MockReq, &MockHostPtr, - QueueImplA, nullptr}; + detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, MockReq, + &MockHostPtr, QueueImplA, nullptr}; testCommandEnqueue(&MemCpyCmdHost, QueueImplB, 
MockReq); std::unique_ptr CG{ diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d1e7f22aa9485..789961b081da8 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -11,8 +11,8 @@ #include #include -#include #include +#include using namespace sycl; From c76484daf99edc74b77d6722fdbb4d62b707df56 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 05:56:31 -0700 Subject: [PATCH 31/58] fix clang-format Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/program_impl.cpp | 93 ++++++++-------- sycl/source/detail/usm/usm_impl.cpp | 160 ++++++++++++++-------------- 2 files changed, 126 insertions(+), 127 deletions(-) mode change 100755 => 100644 sycl/source/detail/usm/usm_impl.cpp diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index df95614d872c3..f3ac2185627f9 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -94,22 +94,22 @@ program_impl::program_impl( } } - std::vector Devices(get_pi_devices()); - std::vector Programs; - bool NonInterOpToLink = false; - for (const auto &Prg : ProgramList) { - if (!Prg->MLinkable && NonInterOpToLink) - continue; - NonInterOpToLink |= !Prg->MLinkable; - Programs.push_back(Prg->MProgram); - } - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MContext->getHandleRef(), Devices.size(), Devices.data(), - LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, - nullptr, &MProgram); - Plugin->checkPiResult(Err); + std::vector Devices(get_pi_devices()); + std::vector Programs; + bool NonInterOpToLink = false; + for (const auto &Prg : ProgramList) { + if (!Prg->MLinkable && NonInterOpToLink) + continue; + NonInterOpToLink |= !Prg->MLinkable; + Programs.push_back(Prg->MProgram); + } + const PluginPtr &Plugin = 
getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MContext->getHandleRef(), Devices.size(), Devices.data(), + LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, + nullptr, &MProgram); + Plugin->checkPiResult(Err); } program_impl::program_impl(ContextImplPtr Context, @@ -236,22 +236,22 @@ void program_impl::link(std::string LinkOptions) { const char *LinkOpts = SYCLConfig::get(); if (!LinkOpts) { LinkOpts = LinkOptions.c_str(); - } + } - // Plugin resets MProgram with a new pi_program as a result of the call to - // "piProgramLink". Thus, we need to release MProgram before the call to - // piProgramLink. - if (MProgram != nullptr) - Plugin->call(MProgram); - - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, - /*num_input_programs*/ 1, &MProgram, nullptr, nullptr, &MProgram); - Plugin->checkPiResult(Err); - MLinkOptions = LinkOptions; - MBuildOptions = LinkOptions; - MState = program_state::linked; + // Plugin resets MProgram with a new pi_program as a result of the call to + // "piProgramLink". Thus, we need to release MProgram before the call to + // piProgramLink. 
+ if (MProgram != nullptr) + Plugin->call(MProgram); + + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, + /*num_input_programs*/ 1, &MProgram, nullptr, nullptr, &MProgram); + Plugin->checkPiResult(Err); + MLinkOptions = LinkOptions; + MBuildOptions = LinkOptions; + MState = program_state::linked; } bool program_impl::has_kernel(std::string KernelName, @@ -363,24 +363,23 @@ std::pair program_impl::get_pi_kernel_arg_mask_pair(const std::string &KernelName) const { std::pair Result; - const PluginPtr &Plugin = getPlugin(); - sycl::detail::pi::PiResult Err = - Plugin->call_nocheck( - MProgram, KernelName.c_str(), &Result.first); - if (Err == PI_ERROR_INVALID_KERNEL_NAME) { - throw invalid_object_error( - "This instance of program does not contain the kernel requested", - Err); - } - Plugin->checkPiResult(Err); + const PluginPtr &Plugin = getPlugin(); + sycl::detail::pi::PiResult Err = + Plugin->call_nocheck( + MProgram, KernelName.c_str(), &Result.first); + if (Err == PI_ERROR_INVALID_KERNEL_NAME) { + throw invalid_object_error( + "This instance of program does not contain the kernel requested", Err); + } + Plugin->checkPiResult(Err); - // Some PI Plugins (like OpenCL) require this call to enable USM - // For others, PI will turn this into a NOP. - if (getContextImplPtr()->getPlatformImpl()->supports_usm()) - Plugin->call( - Result.first, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); + // Some PI Plugins (like OpenCL) require this call to enable USM + // For others, PI will turn this into a NOP. 
+ if (getContextImplPtr()->getPlatformImpl()->supports_usm()) + Plugin->call( + Result.first, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); - return Result; + return Result; } std::vector diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp old mode 100755 new mode 100644 index 57c54275069e6..7237e88be440f --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -100,20 +100,20 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, &RetVal, C, Props.data(), Size, Alignment); break; - } - case alloc::device: - case alloc::shared: - case alloc::unknown: { - RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; - break; - } - } + } + case alloc::device: + case alloc::shared: + case alloc::unknown: { + RetVal = nullptr; + Error = PI_ERROR_INVALID_VALUE; + break; + } + } - // Error is for debugging purposes. - // The spec wants a nullptr returned, not an exception. - if (Error != PI_SUCCESS) - return nullptr; + // Error is for debugging purposes. + // The spec wants a nullptr returned, not an exception. 
+ if (Error != PI_SUCCESS) + return nullptr; #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", reinterpret_cast(RetVal)); @@ -139,79 +139,79 @@ void *alignedAllocInternal(size_t Alignment, size_t Size, if (Size == 0) return nullptr; - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - pi_result Error = PI_ERROR_INVALID_VALUE; - pi_device Id; + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + pi_result Error = PI_ERROR_INVALID_VALUE; + pi_device Id; - switch (Kind) { - case alloc::device: { - Id = DevImpl->getHandleRef(); + switch (Kind) { + case alloc::device: { + Id = DevImpl->getHandleRef(); - std::array Props; - auto PropsIter = Props.begin(); + std::array Props; + auto PropsIter = Props.begin(); - // Buffer location is only supported on FPGA devices - if (PropList.has_property() && - DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } + // Buffer location is only supported on FPGA devices + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); + } - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list + assert(PropsIter >= Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); + Error = Plugin->call_nocheck( + &RetVal, C, Id, Props.data(), Size, Alignment); - break; - } - case alloc::shared: { - Id = DevImpl->getHandleRef(); - - std::array Props; - auto PropsIter = Props.begin(); - - if 
(PropList.has_property< - sycl::ext::oneapi::property::usm::device_read_only>()) { - *PropsIter++ = PI_MEM_ALLOC_FLAGS; - *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY; - } - - if (PropList.has_property() && - DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { - *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; - *PropsIter++ = PropList - .get_property() - .get_buffer_location(); - } - - assert(PropsIter >= Props.begin() && PropsIter < Props.end()); - *PropsIter++ = 0; // null-terminate property list - - Error = Plugin->call_nocheck( - &RetVal, C, Id, Props.data(), Size, Alignment); - - break; - } - case alloc::host: - case alloc::unknown: { - RetVal = nullptr; - Error = PI_ERROR_INVALID_VALUE; - break; + break; + } + case alloc::shared: { + Id = DevImpl->getHandleRef(); + + std::array Props; + auto PropsIter = Props.begin(); + + if (PropList.has_property< + sycl::ext::oneapi::property::usm::device_read_only>()) { + *PropsIter++ = PI_MEM_ALLOC_FLAGS; + *PropsIter++ = PI_MEM_ALLOC_DEVICE_READ_ONLY; } + + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + DevImpl->has_extension("cl_intel_mem_alloc_buffer_location")) { + *PropsIter++ = PI_MEM_USM_ALLOC_BUFFER_LOCATION; + *PropsIter++ = PropList + .get_property() + .get_buffer_location(); } - // Error is for debugging purposes. - // The spec wants a nullptr returned, not an exception. - if (Error != PI_SUCCESS) - return nullptr; + assert(PropsIter >= Props.begin() && PropsIter < Props.end()); + *PropsIter++ = 0; // null-terminate property list + + Error = Plugin->call_nocheck( + &RetVal, C, Id, Props.data(), Size, Alignment); + + break; + } + case alloc::host: + case alloc::unknown: { + RetVal = nullptr; + Error = PI_ERROR_INVALID_VALUE; + break; + } + } + + // Error is for debugging purposes. + // The spec wants a nullptr returned, not an exception. 
+ if (Error != PI_SUCCESS) + return nullptr; return RetVal; } @@ -250,9 +250,9 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, void freeInternal(void *Ptr, const context_impl *CtxImpl) { if (Ptr == nullptr) return; - pi_context C = CtxImpl->getHandleRef(); - const PluginPtr &Plugin = CtxImpl->getPlugin(); - Plugin->call(C, Ptr); + pi_context C = CtxImpl->getHandleRef(); + const PluginPtr &Plugin = CtxImpl->getPlugin(); + Plugin->call(C, Ptr); } void free(void *Ptr, const context &Ctxt, From 61d1c6208e4ef52c3b72908b9f904ba9869ffdb5 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Jun 2024 08:52:31 -0700 Subject: [PATCH 32/58] fix connect task queue Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/graph_builder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 2ac97baefb543..7cfc0446fdd69 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1330,7 +1330,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( try { std::unique_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {}, + std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {}, + /* Args = */ {}, detail::CG::StorageInitHelper( /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, From 5814e466577f0b99d6d6095d3e0d68a25452203c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 06:30:09 -0700 Subject: [PATCH 33/58] fix bugs Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 11 +++++++++-- sycl/source/detail/queue_impl.cpp | 4 ++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp 
index 7d91129f25b51..a270867f6b637 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -262,7 +262,8 @@ void event_impl::wait_and_throw( void event_impl::checkProfilingPreconditions() const { std::weak_ptr EmptyPtr; - if (!EmptyPtr.owner_before(MQueue) && !MQueue.owner_before(EmptyPtr)) { + if (!MIsHostEvent && !EmptyPtr.owner_before(MQueue) && + !MQueue.owner_before(EmptyPtr)) { throw sycl::exception(make_error_code(sycl::errc::invalid), "Profiling information is unavailable as the event " "has no associated queue."); @@ -300,7 +301,7 @@ event_impl::get_profiling_info() { // made by forcing the re-sync of submit time to start time is less than // 0.5ms. These timing values were obtained empirically using an integrated // Intel GPU). - if (MEventFromSubmittedExecCommandBuffer && MEvent) { + if (MEventFromSubmittedExecCommandBuffer && !MIsHostEvent && MEvent) { uint64_t StartTime = get_event_profiling_info( this->getHandleRef(), this->getPlugin()); @@ -546,6 +547,12 @@ void event_impl::setSubmissionTime() { e.what()); std::rethrow_exception(std::current_exception()); } + } else { + // Returning host time + using namespace std::chrono; + MSubmitTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } } else { // Capture the host timestamp for a return value of function call diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 83f33688ed0b1..572b0b8cf568a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -299,12 +299,12 @@ void queue_impl::addEvent(const event &Event) { // if there is no command on the event, we cannot track it with MEventsWeak // as that will leave it with no owner. Track in MEventsShared only if we're // unable to call piQueueFinish during wait. 
- if (EImpl->isHost() || MEmulateOOO) + if (MEmulateOOO) addSharedEvent(Event); } // As long as the queue supports piQueueFinish we only need to store events // for unenqueued commands and host tasks. - else if (EImpl->isHost() || MEmulateOOO || EImpl->getHandleRef() == nullptr) { + else if (MEmulateOOO || EImpl->getHandleRef() == nullptr) { std::weak_ptr EventWeakPtr{EImpl}; std::lock_guard Lock{MMutex}; MEventsWeak.push_back(std::move(EventWeakPtr)); From a03468173acf6f9c58593685069d030955a4782c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 09:43:06 -0700 Subject: [PATCH 34/58] fix work with graph Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.cpp | 21 ++++++++++++++++----- sycl/source/detail/queue_impl.hpp | 16 +++++++++++++--- sycl/source/detail/scheduler/commands.cpp | 20 ++++++++++---------- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index a270867f6b637..e203924d2d612 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -48,7 +48,7 @@ event_impl::~event_impl() { } void event_impl::waitInternal(bool *Success) { - if (MEvent) { + if (!MIsHostEvent && MEvent) { // Wait for the native event sycl::detail::pi::PiResult Err = getPlugin()->call_nocheck(1, &MEvent); @@ -390,7 +390,7 @@ event_impl::get_info() { return sycl::info::event_command_status::submitted; } - return MState.load() != HES_Complete + return MIsHostEvent && MState.load() != HES_Complete ? 
sycl::info::event_command_status::submitted : info::event_command_status::complete; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 572b0b8cf568a..a5f9ae9964ac6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -696,6 +696,19 @@ void queue_impl::revisitUnenqueuedCommandsState( const EventImplPtr &CompletedHostTask) { if (MIsInorder) return; + + std::unique_lock Lock{MMutex, std::try_to_lock}; + if (Lock.owns_lock()) + doUnenqueuedCommandCleanup(CompletedHostTask->getCommandGraph()); + else { + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + MMissedCleanupRequests.push_back(CompletedHostTask->getCommandGraph()); + } +} + +void queue_impl::doUnenqueuedCommandCleanup( + const std::shared_ptr + &Graph) { auto tryToCleanup = [](DependencyTrackingItems &Deps) { if (Deps.LastBarrier && Deps.LastBarrier->isEnqueued()) { Deps.LastBarrier = nullptr; @@ -713,14 +726,12 @@ void queue_impl::revisitUnenqueuedCommandsState( Deps.UnenqueuedCmdEvents.end()); } }; - std::lock_guard Lock{MMutex}; // Barrier enqueue could be significantly postponed due to host task // dependency if any. No guarantee that it will happen while same graph deps // are still recording. - if (auto Graph = CompletedHostTask->getCommandGraph()) { - if (Graph == getCommandGraph()) - tryToCleanup(MExtGraphDeps); - } else + if (Graph && Graph == getCommandGraph()) + tryToCleanup(MExtGraphDeps); + else tryToCleanup(MDefaultGraphDeps); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d0a74cc80c793..aa3dd9fc780bf 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -93,7 +93,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. 
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -749,6 +749,9 @@ class queue_impl { // tasks and host tasks is applicable for out of order queues only. Not neede // for in order ones. void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); + void doUnenqueuedCommandCleanup( + const std::shared_ptr + &Graph); static ContextImplPtr getContext(const QueueImplPtr &Queue) { return Queue ? Queue->getContextImplPtr() : nullptr; @@ -790,13 +793,12 @@ class queue_impl { EventToBuildDeps = getSyclObjImpl(EventRet); } else { const CG::CGTYPE Type = Handler.getType(); - + std::lock_guard Lock{MMutex}; // The following code supports barrier synchronization if host task is // involved in the scenario. Native barriers cannot handle host task // dependency so in the case where some commands were not enqueued // (blocked), we track them to prevent barrier from being enqueued // earlier. - std::lock_guard Lock{MMutex}; auto &Deps = MGraph.expired() ? 
MDefaultGraphDeps : MExtGraphDeps; if (Type == CG::Barrier && !Deps.UnenqueuedCmdEvents.empty()) { Handler.depends_on(Deps.UnenqueuedCmdEvents); @@ -814,6 +816,10 @@ class queue_impl { } else Deps.UnenqueuedCmdEvents.push_back(EventRetImpl); } + std::lock_guard RequestLock(MMissedCleanupRequestsMtx); + for (auto &UpdatedGraph : MMissedCleanupRequests) + doUnenqueuedCommandCleanup(UpdatedGraph); + MMissedCleanupRequests.clear(); } } @@ -966,6 +972,10 @@ class queue_impl { unsigned long long MQueueID; static std::atomic MNextAvailableQueueID; + std::deque> + MMissedCleanupRequests; + std::mutex MMissedCleanupRequestsMtx; + friend class sycl::ext::oneapi::experimental::detail::node_impl; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3d51fe7a1c12f..6322b904fd6bc 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2954,16 +2954,16 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - assert(MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator); - if (MEvent != nullptr) - MEvent->setHostEnqueueTime(); - MQueue->getPlugin()->call( - nullptr, - reinterpret_cast(ExecKernel->MHostKernel->getPtr()), - NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], - &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - return PI_SUCCESS; + assert(MQueue->getDeviceImplPtr()->getBackend() == + backend::ext_intel_esimd_emulator); + if (MEvent != nullptr) + MEvent->setHostEnqueueTime(); + MQueue->getPlugin()->call( + nullptr, + reinterpret_cast(ExecKernel->MHostKernel->getPtr()), + NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], + &NDRDesc.LocalSize[0], 0, nullptr, nullptr); + return PI_SUCCESS; } auto getMemAllocationFunc = [this](Requirement *Req) { From c274c5ec74a0e92306824194a7f5ef9509c83df2 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 20 Jun 2024 10:14:54 -0700 
Subject: [PATCH 35/58] fix tracing tests Signed-off-by: Tikhomirova, Kseniya --- .../Tracing/code_location_queue_submit.cpp | 13 +++---------- sycl/test-e2e/Tracing/task_execution.cpp | 18 ++++++------------ .../Tracing/task_execution_handler.cpp | 4 ++-- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/sycl/test-e2e/Tracing/code_location_queue_submit.cpp b/sycl/test-e2e/Tracing/code_location_queue_submit.cpp index 6ebfe43e936e5..ce780f5e81725 100644 --- a/sycl/test-e2e/Tracing/code_location_queue_submit.cpp +++ b/sycl/test-e2e/Tracing/code_location_queue_submit.cpp @@ -5,8 +5,7 @@ // Test tracing of the code location data for queue.submit in case of failure // (exception generation) -// First queue creation (id = 0) is queue created on line 15. -// The second queue is a host queue created on first scheduler usage. +// First queue creation (id = 0) is queue created on line 17. #include #include @@ -19,16 +18,10 @@ int main() { unsigned char *HostAllocDst = NULL; // CHECK: [SYCL] Queue create: // CHECK-DAG: queue_handle : {{.*}} - // CHECK-DAG: queue_id : 0 - // CHECK-DAG: is_inorder : false - // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : {{.*}} - // CHECK-DAG: sycl_context : {{.*}} - // CHECK-NEXT: [SYCL] Queue create: // CHECK-DAG: queue_id : 1 // CHECK-DAG: is_inorder : false // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : SYCL host device + // CHECK-DAG: sycl_device_name : {{.*}} // CHECK-DAG: sycl_context : {{.*}} // CHECK: [SYCL] Runtime reports: // CHECK-NEXT: what: NULL pointer argument in memory copy operation. 
-30 (PI_ERROR_INVALID_VALUE) @@ -44,6 +37,6 @@ int main() { sycl::free(HostAllocSrc, Q); } // CHECK-NEXT: [SYCL] Queue destroy: - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 return !ExceptionCaught; } diff --git a/sycl/test-e2e/Tracing/task_execution.cpp b/sycl/test-e2e/Tracing/task_execution.cpp index d591c20b8f6c0..b4932df0eda55 100644 --- a/sycl/test-e2e/Tracing/task_execution.cpp +++ b/sycl/test-e2e/Tracing/task_execution.cpp @@ -15,38 +15,32 @@ int main() { Q.copy(AllocDst, AllocSrc, 1).wait(); // CHECK: [SYCL] Queue create: // CHECK-DAG: queue_handle : {{.*}} - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: is_inorder : false // CHECK-DAG: sycl_device : {{.*}} // CHECK-DAG: sycl_device_name : {{.*}} // CHECK-DAG: sycl_context : {{.*}} // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: memory_size : 1 // CHECK-DAG: value_set : 0 // CHECK-DAG: memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: memory_size : 1 // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) - // CHECK-NEXT: [SYCL] Queue create: - // CHECK-DAG: queue_id : 1 - // CHECK-DAG: is_inorder : false - // CHECK-DAG: sycl_device : {{.*}} - // CHECK-DAG: sycl_device_name : SYCL host device - // CHECK-DAG: sycl_context : {{.*}} Q.single_task([]() {}).wait(); // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) // CHECK-DAG: enqueue_kernel_data : {{.*}} // CHECK-DAG: sym_column_no : {{.*}} - // CHECK-DAG: sym_line_no : 43 + // CHECK-DAG: sym_line_no : 37 // CHECK-DAG: sym_source_file_name : {{.*}}task_execution.cpp - // CHECK-DAG: queue_id : 0 + // 
CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_function_name : typeinfo name for main::E2ETestKernel // CHECK-DAG: from_source : {{.*}} // CHECK-DAG: sycl_device_name : {{.*}} @@ -55,7 +49,7 @@ int main() { // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Queue destroy: - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 sycl::free(AllocSrc, Q); sycl::free(AllocDst, Q); } diff --git a/sycl/test-e2e/Tracing/task_execution_handler.cpp b/sycl/test-e2e/Tracing/task_execution_handler.cpp index 0563275f81312..a208fe6655bda 100644 --- a/sycl/test-e2e/Tracing/task_execution_handler.cpp +++ b/sycl/test-e2e/Tracing/task_execution_handler.cpp @@ -16,7 +16,7 @@ int main() { { cgh.memset(AllocSrc, 0, 1); }) .wait(); // CHECK: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_column_no : {{.*}} // CHECK-DAG: sym_function_name : {{.*}} // CHECK-DAG: kernel_name : {{.*}} @@ -27,7 +27,7 @@ int main() { // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: [SYCL] Task end (event={{.*}},instanceID={{.*}}) // CHECK-NEXT: [SYCL] Task begin (event={{.*}},instanceID={{.*}}) - // CHECK-DAG: queue_id : 0 + // CHECK-DAG: queue_id : 1 // CHECK-DAG: sym_column_no : {{.*}} // CHECK-DAG: sym_function_name : {{.*}} // CHECK-DAG: kernel_name : {{.*}} From f50526bf29351cbc0d897ae6a59c699aca910522 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 21 Jun 2024 04:23:03 -0700 Subject: [PATCH 36/58] fix test Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4d26c2a822457..905ca889aaf0d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -207,7 +207,7 @@ EventImplPtr 
Scheduler::addCopyBack(Requirement *Req) { { WriteLockT Lock = acquireWriteLock(); NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds); - // Command was not creted because there were no operations with + // Command was not created because there were no operations with // buffer. if (!NewCmd) return nullptr; @@ -232,7 +232,9 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); } catch (...) { - NewCmd->getQueue()->reportAsyncException(std::current_exception()); + auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); + assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); + WorkerQueue->reportAsyncException(std::current_exception()); } EventImplPtr NewEvent = NewCmd->getEvent(); cleanupCommands(ToCleanUp); From 2bd06e3a3ab0170ce0dfef9ace4ae16573ce7c69 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 04:17:25 -0700 Subject: [PATCH 37/58] update win symbols Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_windows.dump | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e8610211e8572..c091a7751a0cc 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -41,18 +41,12 @@ ??$get_info@U?$max_work_groups@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$00@23@XZ ??$get_info@U?$max_work_groups@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$01@23@XZ ??$get_info@U?$max_work_groups@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$id@$02@23@XZ 
+??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ 
??$get_info@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ 
-??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ -??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AW4architecture@experimental@oneapi@ext@23@XZ ??$get_info@Uatomic_fence_order_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_order@_V1@sycl@@V?$allocator@W4memory_order@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uatomic_fence_scope_capabilities@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@W4memory_scope@_V1@sycl@@V?$allocator@W4memory_scope@_V1@sycl@@@std@@@std@@XZ @@ -108,6 +102,12 @@ ??$get_info_impl@U?$max_work_item_sizes@$00@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$00@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$01@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$01@12@XZ ??$get_info_impl@U?$max_work_item_sizes@$02@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$range@$02@12@XZ +??$get_info_impl@U?$sub_group_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ 
+??$get_info_impl@U?$sub_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_group_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$00@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$01@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@U?$work_item_progress_capabilities@$02@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@V?$allocator@W4forward_progress_guarantee@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Uaddress_bits@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uarchitecture@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AW4architecture@experimental@oneapi@ext@12@XZ ??$get_info_impl@Uaspects@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@std@@XZ @@ -4080,7 +4080,6 @@ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z 
?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z -?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z @@ -4096,7 +4095,6 @@ ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@0AEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@PEAXAEBUimage_descriptor@56723@@Z ?ext_oneapi_copy@handler@_V1@sycl@@QEAAXUimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@56723@PEAX111@Z -?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@PEAX0AEBUimage_descriptor@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z @@ -4158,6 +4156,7 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z 
?ext_oneapi_prefetch_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KV?$vector@IV?$allocator@I@std@@@6@PEAI@Z +?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@_K@Z @@ -4205,6 +4204,7 @@ ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z +?generateFlushCommand@stream_impl@detail@_V1@sycl@@QEAAXAEAVhandler@34@@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ @@ -4655,6 +4655,7 @@ ?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z ?usesPinnedHostMemory@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ +?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyKernelInvoc@handler@_V1@sycl@@AEAAXAEBVkernel@23@@Z ?verifyUsedKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?verifyUsedKernelBundleInternal@handler@_V1@sycl@@AEAAXVstring_view@detail@23@@Z From 5fbcb1ead2551a055366f906a093c9267ccaf978 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 05:17:33 -0700 Subject: [PATCH 38/58] fix format Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 7e81e964bdc17..1ba09ed36369c 100644 --- 
a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -108,13 +108,13 @@ void stream_impl::generateFlushCommand(handler &cgh) { } // ABI break: remove -void stream_impl::initStreamHost(QueueImplPtr){}; +void stream_impl::initStreamHost(QueueImplPtr){} // ABI break: remove -void stream_impl::flush(const EventImplPtr &) {}; +void stream_impl::flush(const EventImplPtr &) {} // ABI break: remove -void stream_impl::flush() {}; +void stream_impl::flush() {} } // namespace detail } // namespace _V1 From d5d15bf8f4b4317e3a9f43ce179a65f7a195f849 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 24 Jun 2024 08:28:19 -0700 Subject: [PATCH 39/58] fix formatting Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/stream_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 1ba09ed36369c..b9f70581ac7a8 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -108,7 +108,7 @@ void stream_impl::generateFlushCommand(handler &cgh) { } // ABI break: remove -void stream_impl::initStreamHost(QueueImplPtr){} +void stream_impl::initStreamHost(QueueImplPtr) {} // ABI break: remove void stream_impl::flush(const EventImplPtr &) {} From e185cbcca90a9d76827c95fe211aace1c7284f95 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 25 Jun 2024 08:25:30 -0700 Subject: [PATCH 40/58] self review comments fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/context.cpp | 2 +- sycl/source/detail/buffer_impl.cpp | 4 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_impl.hpp | 2 +- sycl/source/detail/memory_manager.cpp | 2 +- sycl/source/detail/platform_impl.hpp | 6 -- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 84 ++++++++----------- sycl/source/detail/scheduler/scheduler.hpp | 10 +-- sycl/source/device.cpp | 2 +- sycl/source/event.cpp | 2 +- 
sycl/source/kernel.cpp | 2 +- sycl/source/platform.cpp | 2 +- sycl/source/queue.cpp | 2 +- .../test-e2e/SubGroup/sub_groups_sycl2020.cpp | 4 - 15 files changed, 52 insertions(+), 76 deletions(-) diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 1261096b82047..e4c7404c7b078 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -127,7 +127,7 @@ context::get_backend_info() const { cl_context context::get() const { return impl->get(); } bool context::is_host() const { - assert(true && "context::is_host should not be called in implementation."); + assert(false && "context::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index f13444107e9eb..1795992594078 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -24,7 +24,9 @@ void *buffer_impl::allocateMem(ContextImplPtr Context, bool InitFromUserData, sycl::detail::pi::PiEvent &OutEventToWait) { bool HostPtrReadOnly = false; BaseT::determineHostPtr(Context, InitFromUserData, HostPtr, HostPtrReadOnly); - + assert(!(nullptr == HostPtr && BaseT::useHostPtr() && !Context) && + "Internal error. 
Allocating memory on the host " + "while having use_host_ptr property"); return MemoryManager::allocateMemBuffer( std::move(Context), this, HostPtr, HostPtrReadOnly, BaseT::getSizeInBytes(), BaseT::MInteropEvent, BaseT::MInteropContext, diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e203924d2d612..f4ad52221ed37 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -367,7 +367,7 @@ uint64_t event_impl::get_profiling_info() { } template <> uint32_t event_impl::get_info() { - if (MEvent) { + if (!MIsHostEvent && MEvent) { return get_event_info(this->getHandleRef(), this->getPlugin()); } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 8b46e715cd13e..12b58d25ab3cd 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,7 +49,7 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsFlushed(true), + : MIsInitialized(false), MIsHostEvent(State), MIsFlushed(true), MState(State.value_or(HES_Complete)) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 6f30ceef8eb51..97615960877ff 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -935,7 +935,7 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, std::vector DepEvents, sycl::detail::pi::PiEvent &OutEvent) { - // Host queue is not supported here. + // Execution on host is not supported here. 
if (!Queue) { throw runtime_error("Not supported configuration of unmap requested", PI_ERROR_INVALID_OPERATION); diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index bc6278d54f32c..0a926712eb806 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -32,9 +32,6 @@ class device_impl; // TODO: implement parameters treatment for host device class platform_impl { public: - /// Constructs platform_impl for a SYCL host platform. - platform_impl() : MHostPlatform(true) {} - /// Constructs platform_impl from a plug-in interoperability platform /// handle. /// @@ -125,7 +122,6 @@ class platform_impl { // \return the Plugin associated with this platform. const PluginPtr &getPlugin() const { - assert(!MHostPlatform && "Plugin is not available for Host."); return MPlugin; } @@ -134,7 +130,6 @@ class platform_impl { /// \param PluginPtr is a pointer to a plugin instance /// \param Backend is the backend that we want this platform to use void setPlugin(PluginPtr &PluginPtr, backend Backend) { - assert(!MHostPlatform && "Plugin is not available for Host"); MPlugin = PluginPtr; MBackend = Backend; } @@ -214,7 +209,6 @@ class platform_impl { filterDeviceFilter(std::vector &PiDevices, ListT *FilterList) const; - bool MHostPlatform = false; sycl::detail::pi::PiPlatform MPlatform = 0; backend MBackend; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index a5f9ae9964ac6..ae59239664327 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -655,7 +655,7 @@ bool queue_impl::ext_oneapi_empty() const { info::event_command_status::complete; } - // Check the status of the backend queue if this is not a host queue. + // Check the status of the backend queue. 
pi_bool IsReady = false; getPlugin()->call( MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6322b904fd6bc..d52fb0da025f3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -79,7 +79,10 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static std::string deviceToString(device Device) { +static std::string queueDeviceToString(const QueueImplPtr &Queue) { + if (!Queue) + return "host"; + auto Device = Queue->get_device(); if (Device.is_cpu()) return "CPU"; else if (Device.is_gpu()) @@ -91,15 +94,19 @@ static std::string deviceToString(device Device) { } static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device", - Queue ? deviceToID(Queue->get_device()) : 0); - xpti::addMetadata(TraceEvent, "sycl_device_type", - Queue ? deviceToString(Queue->get_device()) : "host"); + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); if (Queue) + { + xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); + } } +static unsigned long long getQueueID(const QueueImplPtr& Queue) +{ + return Queue ? Queue->getQueueID() : 0; +} #endif static ContextImplPtr getContext(const QueueImplPtr &Queue) { @@ -1009,8 +1016,7 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); } #endif } @@ -1081,8 +1087,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1130,8 +1135,7 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1168,8 +1172,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue)<< "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1207,8 +1210,7 @@ void ReleaseCommand::emitInstrumentationData() { commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1282,8 +1284,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "RELEASE ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1327,8 +1328,7 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1353,8 +1353,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue) : "host") << "\\n"; Stream << "\"];" << std::endl; @@ -1389,8 +1388,7 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1435,8 +1433,7 @@ void UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UNMAP ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; @@ -1488,8 +1485,7 @@ void MemCpyCommand::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1546,8 +1542,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; @@ -1603,8 +1598,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " - << (MQueue ? 
deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue) << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? "Buffer" : "Image") << "\\n"; @@ -1661,8 +1655,7 @@ void MemCpyCommandHost::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1750,8 +1743,7 @@ void EmptyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1780,8 +1772,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; @@ -1819,8 +1810,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -2087,9 +2077,7 @@ std::pair emitKernelInstrumentationData( if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - Queue ? Queue->getQueueID() : 0); - + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2133,8 +2121,7 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -2157,8 +2144,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n"; + Stream << "EXEC CG ON " << queueDeviceToString(MQueue) << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -3353,8 +3339,7 @@ void KernelFusionCommand::emitInstrumentationData() { if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, - MQueue ? 
MQueue->getQueueID() : 0); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, @@ -3368,8 +3353,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " - << (MQueue ? deviceToString(MQueue->get_device()) : "host") << "\\n" + Stream << "KERNEL FUSION on " << queueDeviceToString(MQueue) << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 03372fc0b7a8f..cd5ae6bd0e0fe 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -213,16 +213,16 @@ struct MemObjRecord { // Contains latest write commands working with memory object. LeavesCollection MWriteLeaves; - // The flag indicates that the content of the memory object was/will be - // modified. Used while deciding if copy back needed. - bool MMemModified = false; - // The context which has the latest state of the memory object. ContextImplPtr MCurContext; - // The mode this object can be accessed with from the host (host_accessor). + // The mode this object can be accessed from the host (host_accessor). // Valid only if the current usage is on host. access::mode MHostAccess = access::mode::read_write; + + // The flag indicates that the content of the memory object was/will be + // modified. Used while deciding if copy back needed. + bool MMemModified = false; }; /// DPC++ graph scheduler class. 
diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index a3a88ebf6636a..18b9cf4036cda 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -71,7 +71,7 @@ std::vector device::get_devices(info::device_type deviceType) { cl_device_id device::get() const { return impl->get(); } bool device::is_host() const { - assert(true && "device::is_host should not be called in implementation."); + assert(false && "device::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 12b4a7e68164e..69d62f354ea4c 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -38,7 +38,7 @@ bool event::operator==(const event &rhs) const { return rhs.impl == impl; } bool event::operator!=(const event &rhs) const { return !(*this == rhs); } bool event::is_host() const { - assert(true && "event::is_host should not be called in implementation."); + assert(false && "event::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index bc842f6e596a5..625eb995c47d3 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -31,7 +31,7 @@ kernel::kernel(cl_kernel ClKernel, const context &SyclContext) cl_kernel kernel::get() const { return impl->get(); } bool kernel::is_host() const { - assert(true && "kernel::is_host should not be called in implementation."); + assert(false && "kernel::is_host should not be called in implementation."); return false; } diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 9a15943213ec6..179c8c09d0825 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -41,7 +41,7 @@ bool platform::has_extension(const std::string &ExtensionName) const { } bool platform::is_host() const { - assert(true && "platform::is_host should not be called in implementation."); + assert(false && "platform::is_host should not be called in implementation."); return false; } diff 
--git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 174d1f9197af1..5cd0bd3449095 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -96,7 +96,7 @@ queue::ext_oneapi_get_graph() const { } bool queue::is_host() const { - assert(true && "queue::is_host should not be called in implementation."); + assert(false && "queue::is_host should not be called in implementation."); return false; } diff --git a/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp b/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp index 5b71a60a54051..a7d4c6493b8b5 100644 --- a/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp +++ b/sycl/test-e2e/SubGroup/sub_groups_sycl2020.cpp @@ -1,9 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Assertion `!MHostPlatform && "Plugin is not available for Host."' failed on -// Nvidia. -// XFAIL: hip_nvidia #include From a87b32817a46d1dfdba9205163106f2af565ea6c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 04:35:59 -0700 Subject: [PATCH 41/58] fix Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 12b58d25ab3cd..f609bd96b7189 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,8 +49,8 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsHostEvent(State), MIsFlushed(true), - MState(State.value_or(HES_Complete)) { + : MIsInitialized(false), MIsFlushed(true), + MState(State.value_or(HES_Complete)), MIsHostEvent(State) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept // event methods. 
This ::get() call uses static vars to read and parse the diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d52fb0da025f3..9d9315652ed55 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1353,7 +1353,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << queueDeviceToString(MQueue) : "host") << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue) << "\\n"; Stream << "\"];" << std::endl; From 0a5a7583eef8f597c8b82c70a8671aeb1f45097c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 07:18:55 -0700 Subject: [PATCH 42/58] Update isCOntextInitialized stuff Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 27 +++++----- sycl/source/detail/event_impl.hpp | 39 ++++++++------- sycl/source/detail/helpers.cpp | 4 +- sycl/source/detail/scheduler/commands.cpp | 49 ++++++++++++------- sycl/source/detail/scheduler/scheduler.cpp | 4 +- sycl/source/queue.cpp | 2 +- sycl/unittests/buffer/BufferReleaseBase.hpp | 4 -- sycl/unittests/pi/PiMock.cpp | 4 -- .../scheduler/EnqueueWithDependsOnDeps.cpp | 4 -- .../scheduler/InOrderQueueHostTaskDeps.cpp | 4 -- sycl/unittests/scheduler/KernelFusion.cpp | 4 -- 11 files changed, 66 insertions(+), 79 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f4ad52221ed37..58a52230f1269 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -33,8 +33,8 @@ extern xpti::trace_event_data_t *GSYCLGraphEvent; #endif // If we do not yet have a context, use the default one. 
-void event_impl::ensureContextInitialized() { - if (MIsContextInitialized) +void event_impl::tryToInitContext() { + if (MContext || !MIsDefaultConstructed) return; const device SyclDevice; @@ -114,12 +114,12 @@ const sycl::detail::pi::PiEvent &event_impl::getHandleRef() const { sycl::detail::pi::PiEvent &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { - ensureContextInitialized(); + tryToInitContext(); return MContext; } const PluginPtr &event_impl::getPlugin() { - ensureContextInitialized(); + tryToInitContext(); return MContext->getPlugin(); } @@ -128,14 +128,12 @@ void event_impl::setStateIncomplete() { MState = HES_NotComplete; } void event_impl::setContextImpl(const ContextImplPtr &Context) { MIsHostEvent = Context == nullptr; MContext = Context; - MIsContextInitialized = true; } event_impl::event_impl(sycl::detail::pi::PiEvent Event, const context &SyclContext) - : MIsContextInitialized(true), MEvent(Event), - MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), - MState(HES_Complete) { + : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), + MIsFlushed(true), MState(HES_Complete) { sycl::detail::pi::PiContext TempContext; getPlugin()->call( @@ -398,7 +396,7 @@ event_impl::get_info() { template <> typename info::platform::version::return_type event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::opencl) { @@ -419,7 +417,7 @@ event_impl::get_backend_info() const { template <> typename info::device::version::return_type event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::opencl) { @@ -437,7 +435,7 @@ event_impl::get_backend_info() const { template <> typename info::device::backend_version::return_type 
event_impl::get_backend_info() const { - if (!MIsContextInitialized) { + if (!MContext) { return "Context not initialized, no backend info available"; } if (MContext->getBackend() != backend::ext_oneapi_level_zero) { @@ -456,11 +454,12 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } pi_native_handle event_impl::getNative() { - ensureContextInitialized(); + if (isHost()) + return {}; + tryToInitContext(); auto Plugin = getPlugin(); - if (!MIsInitialized) { - MIsInitialized = true; + if (MIsDefaultConstructed && !MEvent) { auto TempContext = MContext.get()->getHandleRef(); Plugin->call(TempContext, &MEvent); } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f609bd96b7189..f4c2ac2e90a86 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -49,8 +49,8 @@ class event_impl { /// Normally constructs a host event, use std::nullopt to instead instantiate /// a device event. event_impl(std::optional State = HES_Complete) - : MIsInitialized(false), MIsFlushed(true), - MState(State.value_or(HES_Complete)), MIsHostEvent(State) { + : MIsFlushed(true), MState(State.value_or(HES_Complete)), + MIsDefaultConstructed(!State), MIsHostEvent(State) { // Need to fail in event() constructor if there are problems with the // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept // event methods. This ::get() call uses static vars to read and parse the @@ -255,15 +255,6 @@ class event_impl { QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; - /// Checks if an event is in a fully intialized state. Default-constructed - /// events will return true only after having initialized its native event, - /// while other events will assume that they are fully initialized at - /// construction, relying on external sources to supply member data. 
- /// - /// \return true if the event is considered to be in a fully initialized - /// state. - bool isInitialized() const noexcept { return MIsInitialized; } - /// Checks if this event is complete. /// /// \return true if this event is complete. @@ -279,10 +270,11 @@ class event_impl { MPostCompleteEvents.push_back(Event); } - bool isContextInitialized() const noexcept { return MIsContextInitialized; } + bool isDefaultConstructed() const noexcept { return MIsDefaultConstructed; } ContextImplPtr getContextImplPtr() { - ensureContextInitialized(); + if (MIsDefaultConstructed) + tryToInitContext(); return MContext; } @@ -347,11 +339,7 @@ class event_impl { void instrumentationEpilog(void *TelementryEvent, const std::string &Name, int32_t StreamID, uint64_t IId) const; void checkProfilingPreconditions() const; - // Events constructed without a context will lazily use the default context - // when needed. - void ensureContextInitialized(); - bool MIsInitialized = true; - bool MIsContextInitialized = false; + sycl::detail::pi::PiEvent MEvent = nullptr; // Stores submission time of command associated with event uint64_t MSubmitTime = 0; @@ -409,7 +397,20 @@ class event_impl { std::shared_ptr Context); std::atomic_bool MIsEnqueued{false}; - bool MIsHostEvent{false}; + + // Events constructed without a context will lazily use the default context + // when needed. + void tryToInitContext(); + // Event class represents 3 different kinds of operations: + // | type | has PI event | MContext | MIsHostTask | MIsDefaultConstructed | + // | dev | true | !nullptr | false | false | + // | host | false | nullptr | true | false | + // |default| * | * | false | true | + // Default constructed event is created with empty ctor in host code, MContext + // is lazily initialized with default device context on first context query. + // MEvent is lazily created in first pi handle query. 
+ bool MIsDefaultConstructed = false; + bool MIsHostEvent = false; }; } // namespace detail diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 75c6fd72b8fd0..901fd34b4cce8 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -31,9 +31,7 @@ getOrWaitEvents(std::vector DepEvents, ContextImplPtr Context) { // throwaway events created with empty constructor will not have a context // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. - if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->isHost()) || - SyclEventImplPtr->isNOP()) { + if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) { continue; } // The fusion command and its event are associated with a non-host context, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9d9315652ed55..1b9aea1c10f02 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -95,16 +95,15 @@ static std::string queueDeviceToString(const QueueImplPtr &Queue) { static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); - if (Queue) - { - xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); + if (Queue) { + xpti::addMetadata(TraceEvent, "sycl_device", + deviceToID(Queue->get_device())); xpti::addMetadata(TraceEvent, "sycl_device_name", getSyclObjImpl(Queue->get_device())->getDeviceName()); } } -static unsigned long long getQueueID(const QueueImplPtr& Queue) -{ +static unsigned long long getQueueID(const QueueImplPtr &Queue) { return Queue ? 
Queue->getQueueID() : 0; } #endif @@ -279,7 +278,7 @@ std::vector Command::getPiEventsBlocking( // (which is set lazily) calling getContextImpl() would set that // context, which we wish to avoid as it is expensive. // Skip host task and NOP events also. - if (!EventImpl->isContextInitialized() || EventImpl->isHost() || + if (EventImpl->isDefaultConstructed() || EventImpl->isHost() || EventImpl->isNOP()) continue; // In this path nullptr native event means that the command has not been @@ -728,7 +727,8 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, // 2. Some types of commands do not produce PI events after they are // enqueued (e.g. alloca). Note that we can't check the pi event to make that // distinction since the command might still be unenqueued at this point. - bool PiEventExpected = (!DepEvent->isHost() && DepEvent->isInitialized()); + bool PiEventExpected = + (!DepEvent->isHost() && !DepEvent->isDefaultConstructed()); if (auto *DepCmd = static_cast(DepEvent->getCommand())) PiEventExpected &= DepCmd->producesPiEvent(); @@ -1016,7 +1016,8 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); } #endif } @@ -1135,7 +1136,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1172,7 +1174,7 @@ void AllocaSubBufCommand::printDot(std::ostream 
&Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue)<< "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1210,7 +1212,8 @@ void ReleaseCommand::emitInstrumentationData() { commandToName(MAllocaCmd->getType())); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1328,7 +1331,8 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1388,7 +1392,8 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1485,7 +1490,8 @@ void MemCpyCommand::emitInstrumentationData() { MQueue ? 
deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1655,7 +1661,8 @@ void MemCpyCommandHost::emitInstrumentationData() { MQueue ? deviceToID(MQueue->get_device()) : 0); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1743,7 +1750,8 @@ void EmptyCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -1810,7 +1818,8 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); makeTraceEventEpilog(); } #endif @@ -2121,7 +2130,8 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + 
getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -3339,7 +3349,8 @@ void KernelFusionCommand::emitInstrumentationData() { if (MFirstInstance) { // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(MQueue)); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 905ca889aaf0d..4acc5b6c3a6a4 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -697,9 +697,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. 
- if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->isHost()) || - SyclEventImplPtr->isNOP()) { + if ((SyclEventImplPtr->isDefaultConstructed()) || SyclEventImplPtr->isNOP()) { return true; } if (SyclEventImplPtr->isHost()) { diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 5cd0bd3449095..9c807f90061b5 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -244,7 +244,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, bool AllEventsEmptyOrNop = std::all_of( begin(WaitList), end(WaitList), [&](const event &Event) -> bool { auto EventImpl = detail::getSyclObjImpl(Event); - return !EventImpl->isContextInitialized() || EventImpl->isNOP(); + return EventImpl->isDefaultConstructed() || EventImpl->isNOP(); }); if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp index b35d73cb3909c..bfcc4fb8369ed 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.hpp +++ b/sycl/unittests/buffer/BufferReleaseBase.hpp @@ -43,10 +43,6 @@ class BufferDestructionCheckCommon : public ::testing::Test { protected: void SetUp() override { - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - GTEST_SKIP(); - } MockSchedulerPtr = new MockScheduler(); sycl::detail::GlobalHandler::instance().attachScheduler( dynamic_cast(MockSchedulerPtr)); diff --git a/sycl/unittests/pi/PiMock.cpp b/sycl/unittests/pi/PiMock.cpp index c7014162f9cf8..02044d9631376 100644 --- a/sycl/unittests/pi/PiMock.cpp +++ b/sycl/unittests/pi/PiMock.cpp @@ -56,10 +56,6 @@ TEST(PiMockTest, ConstructFromQueue) { sycl::unittest::PiMock Mock; queue MockQ{Mock.getPlatform().get_devices()[0]}; queue NormalQ; - if (NormalQ.is_host()) { - std::cerr << "Not run due to host-only environment\n"; - return; - } const auto &NormalPiPlugin = detail::getSyclObjImpl(NormalQ)->getPlugin()->getPiPlugin(); 
diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index e1bc8c894f311..08f03420ac54e 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -26,10 +26,6 @@ constexpr auto DisableCleanupName = "SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP"; std::vector> PassedNumEvents; bool CheckTestExecutionRequirements(const platform &plt) { - if (plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return false; - } // This test only contains device image for SPIR-V capable devices. if (plt.get_backend() != sycl::backend::opencl && plt.get_backend() != sycl::backend::ext_oneapi_level_zero) { diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 8693ff5e4c52b..929f8735bc85f 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -130,10 +130,6 @@ TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { customextUSMEnqueueMemset); sycl::platform Plt = Mock.getPlatform(); - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - GTEST_SKIP(); - } context Ctx{Plt}; queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()}; diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 8b45c03e37f1f..5a86636b13c09 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -42,10 +42,6 @@ detail::Command *CreateTaskCommand(MockScheduler &MS, } bool CheckTestExecRequirements(const platform &plt) { - if (plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return false; - } // This test only contains device image for SPIR-V capable devices. 
if (plt.get_backend() != sycl::backend::opencl && plt.get_backend() != sycl::backend::ext_oneapi_level_zero) { From 97c4ce548c894ab94b223fd66d1d18f7a97f7d78 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:00:51 -0700 Subject: [PATCH 43/58] prepare removal from handler Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 69 +++++++++------------------- sycl/source/detail/platform_impl.hpp | 4 +- 2 files changed, 23 insertions(+), 50 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index a71f5400a813d..19d0c5ac1e85e 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -178,22 +178,22 @@ template -static Arg member_ptr_helper(RetType (Func::*)(Arg) const); +static Arg member_ptr_helper(RetType (Func:: *)(Arg) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg member_ptr_helper(RetType (Func::*)(Arg)); +static Arg member_ptr_helper(RetType (Func:: *)(Arg)); // Version with two arguments to handle the case when kernel_handler is passed // to a lambda template -static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2) const); +static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2)); +static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2)); template decltype(member_ptr_helper(&F::operator())) argument_helper(int); @@ -464,8 +464,8 @@ class __SYCL_EXPORT handler { /// Constructs SYCL handler from queue. /// /// \param Queue is a SYCL queue. - /// \param IsHost indicates if this handler is created for SYCL host device. 
- handler(std::shared_ptr Queue, bool IsHost); + handler(std::shared_ptr Queue, + bool /*ABI Break: to remove */); /// Constructs SYCL handler from the associated queue and the submission's /// primary and secondary queue. @@ -475,10 +475,10 @@ class __SYCL_EXPORT handler { /// \param PrimaryQueue is the primary SYCL queue of the submission. /// \param SecondaryQueue is the secondary SYCL queue of the submission. This /// is null if no secondary queue is associated with the submission. - /// \param IsHost indicates if this handler is created for SYCL host device. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, bool IsHost); + std::shared_ptr SecondaryQueue, + bool /*ABI Break: to remove */); /// Constructs SYCL handler from Graph. /// @@ -609,7 +609,7 @@ class __SYCL_EXPORT handler { ~handler() = default; // TODO: Private and unusued. Remove when ABI break is allowed. - bool is_host() { return MIsHost; } + bool is_host() { return false; } #ifdef __SYCL_DEVICE_ONLY__ // In device compilation accessor isn't inherited from host base classes, so @@ -888,12 +888,6 @@ class __SYCL_EXPORT handler { detail::KernelLambdaHasKernelHandlerArgT::value; - if (IsCallableWithKernelHandler && MIsHost) { - throw sycl::feature_not_supported( - "kernel_handler is not yet supported by host device.", - PI_ERROR_INVALID_OPERATION); - } - KernelType *KernelPtr = ResetHostKernel(KernelFunc); @@ -1042,8 +1036,7 @@ class __SYCL_EXPORT handler { std::enable_if_t<(DimSrc > 0) && (DimDst > 0), bool> copyAccToAccHelper(accessor Src, accessor Dst) { - if (!MIsHost && - IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) + if (IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range())) return false; range<1> LinearizedRange(Src.size()); @@ -1065,6 +1058,7 @@ class __SYCL_EXPORT handler { /// /// \param Src is a source SYCL accessor. /// \param Dst is a destination SYCL accessor. 
+ // ABI break: to remove whole method template copyAccToAccHelper(accessor Src, accessor Dst) { - if (!MIsHost) - return false; - - single_task<__copyAcc2Acc>( - [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); }); - return true; + return false; } #ifndef __SYCL_DEVICE_ONLY__ + // ABI break: to remove whole method /// Copies the content of memory object accessed by Src into the memory /// pointed by Dst. /// @@ -1101,6 +1090,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies 1 element accessed by 0-dimensional accessor Src into the memory /// pointed by Dst. /// @@ -1118,6 +1108,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies the memory pointed by Src into the memory accessed by Dst. /// /// \param Src is a pointer to source memory. @@ -1135,6 +1126,7 @@ class __SYCL_EXPORT handler { }); } + // ABI break: to remove whole method /// Copies 1 element pointed by Src to memory accessed by 0-dimensional /// accessor Dst. 
/// @@ -2245,7 +2237,7 @@ class __SYCL_EXPORT handler { MNDRDesc.set(range<1>{1}); MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2282,7 +2274,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(false); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2322,7 +2314,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(false); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2361,7 +2353,7 @@ class __SYCL_EXPORT handler { MKernel = detail::getSyclObjImpl(std::move(Kernel)); setType(detail::CG::Kernel); setNDRangeUsed(true); - if (!MIsHost && !lambdaAndKernelHaveEqualName()) { + if (!lambdaAndKernelHaveEqualName()) { extractArgsAndReqs(); MKernelName = getKernelName(); } else @@ -2688,14 +2680,6 @@ class __SYCL_EXPORT handler { "Invalid accessor target for the copy method."); static_assert(isValidModeForSourceAccessor(AccessMode), "Invalid accessor mode for the copy method."); -#ifndef __SYCL_DEVICE_ONLY__ - if (MIsHost) { - // TODO: Temporary implementation for host. Should be handled by memory - // manager. - copyAccToPtrHost(Src, Dst); - return; - } -#endif setType(detail::CG::CopyAccToPtr); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Src; @@ -2732,14 +2716,7 @@ class __SYCL_EXPORT handler { "Invalid accessor mode for the copy method."); // TODO: Add static_assert with is_device_copyable when vec is // device-copyable. 
-#ifndef __SYCL_DEVICE_ONLY__ - if (MIsHost) { - // TODO: Temporary implementation for host. Should be handled by memory - // manager. - copyPtrToAccHost(Src, Dst); - return; - } -#endif + setType(detail::CG::CopyPtrToAcc); detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; @@ -2853,8 +2830,6 @@ class __SYCL_EXPORT handler { fill(accessor Dst, const T &Pattern) { - assert(!MIsHost && "fill() should no longer be callable on a host device."); - if (Dst.is_placeholder()) checkIfPlaceholderIsBoundToHandler(Dst); @@ -3392,7 +3367,7 @@ class __SYCL_EXPORT handler { /// Storage for the CG created when handling graph nodes added explicitly. std::unique_ptr MGraphNodeCG; - bool MIsHost = false; + bool MIsHost = false; // ABI break: to remove detail::code_location MCodeLoc = {}; bool MIsFinalized = false; diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 0a926712eb806..dfb2597bf417b 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -121,9 +121,7 @@ class platform_impl { static std::vector get_platforms(); // \return the Plugin associated with this platform. - const PluginPtr &getPlugin() const { - return MPlugin; - } + const PluginPtr &getPlugin() const { return MPlugin; } /// Sets the platform implementation to use another plugin. 
/// From 6cf3171d7d43021fd668789e5b83d12331d41858 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:05:12 -0700 Subject: [PATCH 44/58] fix test Signed-off-by: Tikhomirova, Kseniya --- sycl/test-e2e/Config/allowlist.cpp | 58 +++++++++++++----------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 121e911c0474c..7bfb16ca687d0 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -35,61 +35,51 @@ int main() { // Expected that the allowlist filter is not set if (getenv("PRINT_PLATFORM_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { + std::string Name = Platform.get_info(); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. 
- replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; - - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { - const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + const sycl::device Dev = Platform.get_devices().at(0); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result if (getenv("TEST_DEVICE_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) { - if (Platform.get_devices().size() != 1) - throw std::runtime_error("Expected only one non host device."); + if (Platform.get_devices().size() != 1) + throw std::runtime_error("Expected only one device."); - return 0; - } - throw std::runtime_error("Non host device is not found"); + return 0; + } } // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - if (!Platform.is_host()) - throw std::runtime_error("Expected no non host device is available"); + throw std::runtime_error("Expected no device is available"); return 0; } From 989557abba027be8a90c106ac69bac046016565d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Jun 2024 12:22:56 -0700 Subject: [PATCH 45/58] fix clang-format Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 8 +++--- sycl/test-e2e/Config/allowlist.cpp | 40 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 19d0c5ac1e85e..6df476e2d2d96 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -178,22 +178,22 @@ template -static Arg member_ptr_helper(RetType (Func:: *)(Arg) const); +static Arg member_ptr_helper(RetType (Func::*)(Arg) const); // Non-const version of the above template to match 
functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg member_ptr_helper(RetType (Func:: *)(Arg)); +static Arg member_ptr_helper(RetType (Func::*)(Arg)); // Version with two arguments to handle the case when kernel_handler is passed // to a lambda template -static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2) const); +static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2) const); // Non-const version of the above template to match functors whose 'operator()' // is declared w/o the 'const' qualifier. template -static Arg1 member_ptr_helper(RetType (Func:: *)(Arg1, Arg2)); +static Arg1 member_ptr_helper(RetType (Func::*)(Arg1, Arg2)); template decltype(member_ptr_helper(&F::operator())) argument_helper(int); diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 7bfb16ca687d0..7891088db5abb 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -36,34 +36,34 @@ int main() { if (getenv("PRINT_PLATFORM_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - return 0; + return 0; } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. + replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; + return 0; } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result @@ -72,14 +72,14 @@ int main() { if (Platform.get_devices().size() != 1) throw std::runtime_error("Expected only one device."); - return 0; - } + return 0; + } } // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { for (const sycl::platform &Platform : sycl::platform::get_platforms()) - throw std::runtime_error("Expected no device is available"); + throw std::runtime_error("Expected no device is available"); return 0; } From 1a139752d02529ac27903be31b1e772e994aeb34 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 27 Jun 2024 03:41:00 -0700 Subject: [PATCH 46/58] fix warning 
Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 6df476e2d2d96..a536d41f329e0 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1064,8 +1064,8 @@ class __SYCL_EXPORT handler { access::mode ModeDst, access::target TargetDst, access::placeholder IsPHSrc, access::placeholder IsPHDst> std::enable_if_t - copyAccToAccHelper(accessor Src, - accessor Dst) { + copyAccToAccHelper(accessor, + accessor) { return false; } From e9fffb6419638e729ca7a9da32bd054b50a1dc37 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 27 Jun 2024 03:48:10 -0700 Subject: [PATCH 47/58] fix allowlist test cherry-pick issues Signed-off-by: Tikhomirova, Kseniya --- sycl/test-e2e/Config/allowlist.cpp | 49 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 7891088db5abb..393326cb76283 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -34,46 +34,51 @@ int main() { // Expected that the allowlist filter is not set if (getenv("PRINT_PLATFORM_INFO")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { std::string Name = Platform.get_info(); - std::string Ver = Platform.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + std::string Ver = Platform.get_info(); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name - << "}},PlatformVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=PlatformName:{{" << Name + << "}},PlatformVersion:{{" << Ver << "}}"; - return 0; + return 0; + } + throw std::runtime_error("No device is found"); } // Expected that the allowlist filter is not set if (getenv("PRINT_DEVICE_INFO")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { const sycl::device Dev = Platform.get_devices().at(0); - std::string Name = Dev.get_info(); - std::string Ver = Dev.get_info(); + std::string Name = Dev.get_info(); + std::string Ver = Dev.get_info(); - // As a string will be used as regexp pattern, we need to get rid of - // symbols that can be treated in a special way. - replaceSpecialCharacters(Name); - replaceSpecialCharacters(Ver); + // As a string will be used as regexp pattern, we need to get rid of + // symbols that can be treated in a special way. 
+ replaceSpecialCharacters(Name); + replaceSpecialCharacters(Ver); - std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name - << "}},DriverVersion:{{" << Ver << "}}"; + std::cout << "SYCL_DEVICE_ALLOWLIST=DeviceName:{{" << Name + << "}},DriverVersion:{{" << Ver << "}}"; - return 0; + return 0; + } + throw std::runtime_error("No device is found"); } // Expected the allowlist to be set with the "PRINT_DEVICE_INFO" run result if (getenv("TEST_DEVICE_AVAILABLE")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + for (const sycl::platform &Platform : sycl::platform::get_platforms()) { if (Platform.get_devices().size() != 1) throw std::runtime_error("Expected only one device."); - return 0; - } + return 0; + } + throw std::runtime_error("No device is found"); } // Expected the allowlist to be set but empty From 6ec2b63ecaedf8476d8a7dab3ce1bcc7b6e5963d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:06:17 -0700 Subject: [PATCH 48/58] fix code review comments Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 14 +---- sycl/source/detail/scheduler/commands.cpp | 60 +++++++------------ .../source/detail/scheduler/graph_builder.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/xpti_registry.cpp | 15 +++++ sycl/source/detail/xpti_registry.hpp | 3 + sycl/test-e2e/Config/allowlist.cpp | 2 +- 7 files changed, 47 insertions(+), 53 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 0ec8f57abb596..6f6e72fbd2af9 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -26,7 +26,7 @@ namespace sycl { inline namespace _V1 { namespace detail { -// Treat 0 as reserved for "host" queue +// Treat 0 as reserved for host task traces std::atomic queue_impl::MNextAvailableQueueID = 1; thread_local bool NestedCallsDetector = false; @@ -498,17 +498,7 @@ void *queue_impl::instrumentationProlog(const 
detail::code_location &CodeLoc, xpti_at::active, &QWaitInstanceNo); IId = QWaitInstanceNo; if (WaitEvent) { - device D = get_device(); - std::string DevStr; - if (D.is_cpu()) - DevStr = "CPU"; - else if (D.is_gpu()) - DevStr = "GPU"; - else if (D.is_accelerator()) - DevStr = "ACCELERATOR"; - else - DevStr = "UNKNOWN"; - xpti::addMetadata(WaitEvent, "sycl_device_type", DevStr); + xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); if (HasSourceInfo) { xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName()); xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName()); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2ab4663c5db20..9ea45424f0ce5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -79,22 +79,8 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static std::string queueDeviceToString(const QueueImplPtr &Queue) { - if (!Queue) - return "host"; - auto Device = Queue->get_device(); - if (Device.is_cpu()) - return "CPU"; - else if (Device.is_gpu()) - return "GPU"; - else if (Device.is_accelerator()) - return "ACCELERATOR"; - else - return "UNKNOWN"; -} - static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue.get())); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); @@ -411,7 +397,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now if (HostTask.MHostTask->isInteropTask()) { assert(HostTask.MQueue && - "Submitted queue for host task must be device queue"); + "Host task submissions should have an associated queue"); interop_handle IH{MReqToMem, HostTask.MQueue, 
HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; @@ -1088,7 +1074,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "ALLOCA ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; @@ -1174,7 +1160,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FFD28A\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "ALLOCA SUB BUF ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " MemObj : " << this->MRequirement.MSYCLMemObj << "\\n"; Stream << " Offset : " << this->MRequirement.MOffsetInBytes << "\\n"; Stream << " Access range : " << this->MRequirement.MAccessRange[0] << "\\n"; @@ -1287,7 +1273,7 @@ void ReleaseCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#FF827A\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "RELEASE ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "RELEASE ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << " Alloca : " << MAllocaCmd << "\\n"; Stream << " MemObj : " << MAllocaCmd->getSYCLMemObj() << "\\n"; Stream << "\"];" << std::endl; @@ -1357,7 +1343,7 @@ void MapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#77AFFF\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "MAP ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MAP ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1438,7 +1424,7 @@ void 
UnMapMemObject::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#EBC40F\", label=\""; Stream << "ID = " << this << " ; "; - Stream << "UNMAP ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "UNMAP ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1548,7 +1534,7 @@ void MemCpyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#C7EB15\" label=\""; Stream << "ID = " << this << " ; "; - Stream << "MEMCPY ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MEMCPY ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "From: " << MSrcAllocaCmd << " is host: " << !MSrcQueue << "\\n"; Stream << "To: " << MDstAllocaCmd << " is host: " << !MQueue << "\\n"; @@ -1604,7 +1590,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#f1337f\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "UPDATE REQ ON " << queueDeviceToString(MQueue.get()) << "\\n"; bool IsReqOnBuffer = MDstReq.MSYCLMemObj->getType() == SYCLMemObjI::MemObjType::Buffer; Stream << "TYPE: " << (IsReqOnBuffer ? 
"Buffer" : "Image") << "\\n"; @@ -1780,7 +1766,7 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "MEMCPY HOST ON " << queueDeviceToString(MQueue.get()) << "\\n"; Stream << "\"];" << std::endl; @@ -1971,7 +1957,7 @@ void instrumentationAddExtraKernelMetadata( if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); } else { - assert(Queue && "Queue with submitted kernel could not be on host"); + assert(Queue && "Kernel submissions should have an associated queue"); std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); @@ -2154,7 +2140,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC CG ON " << queueDeviceToString(MQueue) << "\\n"; + Stream << "EXEC CG ON " << queueDeviceToString(MQueue.get()) << "\\n"; switch (MCommandGroup->getType()) { case detail::CG::Kernel: { @@ -2345,7 +2331,7 @@ static pi_result SetKernelParamsAndLaunch( const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, bool IsCooperative) { - assert(Queue && "Queue with submitted kernel could not be on host"); + assert(Queue && "Kernel submissions should have an associated queue"); const PluginPtr &Plugin = Queue->getPlugin(); auto setFunc = [&Plugin, Kernel, &DeviceImageImpl, &getMemAllocationFunc, @@ -2536,7 +2522,7 @@ pi_int32 enqueueImpKernel( const std::function &getMemAllocationFunc, sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig, const bool KernelIsCooperative) { - assert(Queue && "Queue with submitted kernel could not be on host"); + 
assert(Queue && "Kernel submissions should have an associated queue"); // Run OpenCL kernel auto ContextImpl = Queue->getContextImplPtr(); auto DeviceImpl = Queue->getDeviceImplPtr(); @@ -2652,7 +2638,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, std::vector &RawEvents, const detail::EventImplPtr &OutEventImpl, bool read) { assert(Queue && - "Queue with submitted read write host pipe could not be on host"); + "ReadWrite host pipe submissions should have an associated queue"); detail::HostPipeMapEntry *hostPipeEntry = ProgramManager::getInstance().getHostPipeEntry(PipeName); @@ -2702,7 +2688,7 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, } pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { - assert(MQueue && "Device queue is required for command buffer enqueue"); + assert(MQueue && "Command buffer enqueue should have an associated queue"); // Wait on host command dependencies waitForPreparedHostEvents(); @@ -2941,7 +2927,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Kernel: { - assert(MQueue && "Device queue must be present for kernel command"); + assert(MQueue && "Kernel submissions should have an associated queue"); CGExecKernel *ExecKernel = (CGExecKernel *)MCommandGroup.get(); NDRDescT &NDRDesc = ExecKernel->MNDRDesc; @@ -3094,7 +3080,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::Barrier: { - assert(MQueue && "Device queue must be present for barrier command"); + assert(MQueue && "Barrier submission should have an associated queue"); const PluginPtr &Plugin = MQueue->getPlugin(); if (MEvent != nullptr) MEvent->setHostEnqueueTime(); @@ -3105,7 +3091,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::BarrierWaitlist: { assert(MQueue && - "Device queue must be present for barrier with wait list command"); + "Barrier submission should have an associated queue"); CGBarrier *Barrier = 
static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3173,7 +3159,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { - assert(MQueue && "Device queue must be present for command buffer enqueue"); + assert(MQueue && "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ -3197,7 +3183,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { - assert(MQueue && "Device queue must be present for semaphore wait command"); + assert(MQueue && "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3211,7 +3197,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } case CG::CGTYPE::SemaphoreSignal: { assert(MQueue && - "Device queue must be present for semaphore signal command"); + "Semaphore signal submissions should have an associated queue"); CGSemaphoreSignal *SemSignal = (CGSemaphoreSignal *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); @@ -3349,7 +3335,7 @@ void KernelFusionCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "KERNEL FUSION on " << queueDeviceToString(MQueue) << "\\n" + Stream << "KERNEL FUSION on " << queueDeviceToString(MQueue.get()) << "\\n" << "FUSION LIST: {"; bool Initial = true; for (auto *Cmd : MFusionList) { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 7cfc0446fdd69..284985b2f9c16 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,7 +678,7 @@ 
AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) { if (const char *HUMConfig = SYCLConfig::get()) { if (std::strcmp(HUMConfig, "0") == 0) - return false; + return Ctx == nullptr; if (std::strcmp(HUMConfig, "1") == 0) return true; } @@ -768,7 +768,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // new one. There could be situations when we could setup link with // "not" current allocation, but it will require memory copy. // Can setup link between cl and host allocations only - if ((Context != nullptr) != (Record->MCurContext != nullptr)) { + if ((Context == nullptr) != (Record->MCurContext == nullptr)) { // Linked commands assume that the host allocation is reused by the // plugin runtime and that can lead to unnecessary copy overhead on // devices that do not support host unified memory. Do not link the diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4acc5b6c3a6a4..a14af63b1a2a0 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -697,7 +697,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // don't represent actual dependencies. Calling getContextImpl() would set // their context, which we wish to avoid as it is expensive. // NOP events also don't represent actual dependencies. 
- if ((SyclEventImplPtr->isDefaultConstructed()) || SyclEventImplPtr->isNOP()) { + if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) { return true; } if (SyclEventImplPtr->isHost()) { diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index c08e620b0583d..ed629b39b9be0 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -8,6 +8,7 @@ #include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti/xpti_trace_framework.hpp" @@ -362,6 +363,20 @@ void XPTIRegistry::sampledImageHostAccessorNotification( #endif } +std::string queueDeviceToString(const queue_impl* const &Queue) { + if (!Queue) + return "HOST"; + auto Device = Queue->get_device(); + if (Device.is_cpu()) + return "CPU"; + else if (Device.is_gpu()) + return "GPU"; + else if (Device.is_accelerator()) + return "ACCELERATOR"; + else + return "UNKNOWN"; +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index 681e2841c027b..a66ac46a0cd34 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -319,6 +319,9 @@ class XPTIScope { }; // class XPTIScope #endif +class queue_impl; +std::string queueDeviceToString(const detail::queue_impl* const &Queue); + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 393326cb76283..063ebabc1aba5 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -83,7 +83,7 @@ int main() { // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { - for (const sycl::platform &Platform : sycl::platform::get_platforms()) + if (!sycl::platform::get_platforms().empty()) throw std::runtime_error("Expected no device is available"); return 0; } From 
954ba8b77e99d017fdaac40417b75da7419a0d11 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:22:06 -0700 Subject: [PATCH 49/58] extra code review changes Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 8 ++++---- sycl/source/detail/event_impl.hpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 58a52230f1269..85afb56fcaf9b 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -33,7 +33,7 @@ extern xpti::trace_event_data_t *GSYCLGraphEvent; #endif // If we do not yet have a context, use the default one. -void event_impl::tryToInitContext() { +void event_impl::initContextIfNeeded() { if (MContext || !MIsDefaultConstructed) return; @@ -114,12 +114,12 @@ const sycl::detail::pi::PiEvent &event_impl::getHandleRef() const { sycl::detail::pi::PiEvent &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { - tryToInitContext(); + initContextIfNeeded(); return MContext; } const PluginPtr &event_impl::getPlugin() { - tryToInitContext(); + initContextIfNeeded(); return MContext->getPlugin(); } @@ -456,7 +456,7 @@ void HostProfilingInfo::end() { EndTime = getTimestamp(); } pi_native_handle event_impl::getNative() { if (isHost()) return {}; - tryToInitContext(); + initContextIfNeeded(); auto Plugin = getPlugin(); if (MIsDefaultConstructed && !MEvent) { diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index f4c2ac2e90a86..e52ac40ad78d7 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -274,7 +274,7 @@ class event_impl { ContextImplPtr getContextImplPtr() { if (MIsDefaultConstructed) - tryToInitContext(); + initContextIfNeeded(); return MContext; } @@ -400,7 +400,7 @@ class event_impl { // Events constructed without a context will lazily use the default context // when needed. 
- void tryToInitContext(); + void initContextIfNeeded(); // Event class represents 3 different kinds of operations: // | type | has PI event | MContext | MIsHostTask | MIsDefaultConstructed | // | dev | true | !nullptr | false | false | From 3fb26e0fdc88ee470b6a360f0fda3f3a35137b9c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:35:49 -0700 Subject: [PATCH 50/58] fix format Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/handler.hpp | 8 ++++---- sycl/source/detail/queue_impl.cpp | 3 +-- sycl/source/detail/scheduler/commands.cpp | 12 +++++++----- sycl/source/detail/scheduler/graph_builder.cpp | 5 ++--- sycl/source/detail/scheduler/scheduler.cpp | 5 ++--- sycl/source/detail/xpti_registry.cpp | 4 ++-- sycl/source/detail/xpti_registry.hpp | 2 +- sycl/source/handler.cpp | 15 +++++++-------- sycl/test-e2e/Config/allowlist.cpp | 2 +- .../scheduler/EnqueueWithDependsOnDeps.cpp | 8 ++++---- 10 files changed, 31 insertions(+), 33 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index ec59dc8aece7c..61b23ffd707d5 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -488,8 +488,8 @@ class __SYCL_EXPORT handler { /// \param IsHost indicates if this handler is created for SYCL host device. /// \param CallerNeedsEvent indicates if the event resulting from this handler /// is needed by the caller. - handler(std::shared_ptr Queue, bool /* ABI break: remove */, - bool CallerNeedsEvent); + handler(std::shared_ptr Queue, + bool /* ABI break: remove */, bool CallerNeedsEvent); /// Constructs SYCL handler from the associated queue and the submission's /// primary and secondary queue. @@ -504,8 +504,8 @@ class __SYCL_EXPORT handler { /// is needed by the caller. 
handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, bool /* ABI break: remove */, - bool CallerNeedsEvent); + std::shared_ptr SecondaryQueue, + bool /* ABI break: remove */, bool CallerNeedsEvent); /// Constructs SYCL handler from Graph. /// diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 45ca3aa0b2291..588254743701f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -354,8 +354,7 @@ event queue_impl::submit_impl(const std::function &CGF, bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess) { - handler Handler(Self, PrimaryQueue, SecondaryQueue, false, - CallerNeedsEvent); + handler Handler(Self, PrimaryQueue, SecondaryQueue, false, CallerNeedsEvent); Handler.saveCodeLoc(Loc); { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0b7f38d6e429d..38aa77e0c92ed 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -80,7 +80,8 @@ static size_t deviceToID(const device &Device) { } static void addDeviceMetadata(xpti_td *TraceEvent, const QueueImplPtr &Queue) { - xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue.get())); + xpti::addMetadata(TraceEvent, "sycl_device_type", + queueDeviceToString(Queue.get())); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", deviceToID(Queue->get_device())); @@ -3099,8 +3100,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::BarrierWaitlist: { - assert(MQueue && - "Barrier submission should have an associated queue"); + assert(MQueue && "Barrier submission should have an associated queue"); CGBarrier *Barrier = static_cast(MCommandGroup.get()); std::vector Events = Barrier->MEventsWaitWithBarrier; std::vector PiEvents = @@ -3168,7 +3168,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { typeSize, RawEvents, 
EventImpl, read); } case CG::CGTYPE::ExecCommandBuffer: { - assert(MQueue && "Command buffer submissions should have an associated queue"); + assert(MQueue && + "Command buffer submissions should have an associated queue"); CGExecCommandBuffer *CmdBufferCG = static_cast(MCommandGroup.get()); if (MEvent != nullptr) @@ -3192,7 +3193,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { return PI_SUCCESS; } case CG::CGTYPE::SemaphoreWait: { - assert(MQueue && "Semaphore wait submissions should have an associated queue"); + assert(MQueue && + "Semaphore wait submissions should have an associated queue"); CGSemaphoreWait *SemWait = (CGSemaphoreWait *)MCommandGroup.get(); const detail::PluginPtr &Plugin = MQueue->getPlugin(); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index dcd4a0aa96dce..f8397016fce41 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1339,9 +1339,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( /* DepEvents = */ {DepEvent}), CG::CodeplayHostTask, /* Payload */ {})); - ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), nullptr, - /*EventNeeded=*/true); + ConnectCmd = new ExecCGCommand(std::move(ConnectCG), nullptr, + /*EventNeeded=*/true); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index cea700a311b7d..fbea6f14dea3d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -115,9 +115,8 @@ EventImplPtr Scheduler::addCG( NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = - MGraphBuilder.addCG(std::move(CommandGroup), nullptr, - AuxiliaryCmds, EventNeeded); + auto Result = MGraphBuilder.addCG(std::move(CommandGroup), nullptr, + AuxiliaryCmds, EventNeeded); NewCmd = 
Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index ed629b39b9be0..1884f5cd34265 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include -#include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti/xpti_trace_framework.hpp" @@ -363,7 +363,7 @@ void XPTIRegistry::sampledImageHostAccessorNotification( #endif } -std::string queueDeviceToString(const queue_impl* const &Queue) { +std::string queueDeviceToString(const queue_impl *const &Queue) { if (!Queue) return "HOST"; auto Device = Queue->get_device(); diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index a66ac46a0cd34..356679a75c2fb 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -320,7 +320,7 @@ class XPTIScope { #endif class queue_impl; -std::string queueDeviceToString(const detail::queue_impl* const &Queue); +std::string queueDeviceToString(const detail::queue_impl *const &Queue); } // namespace detail } // namespace _V1 diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 011d3c4efce22..72277bb39ed31 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -87,8 +87,7 @@ handler::handler(std::shared_ptr Queue, bool) /// TODO: Unused. Remove with ABI break. 
handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool) + std::shared_ptr SecondaryQueue, bool) : handler(Queue, PrimaryQueue, SecondaryQueue, false, /*CallerNeedsEvent=*/true) {} @@ -98,8 +97,8 @@ handler::handler(std::shared_ptr Queue, bool, handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, - std::shared_ptr SecondaryQueue, - bool, bool CallerNeedsEvent) + std::shared_ptr SecondaryQueue, bool, + bool CallerNeedsEvent) : MImpl(std::make_shared(std::move(PrimaryQueue), std::move(SecondaryQueue), CallerNeedsEvent)), @@ -287,10 +286,10 @@ event handler::finalize() { detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); #endif - Result = enqueueImpKernel( - MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); + Result = enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, + MKernel, MKernelName.c_str(), RawEvents, + NewEvent, nullptr, MImpl->MKernelCacheConfig, + MImpl->MKernelIsCooperative); #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created if (NewEvent != nullptr) { diff --git a/sycl/test-e2e/Config/allowlist.cpp b/sycl/test-e2e/Config/allowlist.cpp index 063ebabc1aba5..56dfbc081fb06 100644 --- a/sycl/test-e2e/Config/allowlist.cpp +++ b/sycl/test-e2e/Config/allowlist.cpp @@ -83,7 +83,7 @@ int main() { // Expected the allowlist to be set but empty if (getenv("TEST_DEVICE_IS_NOT_AVAILABLE")) { - if (!sycl::platform::get_platforms().empty()) + if (!sycl::platform::get_platforms().empty()) throw std::runtime_error("Expected no device is available"); return 0; } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 5ad8a17af15d9..31d4e92bf89a8 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ 
b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -78,10 +78,10 @@ class DependsOnTests : public ::testing::Test { std::unique_ptr CmdGroup = MockCGH.finalize(); - detail::Command *NewCmd = MS.addCG( - std::move(CmdGroup), - Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, - ToEnqueue, /*EventNeeded=*/true); + detail::Command *NewCmd = + MS.addCG(std::move(CmdGroup), + Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl, + ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } From 67a546270431a328f5920883732bce9820c394df Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 05:42:16 -0700 Subject: [PATCH 51/58] fix format 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4e9936fe042fb..123efc3d87af6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -350,9 +350,7 @@ class queue_impl { bool hasDiscardEventsProperty() const { return MDiscardEvents; } /// \return true if this queue allows for discarded events. 
- bool supportsDiscardingPiEvents() const { - return MIsInorder; - } + bool supportsDiscardingPiEvents() const { return MIsInorder; } bool isInOrder() const { return MIsInorder; } From 76a073c7d04b31c7952d1ce3f6e9dda37f36e800 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 1 Jul 2024 10:09:15 -0700 Subject: [PATCH 52/58] update win symbols Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_windows.dump | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 54c7a77403c92..d02be89140c5a 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -569,10 +569,10 @@ ??0half@host_half_impl@detail@_V1@sycl@@QEAA@AEBM@Z ??0half@host_half_impl@detail@_V1@sycl@@QEAA@G@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z -??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N1@Z -??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N1@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ @@ -4084,7 +4084,6 @@ ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z -?generateFlushCommand@stream_impl@detail@_V1@sycl@@QEAAXAEAVhandler@34@@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ From 741795d41e86599198e924f677e635cd38f67d5e Mon 
Sep 17 00:00:00 2001 From: Lorenc Bushi <113361374+lbushi25@users.noreply.github.com> Date: Mon, 1 Jul 2024 08:40:49 -0400 Subject: [PATCH 53/58] [SYCL] Fix assertion failure in E2E marray test (#14234) This PR fixes a GPU accuracy bug by upscaling the error-tolerance to a double type if the GPU supports 64-bit floating point arithmetic. --- sycl/test-e2e/Basic/built-ins/helpers.hpp | 28 ++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 03a7c720e9afd..724e417c4d6e0 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -33,16 +33,28 @@ void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, sycl::buffer SuccessBuf{1}; + sycl::queue q; + sycl::device dev = q.get_device(); // Make sure we don't use fp64 on devices that don't support it. - sycl::detail::get_elem_type_t d(delta); - - sycl::queue{}.submit([&](sycl::handler &cgh) { + const bool fp64 = dev.has(sycl::aspect::fp64); + q.submit([&](sycl::handler &cgh) { sycl::accessor Success{SuccessBuf, cgh}; - cgh.single_task([=]() { - auto R = F(Args...); - static_assert(std::is_same_v); - Success[0] = equal(R, Expected, d); - }); + if (fp64) { + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + // use double precision error tolerance when fp64 supported + Success[0] = equal(R, Expected, delta); + }); + } else { + // downscale the error tolerance when fp64 is not supported + sycl::detail::get_elem_type_t d(delta); + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + Success[0] = equal(R, Expected, d); + }); + } }); assert(sycl::host_accessor{SuccessBuf}[0]); } From ec9059089635dba20989427739e3ea2694f604c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:37:31 +0000 Subject: [PATCH 54/58] Bump 
the github-actions group with 2 updates (#14365) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the github-actions group with 2 updates: [github/codeql-action](https://github.com/github/codeql-action) and [softprops/action-gh-release](https://github.com/softprops/action-gh-release). Updates `github/codeql-action` from 3.25.7 to 3.25.11
Changelog

Sourced from github/codeql-action's changelog.

CodeQL Action Changelog

See the releases page for the relevant changes to the CodeQL CLI and language packs.

Note that the only difference between v2 and v3 of the CodeQL Action is the node version they support, with v3 running on node 20 while we continue to release v2 to support running on node 16. For example 3.22.11 was the first v3 release and is functionally identical to 2.22.11. This approach ensures an easy way to track exactly which features are included in different versions, indicated by the minor and patch version numbers.

[UNRELEASED]

No user facing changes.

3.25.11 - 28 Jun 2024

  • Avoid failing the workflow run if there is an error while uploading debug artifacts. #2349
  • Update default CodeQL bundle version to 2.17.6. #2352

3.25.10 - 13 Jun 2024

  • Update default CodeQL bundle version to 2.17.5. #2327

3.25.9 - 12 Jun 2024

  • Avoid failing database creation if the database folder already exists and contains some unexpected files. Requires CodeQL 2.18.0 or higher. #2330
  • The init Action will attempt to clean up the database cluster directory before creating a new database and at the end of the job. This will help to avoid issues where the database cluster directory is left in an inconsistent state. #2332

3.25.8 - 04 Jun 2024

  • Update default CodeQL bundle version to 2.17.4. #2321

3.25.7 - 31 May 2024

  • We are rolling out a feature in May/June 2024 that will reduce the Actions cache usage of the Action by keeping only the newest TRAP cache for each language. #2306

3.25.6 - 20 May 2024

  • Update default CodeQL bundle version to 2.17.3. #2295

3.25.5 - 13 May 2024

  • Add a compatibility matrix of supported CodeQL Action, CodeQL CLI, and GitHub Enterprise Server versions to the https://github.com/github/codeql-action/blob/main/README.md. #2273
  • Avoid printing out a warning for a missing on.push trigger when the CodeQL Action is triggered via a workflow_call event. #2274
  • The tools: latest input to the init Action has been renamed to tools: linked. This option specifies that the Action should use the tools shipped at the same time as the Action. The old name will continue to work for backwards compatibility, but we recommend that new workflows use the new name. #2281

3.25.4 - 08 May 2024

  • Update default CodeQL bundle version to 2.17.2. #2270

3.25.3 - 25 Apr 2024

  • Update default CodeQL bundle version to 2.17.1. #2247
  • Workflows running on macos-latest using CodeQL CLI versions before v2.15.1 will need to either upgrade their CLI version to v2.15.1 or newer, or change the platform to an Intel MacOS runner, such as macos-12. ARM machines with SIP disabled, including the newest macos-latest image, are unsupported for CLI versions before 2.15.1. #2261

... (truncated)

Commits
  • b611370 Merge pull request #2357 from github/update-v3.25.11-de945755c
  • 3e6431f Update changelog for v3.25.11
  • de94575 Merge pull request #2352 from github/update-bundle/codeql-bundle-v2.17.6
  • a32d305 Add changelog note
  • 9ccc995 Update default bundle to codeql-bundle-v2.17.6
  • 9b7c22c Merge pull request #2351 from github/dependabot/npm_and_yarn/npm-6791eaa26c
  • 9cf3243 Rebuild
  • 1895b29 Update checked-in dependencies
  • 9dcfde9 Bump the npm group with 2 updates
  • 8723b5b Merge pull request #2350 from github/angelapwen/add-exclude-pr-check-param
  • Additional commits viewable in compare view

Updates `softprops/action-gh-release` from 2.0.5 to 2.0.6
Release notes

Sourced from softprops/action-gh-release's releases.

v2.0.6

maintenance release with updated dependencies

Changelog

Sourced from softprops/action-gh-release's changelog.

2.0.6

  • maintenance release with updated dependencies

2.0.5

2.0.4

2.0.3

  • Declare make_latest as an input field in action.yml #419

2.0.2

  • Revisit approach to #384 making unresolved pattern failures opt-in #417

2.0.1

2.0.0

  • 2.0.0!? this release corrects a disjunction between git tag versions used in the marketplace and versions listed in this file. Previous versions should have really been 1.*. Going forward this should be better aligned.
  • Upgrade action.yml declaration to node20 to address deprecations

0.1.15

  • Upgrade to action.yml declaration to node16 to address deprecations
  • Upgrade dependencies
  • Add asset output as a JSON array containing information about the uploaded assets

0.1.14

  • provides a new workflow input option generate_release_notes which when set to true will automatically generate release notes for you based on GitHub activity #179. Please see the GitHub docs for this feature for more information

0.1.13

  • fix issue with multiple runs concatenating release bodies #145

0.1.12

  • fix bug leading to empty strings substituted for inputs users don't provide breaking api calls #144

... (truncated)

Commits
  • a74c6b7 update changelog
  • b909f76 update dist/index.js
  • e49d08f chore(deps): bump glob from 8.0.3 to 10.4.2
  • f12ad25 chore(deps): bump @octokit/plugin-throttling from 4.3.2 to 9.3.0
  • 7039a82 chore: release 2.0.6
  • f9c2b6c chore: update deps and run build
  • 73738a6 chore(deps): bump node dep and @types/node
  • a500a35 Bump ts-jest from 29.0.3 to 29.1.4 (#459)
  • See full diff in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore <dependency name> major version` will close this group update PR and stop Dependabot creating any more for the specific dependency's major version (unless you unignore this specific dependency's major version or upgrade to it yourself) - `@dependabot ignore <dependency name> minor version` will close this group update PR and stop Dependabot creating any more for the specific dependency's minor version (unless you unignore this specific dependency's minor version or upgrade to it yourself) - `@dependabot ignore <dependency name>` will close this group update PR and stop Dependabot creating any more for the specific dependency (unless you unignore this specific dependency or upgrade to it yourself) - `@dependabot unignore <dependency name>` will remove all of the ignore conditions of the specified dependency - `@dependabot unignore <dependency name> <ignore condition>` will remove the ignore condition of the specified dependency and ignore conditions
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/scorecard.yml | 2 +- .github/workflows/sycl-nightly.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 9f8ea3499f696..896a2ea8c183a 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -57,6 +57,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 + uses: github/codeql-action/upload-sarif@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11 with: sarif_file: results.sarif diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index fc0b90be7990a..32a7814fa1c5c 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -141,7 +141,7 @@ jobs: echo "TAG=$(date +'%Y-%m-%d')-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" fi - name: Upload binaries - uses: softprops/action-gh-release@69320dbe05506a9a39fc8ae11030b214ec2d1f87 + uses: softprops/action-gh-release@a74c6b72af54cfa997e81df42d94703d6313a2d0 with: files: | sycl_linux.tar.gz From 4c4f1b6b6927135a8743af336155ace780cc53c6 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Mon, 1 Jul 2024 10:45:20 -0400 Subject: [PATCH 55/58] [SYCL][E2E] Disable flaky test host_task_last.cpp on Gen12 Linux (#14352) https://github.com/intel/llvm/issues/14350 --------- Signed-off-by: Sarnie, Nick --- sycl/test-e2e/Graph/Explicit/host_task_last.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp index 34df0750b5366..5371ea1df3708 100644 --- a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp +++ b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp @@ -2,8 +2,10 @@ // RUN: 
%{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Disabled due to https://github.com/intel/llvm/issues/14350 // Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// xRUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // REQUIRES: aspect-usm_shared_allocations From 3d90aba9c957cdd302c89eabb8be2b4cee7798e1 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 1 Jul 2024 08:23:17 -0700 Subject: [PATCH 56/58] [SYCL] Don't throw in `device_impl::has` (#14355) 1) It isn't right 2) We need this change to get rid of deprecated `sycl::exception::get_cl_code` --- sycl/source/detail/device_impl.cpp | 4 ++-- sycl/source/device.cpp | 13 +++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index c0b28622b962d..6e2b69850d5e1 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -719,8 +719,8 @@ bool device_impl::has(aspect Aspect) const { return call_successful && support; } } - throw runtime_error("This device aspect has not been implemented yet.", - PI_ERROR_INVALID_DEVICE); + + return false; // This device aspect has not been implemented yet. 
} bool device_impl::isAssertFailSupported() const { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 18b9cf4036cda..423ff7be44121 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -155,16 +155,9 @@ device::get_info_impl() const { #undef __SYCL_ASPECT }; - auto UnsupportedAspects = std::remove_if( - DeviceAspects.begin(), DeviceAspects.end(), [&](aspect Aspect) { - try { - return !impl->has(Aspect); - } catch (const runtime_error &ex) { - if (ex.get_cl_code() == PI_ERROR_INVALID_DEVICE) - return true; - throw; - } - }); + auto UnsupportedAspects = + std::remove_if(DeviceAspects.begin(), DeviceAspects.end(), + [&](aspect Aspect) { return !impl->has(Aspect); }); DeviceAspects.erase(UnsupportedAspects, DeviceAspects.end()); From 40170305048cbbf40e229c6c83a2c5ec5d6278e1 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 1 Jul 2024 17:31:45 +0200 Subject: [PATCH 57/58] [SYCL][Docs] Add sycl_ext_oneapi_virtual_mem extension and implementation (#8954) This commit adds the sycl_ext_oneapi_virtual_mem experimental extension for reserving and mapping virtual address ranges. Accompanying it is the implementation in the SYCL runtime, together with CUDA and Level Zero backend support for the corresponding features. 
--------- Signed-off-by: Larsen, Steffen --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 3 +- .../sycl_ext_oneapi_virtual_mem.asciidoc | 398 ++++++++++++++++++ sycl/include/sycl/detail/pi.def | 12 + sycl/include/sycl/detail/pi.h | 145 ++++++- sycl/include/sycl/detail/pi.hpp | 2 + sycl/include/sycl/device_aspect_macros.hpp | 10 + .../ext/oneapi/virtual_mem/physical_mem.hpp | 81 ++++ .../ext/oneapi/virtual_mem/virtual_mem.hpp | 61 +++ sycl/include/sycl/info/aspects.def | 1 + sycl/include/sycl/sycl.hpp | 2 + sycl/plugins/cuda/pi_cuda.cpp | 63 +++ sycl/plugins/cuda/pi_cuda.hpp | 5 + sycl/plugins/hip/pi_hip.cpp | 63 +++ sycl/plugins/hip/pi_hip.hpp | 5 + sycl/plugins/level_zero/pi_level_zero.cpp | 138 ++++++ sycl/plugins/native_cpu/pi_native_cpu.cpp | 63 +++ sycl/plugins/native_cpu/pi_native_cpu.hpp | 5 + sycl/plugins/opencl/pi_opencl.cpp | 63 +++ sycl/plugins/unified_runtime/pi2ur.hpp | 217 ++++++++++ .../unified_runtime/pi_unified_runtime.cpp | 66 +++ sycl/source/CMakeLists.txt | 2 + sycl/source/detail/device_impl.cpp | 8 + sycl/source/detail/physical_mem_impl.hpp | 95 +++++ sycl/source/feature_test.hpp.in | 1 + sycl/source/physical_mem.cpp | 38 ++ sycl/source/virtual_mem.cpp | 183 ++++++++ .../VirtualMem/vector_with_virtual_mem.cpp | 236 +++++++++++ sycl/test/abi/pi_cuda_symbol_check.dump | 10 + sycl/test/abi/pi_hip_symbol_check.dump | 10 + sycl/test/abi/pi_level_zero_symbol_check.dump | 10 + sycl/test/abi/pi_nativecpu_symbol_check.dump | 10 + sycl/test/abi/pi_opencl_symbol_check.dump | 10 + sycl/test/abi/sycl_symbols_linux.dump | 13 + sycl/test/abi/sycl_symbols_windows.dump | 27 +- sycl/unittests/helpers/PiMockPlugin.hpp | 55 +++ 35 files changed, 2108 insertions(+), 3 deletions(-) create mode 100644 sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc create mode 100644 sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp create mode 100644 sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp create mode 100644 
sycl/source/detail/physical_mem_impl.hpp create mode 100644 sycl/source/physical_mem.cpp create mode 100644 sycl/source/virtual_mem.cpp create mode 100644 sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 38d5f2512a1c4..54357d1377c77 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -82,6 +82,7 @@ def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">; def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">; def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">; +def AspectExt_oneapi_virtual_mem : Aspect<"ext_oneapi_virtual_mem">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -139,7 +140,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph, - AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag], + AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. 
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc new file mode 100644 index 0000000000000..72a6e1ed14f55 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc @@ -0,0 +1,398 @@ += sycl_ext_oneapi_virtual_mem + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Backend support status + +The APIs in this extension may be used only on a device that has +`aspect::ext_oneapi_virtual_mem`. 
The application must check that the devices +in the corresponding context have this aspect before using any of the APIs +introduced in this extension. If the application fails to do this, the +implementation throws a synchronous exception with the +`errc::feature_not_supported` error code. + +== Overview + +This extension adds the notion of "virtual memory ranges" to SYCL, introducing +a way to map an address range onto multiple allocations of physical memory, +allowing users to avoid expensive reallocations and potentially running out of +device memory while relocating the corresponding memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_VIRTUAL_MEM` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + + +=== Device aspect + +Support for the features introduced in this extension can be queried using the +new `aspect::ext_oneapi_virtual_mem` defined as: + +```c++ +namespace sycl { + +enum class aspect : /* unspecified */ { + ... + ext_oneapi_virtual_mem +} + +} // namespace sycl +``` + + +=== Memory granularity + +Working with virtual address ranges and the underlying physical memory requires +the user to align and adjust in accordance with a specified minimum granularity. 
+ +The interfaces make the distinction between device granularity, which is the +granularity required for physical memory allocations, and context granularity, +which is the granularity required for virtual memory range reservations. + +The queries provide both a minimum and a recommended granularity. The minimum +device granularity is the smallest granularity that is supported for physical +memory allocations, and the minimum context granularity is the smallest +granularity that is supported from virtual memory range reservations. However, +the recommended granularity may be larger than these minimums and may provide +better performance. + +The interfaces for querying these granularities are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class granularity_mode : /*unspecified*/ { + minimum, + recommended +}; + +size_t get_mem_granularity(const device &syclDevice, const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +size_t get_mem_granularity(const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`size_t get_mem_granularity(const device &syclDevice, const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of physical memory allocations on `syclDevice` in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +|`size_t get_mem_granularity(const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of virtual memory range reservations in the +`syclContext`. 
The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If any device in `syclContext` does not have `aspect::ext_oneapi_virtual_mem` +the call throws an exception with `errc::feature_not_supported`. + +|===================== + +=== Reserving virtual address ranges + +Virtual address ranges are represented by a `uintptr_t` and a number of bytes +reserved for it. The `uintptr_t` must be aligned in accordance with the minimum +granularity of the corresponding `context`, as queried through +`get_mem_granularity`, and likewise the number of bytes must be a multiple of +this granularity. It is the responsibility of the user to manage the +constituents of any virtual address range they reserve. + +The interfaces for reserving, freeing, and manipulating the access mode of a +virtual address range are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext); +uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext); + +void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext)` | +Reserves a virtual memory range in `syclContext` with `numBytes` bytes. + +`start` specifies the requested start of the new virtual memory range +reservation. If the implementation is unable to reserve the virtual memory range +at the specified address, the implementation will pick another suitable address. + +`start` must be aligned in accordance with the minimum granularity for +`syclContext`, as returned by a call to `get_mem_granularity`. Likewise, +`numBytes` must be a multiple of the minimum granularity. 
Attempting to call +this function without meeting these requirements results in undefined behavior. + +If any of the devices in `syclContext` do not have +`aspect::ext_oneapi_virtual_mem` the call throws an exception with +`errc::feature_not_supported`. + +|`uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext)` | +Same as `reserve_virtual_mem(0, numBytes, syclContext)`. + +|`void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext)` | +Frees a virtual memory range specified by `ptr` and `numBytes`. `ptr` must be +the same as returned by a call to `reserve_virtual_mem` and `numBytes` must be +the same as the size of the range specified in the reservation call. + +The virtual memory range must not currently be mapped to physical memory. A call +to this function with a mapped virtual memory range results in undefined +behavior. + +|===================== + + +=== Physical memory representation + +:crs: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:reference-semantics + +To represent the underlying physical device memory a virtual address is mapped +to, the `physical_mem` class is added. This new class is defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class address_access_mode : /*unspecified*/ { + none, + read, + read_write +}; + +class physical_mem { +public: + physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes); + physical_mem(const queue &syclQueue, size_t numBytes); + + /* -- common interface members -- */ + + void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; +}; + +} // namespace sycl::ext::oneapi::experimental +``` + +`physical_mem` has common reference semantics, as described in +{crs}[section 4.5.2. Common reference semantics]. 
+ +[frame="topbot",options="header,footer"] +|============================ +|Member function |Description + +|`physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes)` | +Constructs a `physical_mem` instance using the `syclDevice` provided. This +device must either be contained by `syclContext` or it must be a descendent +device of some device that is contained by that context, otherwise this function +throws a synchronous exception with the `errc::invalid` error code. + +This will allocate `numBytes` of physical memory on the device. `numBytes` must +be a multiple of the granularity for `syclDevice`, as returned by a call to +`get_mem_granularity`. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +If the constructor is unable to allocate the required memory on `syclDevice`, +the call throws an exception with `errc::memory_allocation`. + +|`physical_mem(const queue &syclQueue, size_t numBytes)` | +Same as `physical_mem(syclQueue.get_device(), syclQueue.get_context, numBytes)`. + +|`void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0)` | +Maps a virtual memory range, specified by `ptr` and `numBytes`, to the physical +memory corresponding to this instance of `physical_mem`, starting at an offset +of `offset` bytes. + +It is required that `offset + numBytes` is less than or equal to `size()` and +that `ptr`, `numBytes` and `offset` are all multiples of the minimum granularity +for the device associated with this instance of `physical_mem`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +the returned pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The returned pointer is equivalent to `reinterpret_cast(ptr)`. 
+ +Writing to any address in the virtual memory range with access mode set to +`access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +A virtual memory range cannot be simultaneously mapped to more than one +physical memory region. Likewise, multiple virtual memory ranges cannot be +mapped onto the same physical memory region. Attempting to violate either of +these restrictions will result in undefined behavior. + +|`context get_context() const` | +Returns the SYCL context associated with the instance of `physical_mem`. + +|`device get_device() const` | +Returns the SYCL device associated with the instance of `physical_mem`. + +|`size_t size() const` | +Returns the size of the corresponding physical memory in bytes. + +|============================ + +Virtual memory address ranges are mapped to the a `physical_mem` through the +`map` member functions, where the access mode can also be specified. +To further get or set the access mode of a mapped virtual address range, the +user does not need to know the associated `physical_mem` and can just call the +following free functions. + +```c++ +namespace sycl::ext::oneapi::experimental { + +void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext); + +address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext); + +void unmap(const void *ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext)` | +Changes the access mode of a mapped virtual memory range specified by `ptr` and +`numBytes`. 
+ +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +`ptr` pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +|`address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext)` | +Returns the access mode of the mapped virtual memory range specified by `ptr` +and `numBytes`. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +|`void unmap(const void *ptr, size_t numBytes, const device &syclDevice, const context &syclContext)` | +Unmaps the range specified by `ptr` and `numBytes`. The range must have been +mapped through a call to `physical_mem::map()` prior to calling this. The range +must not be a proper sub-range of a previously mapped range. 
`syclContext` must +be the same as the context returned by the `get_context()` member function on +the `physical_mem` the address range is currently mapped to. + +After this call, the full range will again be ready to be mapped through a call +to `physical_mem::map()`. + +[_Note:_ Unmapping ranges that span multiple contiguous mapped ranges is not +supported. Doing so will result in undefined behavior. This restriction may be +lifted in the future. _{endnote}_] + +[_Note:_ The destructor for `physical_mem` will not unmap ranges mapped to it. +As such, the user must call `unmap` on ranges mapped to `physical_mem` objects +prior to their destruction. _{endnote}_] + +|===================== \ No newline at end of file diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 995579d612afb..3090b2d488ee0 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -215,4 +215,16 @@ _PI_API(piextDestroyExternalSemaphore) _PI_API(piextWaitExternalSemaphore) _PI_API(piextSignalExternalSemaphore) +// Virtual memory +_PI_API(piextVirtualMemGranularityGetInfo) +_PI_API(piextPhysicalMemCreate) +_PI_API(piextPhysicalMemRetain) +_PI_API(piextPhysicalMemRelease) +_PI_API(piextVirtualMemReserve) +_PI_API(piextVirtualMemFree) +_PI_API(piextVirtualMemMap) +_PI_API(piextVirtualMemUnmap) +_PI_API(piextVirtualMemSetAccess) +_PI_API(piextVirtualMemGetInfo) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 79d67791ffc8d..ce7d34ef75899 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -191,9 +191,13 @@ // `win32_nt_dx12_resource` value. // the `pi_external_semaphore_handle_type` enum now has a new // `win32_nt_dx12_fence` value. 
+// 15.54 Added piextVirtualMem* functions, and piextPhysicalMem* functions, +// PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, +// _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and +// pi_virtual_access_flags bit flags. #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 53 +#define _PI_H_VERSION_MINOR 54 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -505,6 +509,9 @@ typedef enum { // Timestamp enqueue PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, + + // Virtual memory support + PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, } _pi_device_info; typedef enum { @@ -756,6 +763,15 @@ typedef enum { PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, } _pi_sampler_cubemap_filter_mode; +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM = 0x30100, + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED = 0x30101, +} _pi_virtual_mem_granularity_info; + +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE = 0x30200, +} _pi_virtual_mem_info; + using pi_context_properties = intptr_t; using pi_device_exec_capabilities = pi_bitfield; @@ -848,6 +864,10 @@ constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on +using pi_virtual_access_flags = pi_bitfield; +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_RW = (1 << 0); +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_READ_ONLY = (1 << 1); + typedef enum { // No preference for SLM or data cache. 
PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, @@ -889,6 +909,8 @@ using pi_program_binary_type = _pi_program_binary_type; using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; using pi_kernel_cache_config = _pi_kernel_cache_config; +using pi_virtual_mem_granularity_info = _pi_virtual_mem_granularity_info; +using pi_virtual_mem_info = _pi_virtual_mem_info; using pi_image_copy_flags = _pi_image_copy_flags; @@ -1241,6 +1263,7 @@ struct _pi_program; struct _pi_kernel; struct _pi_event; struct _pi_sampler; +struct _pi_physical_mem; using pi_platform = _pi_platform *; using pi_device = _pi_device *; @@ -1255,6 +1278,7 @@ using pi_image_handle = pi_uint64; using pi_image_mem_handle = void *; using pi_interop_mem_handle = pi_uint64; using pi_interop_semaphore_handle = pi_uint64; +using pi_physical_mem = _pi_physical_mem *; typedef struct { pi_image_channel_order image_channel_order; @@ -2338,6 +2362,125 @@ pi_result piextEnqueueDeviceGlobalVariableRead( size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// +/// Virtual memory +/// + +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param context is the context to get the granularity from. +/// \param device is the device to get the granularity from. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. 
+/// +/// \param context is the context within which the physical memory is allocated. +/// \param device is the device the physical memory is on. +/// \param mem_size is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param ret_physical_mem is the handle for the resulting physical memory. +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context context, pi_device device, size_t mem_size, + pi_physical_mem *ret_physical_mem); + +/// API for retaining a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to retain. +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem); + +/// API for releasing a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to free. +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem); + +/// API for reserving a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param range_size is the size of the virtual address range to reserve in +/// bytes. +/// \param ret_ptr is the pointer to the start of the resulting virtual memory +/// range. +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context context, + const void *start, + size_t range_size, + void **ret_ptr); + +/// API for freeing a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size); + +/// API for mapping a virtual memory range to a physical memory allocation at +/// a given offset.
+/// +/// \param context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +/// \param physical_mem is the handle for the physical memory to map ptr to. +/// \param offset is the offset into physical_mem in bytes to map ptr to. +/// \param flags is the access flags to set for the mapping. +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, + pi_physical_mem physical_mem, + size_t offset, + pi_virtual_access_flags flags); + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context context, + const void *ptr, + size_t range_size); + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param flags is the access flags to set for the mapped virtual access range. +__SYCL_EXPORT pi_result piextVirtualMemSetAccess(pi_context context, + const void *ptr, + size_t range_size, + pi_virtual_access_flags flags); + +/// API for getting info about a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. 
+/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context context, const void *ptr, size_t range_size, + pi_virtual_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + /// /// Plugin /// diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 3500c576bb599..1fe21d36a8aaa 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -146,6 +146,8 @@ using PiExternalMemDescriptor = ::pi_external_mem_descriptor; using PiExternalSemaphoreDescriptor = ::pi_external_semaphore_descriptor; using PiImageOffset = ::pi_image_offset_struct; using PiImageRegion = ::pi_image_region_struct; +using PiPhysicalMem = ::pi_physical_mem; +using PiVirtualAccessFlags = ::pi_virtual_access_flags; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index df6c827de60f2..d756b0a62e88a 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -381,6 +381,11 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -750,3 +755,8 @@ // __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) #define 
__SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ 0 +#endif diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp new file mode 100644 index 0000000000000..24d371fe8c6fd --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp @@ -0,0 +1,81 @@ +//==--- physical_mem.hpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +class physical_mem_impl; +} // namespace detail + +namespace ext::oneapi::experimental { + +enum class address_access_mode : char { none = 0, read = 1, read_write = 2 }; + +class __SYCL_EXPORT physical_mem + : public sycl::detail::OwnerLessBase { +public: + physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes); + + physical_mem(const queue &SyclQueue, size_t NumBytes) + : physical_mem(SyclQueue.get_device(), SyclQueue.get_context(), + NumBytes) {} + + physical_mem(const physical_mem &rhs) = default; + physical_mem(physical_mem &&rhs) = default; + + physical_mem &operator=(const physical_mem &rhs) = default; + physical_mem &operator=(physical_mem &&rhs) = default; + + ~physical_mem() noexcept(false) {}; + + bool operator==(const physical_mem &rhs) const { return impl == rhs.impl; } + bool operator!=(const physical_mem &rhs) const { return !(*this == rhs); } + + void *map(uintptr_t Ptr, size_t NumBytes, 
address_access_mode Mode, + size_t Offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; + +private: + std::shared_ptr impl; + + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + template + friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +}; + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl + +namespace std { +template <> struct hash { + size_t operator()( + const sycl::ext::oneapi::experimental::physical_mem &PhysicalMem) const { + return hash>()( + sycl::detail::getSyclObjImpl(PhysicalMem)); + } +}; +} // namespace std diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp new file mode 100644 index 0000000000000..74a42354eaa01 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp @@ -0,0 +1,61 @@ +//==- virtual_mem.hpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class granularity_mode : char { + minimum = 0, + recommended = 1, +}; + +__SYCL_EXPORT size_t +get_mem_granularity(const device &SyclDevice, const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT size_t +get_mem_granularity(const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext); + +inline uintptr_t reserve_virtual_mem(size_t NumBytes, + const context &SyclContext) { + return reserve_virtual_mem(0, NumBytes, SyclContext); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext); + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext); + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 2d9cee1351d7a..3b744a89dbb90 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -68,3 +68,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72) __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) +__SYCL_ASPECT(ext_oneapi_virtual_mem, 74) diff --git 
a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 53a60381f0b8d..16b5e8f0f6c40 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -111,4 +111,6 @@ #include #include #include +#include +#include #include diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 0077b245905db..1628b1537fae5 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1298,6 +1298,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result 
piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 2b5d77b26ea9d..8c5112f4cc9d1 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -81,4 +82,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_CUDA_HPP diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 33b7388f9c884..c3324463690eb 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1301,6 +1301,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, 
+ param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.hpp 
b/sycl/plugins/hip/pi_hip.hpp index 018d069f5fe7f..bec26c9866fdb 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -94,4 +95,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_HIP_HPP diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f88e8c1ed3cd3..bab365effe85f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1424,6 +1424,144 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param Context is the context to get the granularity from. +/// \param Device is the device to get the granularity from. +/// \param MemSize is the potentially unadjusted size to get granularity for. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param Context is the context within which the physical memory is allocated. 
+/// \param Device is the device the physical memory is on. +/// \param MemSize is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param RetPhysicalMem is the handle for the resulting physical memory. +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +/// API for retaining a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to retain. +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +/// API for releasing a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to free. +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +/// API for reserving a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param RangeSize is the size of the virtual address range to reserve in +/// bytes. +/// \param RetPtr is the pointer to the start of the resulting virtual memory +/// range. +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +/// API for freeing a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. 
+pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +/// API for mapping a virtual memory range to a physical memory allocation at +/// a given offset. +/// +/// \param Context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. +/// \param PhysicalMem is the handle for the physical memory to map Ptr to. +/// \param Offset is the offset into PhysicalMem in bytes to map Ptr to. +/// \param Flags is the access flags to set for the mapping. +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes.
+/// \param Flags is the access flags to set for the mapped virtual access range. +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +/// API for getting info about a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { // missing diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index d867caea5e23d..2276e9f78f7ea 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -1321,6 +1321,69 @@ pi_result piextKernelSuggestMaxCooperativeGroupCount( return PI_ERROR_UNSUPPORTED_FEATURE; } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result 
piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + // Initialize function table with stubs. 
#define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); diff --git a/sycl/plugins/native_cpu/pi_native_cpu.hpp b/sycl/plugins/native_cpu/pi_native_cpu.hpp index 1d92580997b76..287b3c03115b6 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.hpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -43,3 +44,7 @@ struct _pi_program : ur_program_handle_t_ { struct _pi_queue : ur_queue_handle_t_ { using ur_queue_handle_t_::ur_queue_handle_t_; }; + +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1fef329d179af..1d340b5685f4e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1228,6 +1228,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +pi_result 
piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); } diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3ee63a025593b..f22e672d84423 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -678,6 +678,31 @@ inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, } } +inline pi_result ur2piVirtualMemInfoValue(ur_virtual_mem_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + switch (ParamName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + auto ConvertFunc = [](ur_virtual_mem_access_flags_t UrValue) { + pi_virtual_access_flags PiValue = 0; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + PiValue |= 
PI_VIRTUAL_ACCESS_FLAG_RW; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + return PiValue; + }; + return Value + .convert( + ConvertFunc); + } + default: + return PI_SUCCESS; + } +} + // Translate UR device info values to PI info values inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, size_t ParamValueSizePI, @@ -1311,6 +1336,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT) #undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; @@ -5665,4 +5692,194 @@ inline pi_result piextSignalExternalSemaphore( // Bindless Images Extension /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Virtual Memory + +inline pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_virtual_mem_granularity_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + break; + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, + ParamValueSize, ParamValue, + 
ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhyscialMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_physical_mem_handle_t *UrPhysicalMem = + reinterpret_cast(RetPhyscialMem); + + HANDLE_ERRORS(urPhysicalMemCreate(UrContext, UrDevice, MemSize, nullptr, + UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRetain(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRelease(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetPtr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemReserve(UrContext, Start, RangeSize, RetPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemFree(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + 
PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemSetAccess(UrContext, Ptr, RangeSize, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, size_t Offset, + pi_virtual_access_flags Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemMap(UrContext, Ptr, RangeSize, UrPhysicalMem, + Offset, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemUnmap(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + 
ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE: + InfoType = UR_VIRTUAL_MEM_INFO_ACCESS_MODE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGetInfo(UrContext, Ptr, RangeSize, InfoType, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + ur2piVirtualMemInfoValue(InfoType, ParamValueSize, &ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +// Virtual Memory +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 30ba9a7afc8b1..7e268199bba77 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1189,6 +1189,72 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context Context, pi_device Device, size_t MemSize, + pi_physical_mem *RetPhsycialMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhsycialMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context Context, + const 
void *Start, + size_t RangeSize, + void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemSetAccess(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, + size_t Offset, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context Context, + const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 74497db20c9f1..f915ef4e2cb8e 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -257,11 +257,13 @@ set(SYCL_COMMON_SOURCES "interop_handle.cpp" "kernel.cpp" "kernel_bundle.cpp" + "physical_mem.cpp" "platform.cpp" "queue.cpp" "sampler.cpp" "stream.cpp" "spirv_ops.cpp" + "virtual_mem.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 6e2b69850d5e1..3295188c295ba 
100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -718,6 +718,14 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_virtual_mem: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } } return false; // This device aspect has not been implemented yet. diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp new file mode 100644 index 0000000000000..9fb38f1202257 --- /dev/null +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -0,0 +1,95 @@ +//==- physical_mem_impl.hpp - sycl_ext_oneapi_virtual_mem physical_mem impl ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( + ext::oneapi::experimental::address_access_mode Mode) { + switch (Mode) { + case ext::oneapi::experimental::address_access_mode::read: + return PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + case ext::oneapi::experimental::address_access_mode::read_write: + return PI_VIRTUAL_ACCESS_FLAG_RW; + case ext::oneapi::experimental::address_access_mode::none: + return 0; + default: + throw sycl::exception(make_error_code(errc::invalid), + "Invalid address_access_mode."); + } +} + +class physical_mem_impl { +public: + physical_mem_impl(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) + : MDevice(getSyclObjImpl(SyclDevice)), + MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { + const PluginPtr &Plugin = MContext->getPlugin(); + + auto Err = Plugin->call_nocheck( + MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, + &MPhysicalMem); + + if (Err == PI_ERROR_OUT_OF_RESOURCES || Err == PI_ERROR_OUT_OF_HOST_MEMORY) + throw sycl::exception(make_error_code(errc::memory_allocation), + "Failed to allocate physical memory."); + Plugin->checkPiResult(Err); + } + + ~physical_mem_impl() noexcept(false) { + const PluginPtr &Plugin = MContext->getPlugin(); + Plugin->call(MPhysicalMem); + } + + void *map(uintptr_t Ptr, size_t NumBytes, + ext::oneapi::experimental::address_access_mode Mode, + size_t Offset) const { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + AccessModeToVirtualAccessFlags(Mode); + const PluginPtr &Plugin = MContext->getPlugin(); + void *ResultPtr = reinterpret_cast(Ptr); + Plugin->call( + MContext->getHandleRef(), ResultPtr, NumBytes, 
MPhysicalMem, Offset, + AccessFlags); + return ResultPtr; + } + + context get_context() const { + return createSyclObjFromImpl(MContext); + } + device get_device() const { return createSyclObjFromImpl(MDevice); } + size_t size() const noexcept { return MNumBytes; } + + sycl::detail::pi::PiPhysicalMem &getHandleRef() { return MPhysicalMem; } + const sycl::detail::pi::PiPhysicalMem &getHandleRef() const { + return MPhysicalMem; + } + +private: + sycl::detail::pi::PiPhysicalMem MPhysicalMem = nullptr; + const std::shared_ptr MDevice; + const std::shared_ptr MContext; + const size_t MNumBytes; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index ce88520fe50dd..f7e023c718462 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -86,6 +86,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_ANNOTATED_ARG 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #cmakedefine01 SYCL_ENABLE_KERNEL_FUSION #if SYCL_ENABLE_KERNEL_FUSION diff --git a/sycl/source/physical_mem.cpp b/sycl/source/physical_mem.cpp new file mode 100644 index 0000000000000..d9d6073a68e89 --- /dev/null +++ b/sycl/source/physical_mem.cpp @@ -0,0 +1,38 @@ +//==--- physical_mem.cpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +physical_mem::physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + impl = std::make_shared( + SyclDevice, SyclContext, NumBytes); +} + +void *physical_mem::map(uintptr_t Ptr, size_t NumBytes, + address_access_mode Mode, size_t Offset) const { + return impl->map(Ptr, NumBytes, Mode, Offset); +} + +context physical_mem::get_context() const { return impl->get_context(); } +device physical_mem::get_device() const { return impl->get_device(); } +size_t physical_mem::size() const noexcept { return impl->size(); } + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp new file mode 100644 index 0000000000000..8cdc5ffba0223 --- /dev/null +++ b/sycl/source/virtual_mem.cpp @@ -0,0 +1,183 @@ +//==- virtual_mem.cpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +// System headers for querying page-size. 
+#ifdef _WIN32 +#include +#else +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, + const context &SyclContext, + granularity_mode Mode) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + pi_virtual_mem_granularity_info GranularityQuery = [=]() { + switch (Mode) { + case granularity_mode::minimum: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + case granularity_mode::recommended: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + } + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "Unrecognized granularity mode."); + }(); + + std::shared_ptr DeviceImpl = + sycl::detail::getSyclObjImpl(SyclDevice); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(size_t) && + "Unexpected output size of granularity info query."); +#endif // NDEBUG + size_t Granularity = 0; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + sizeof(size_t), &Granularity, nullptr); + return Granularity; +} + +__SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext, + granularity_mode Mode) { + const std::vector Devices = SyclContext.get_devices(); + if (!std::all_of(Devices.cbegin(), Devices.cend(), [](const device &Dev) { + return Dev.has(aspect::ext_oneapi_virtual_mem); + })) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the context does not support " + 
"aspect::ext_oneapi_virtual_mem."); + } + + // CUDA only needs page-size granularity. + if (SyclContext.get_backend() == backend::ext_oneapi_cuda) { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return static_cast(SystemInfo.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + } + + // Otherwise, we find the least common multiple of granularity of the devices + // in the context. + size_t LCMGranularity = get_mem_granularity(Devices[0], SyclContext, Mode); + for (size_t I = 1; I < Devices.size(); ++I) { + size_t DevGranularity = get_mem_granularity(Devices[I], SyclContext, Mode); + size_t GCD = LCMGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + LCMGranularity *= DevGranularity / GCD; + } + return LCMGranularity; +} + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext) { + std::vector Devs = SyclContext.get_devices(); + if (std::any_of(Devs.cbegin(), Devs.cend(), [](const device &Dev) { + return !Dev.has(aspect::ext_oneapi_virtual_mem); + })) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the supplied context does not support " + "aspect::ext_oneapi_virtual_mem."); + + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + void *OutPtr = nullptr; + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Start), NumBytes, + &OutPtr); + return reinterpret_cast(OutPtr); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Ptr), NumBytes); +} + +__SYCL_EXPORT void set_access_mode(const 
void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext) { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + sycl::detail::AccessModeToVirtualAccessFlags(Mode); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, AccessFlags); +} + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(sycl::detail::pi::PiVirtualAccessFlags) && + "Unexpected output size of access mode info query."); +#endif // NDEBUG + sycl::detail::pi::PiVirtualAccessFlags AccessFlags; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, + sizeof(sycl::detail::pi::PiVirtualAccessFlags), &AccessFlags, nullptr); + + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_RW) + return address_access_mode::read_write; + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + return address_access_mode::read; + return address_access_mode::none; +} + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes); +} + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp new file mode 
100644 index 0000000000000..cbbcf52e3ab25 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp @@ -0,0 +1,236 @@ +// REQUIRES: aspect-ext_oneapi_virtual_mem, usm_shared_allocations + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +#include +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +// Find the least common multiple of the context and device granularities. This +// value can be used for aligning both physical memory allocations and for +// reserving virtual memory ranges. +size_t GetLCMGranularity(const sycl::device &Dev, const sycl::context &Ctx) { + size_t CtxGranularity = syclext::get_mem_granularity(MContext); + size_t DevGranularity = syclext::get_mem_granularity(MDevice, MContext); + + size_t GCD = CtxGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + return (DevGranularity / GCD) * LCMGranularity; +} + +template class VirtualVector { +public: + VirtualVector(sycl::queue &Q) + : MDevice{Q.get_device()}, MContext{Q.get_context()}, + MGranularity{GetLCMGranularity(MDevice, MContext)} {}; + + ~VirtualVector() { + // Free all mapped ranges. + unmap_all(); + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + // Physical memory allocations will be freed when the physical_mem objects + // die with MPhysicalMemMappings. + } + + void reserve(size_t NewSize) { + // If we already have more memory than required, we can return. + size_t NewByteSize = sizeof(T) * NewSize; + if (NewByteSize <= MByteSize) { + MSize = NewSize; + return; + } + + // Align the size by the granularity. + size_t AlignedNewByteSize = AlignByteSize(NewByteSize); + size_t AlignedNewVARangeSize = AlignedNewByteSize - MByteSize; + + // Try to reserve virtual memory at the end of the existing one. 
+ uintptr_t CurrentEnd = reinterpret_cast(MBasePtr) + MByteSize; + uintptr_t NewVAPtr = syclext::reserve_virtual_mem( + CurrentEnd, AlignedNewVARangeSize, MContext); + + // If we failed to get a ptr to the end of the current range, we need to + // recreate the whole range. + if (CurrentEnd && NewVAPtr != CurrentEnd) { + // First we need to free the virtual address range we just reserved. + syclext::free_virtual_mem(NewVAPtr, AlignedNewVARangeSize, MContext); + + // Recreate the full range and update the new VA ptr. CurrentEnd is no + // longer valid after this call. + NewVAPtr = RecreateAddressRange(AlignedNewByteSize); + } else { + // Otherwise we need to register the new range. + MVARanges.emplace_back(NewVAPtr, AlignedNewVARangeSize); + + // If there was no base pointer previously, this is now the new base. + if (!MBasePtr) + MBasePtr = reinterpret_cast(NewVAPtr); + } + + // Create new physical memory allocation and map the new range to it. + syclext::physical_mem NewPhysicalMem{MDevice, MContext, + AlignedNewVARangeSize}; + void *MappedPtr = + NewPhysicalMem.map(NewVAPtr, AlignedNewVARangeSize, + syclext::address_access_mode::read_write); + MPhysicalMemMappings.push_back( + std::make_pair(std::move(NewPhysicalMem), MappedPtr)); + + // Update the byte size of the vector. + MSize = NewSize; + MByteSize = AlignedNewByteSize; + } + + size_t size() const noexcept { return MSize; } + T *data() const noexcept { return MBasePtr; } + +private: + size_t AlignByteSize(size_t UnalignedByteSize) const { + return ((UnalignedByteSize + MGranularity - 1) / MGranularity) * + MGranularity; + } + + void unmap_all() { + for (std::pair &Mapping : + MPhysicalMemMappings) { + if (Mapping.second == 0) + continue; + syclext::unmap(Mapping.second, Mapping.first.size(), MContext); + Mapping.second = 0; + } + } + + uintptr_t RecreateAddressRange(size_t AlignedNewByteSize) { + // Reserve the full range. 
+ uintptr_t NewFullVAPtr = + syclext::reserve_virtual_mem(AlignedNewByteSize, MContext); + + // Unmap the old virtual address ranges. + unmap_all(); + + // Remap all existing ranges. + uintptr_t NewEnd = NewFullVAPtr; + for (std::pair &Mapping : + MPhysicalMemMappings) { + Mapping.second = + Mapping.first.map(NewEnd, Mapping.first.size(), + syclext::address_access_mode::read_write); + NewEnd += Mapping.first.size(); + } + + // Free the old ranges. + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + + // Insert the newly reserved range to the saved ranges. + MVARanges.clear(); + MVARanges.emplace_back(NewFullVAPtr, AlignedNewByteSize); + + // Update the base pointer to point to the new start. + MBasePtr = reinterpret_cast(NewFullVAPtr); + + // Return the new end of the mapped ranges. + return NewEnd; + } + + struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : Ptr{Ptr}, Size{Size} {} + + uintptr_t Ptr; + size_t Size; + }; + + sycl::device MDevice; + sycl::context MContext; + + std::vector MVARanges; + std::vector> MPhysicalMemMappings; + + T *MBasePtr = nullptr; + size_t MSize = 0; + size_t MByteSize = 0; + + const size_t MGranularity = 0; +}; + +static constexpr size_t NumIters = 10; +static constexpr size_t WriteValueOffset = 42; +static constexpr size_t NumWorkItems = 512; + +int main() { + sycl::queue Q; + + VirtualVector Vec(Q); + + // To better test the functionality, try to allocate below the granularity + // but enough to require more memory for some iterations. + size_t SizeIncrement = 11; + size_t MinSizeGran = + syclext::get_mem_granularity(Q.get_device(), Q.get_context()) / + sizeof(int); + SizeIncrement = std::max(MinSizeGran / 2 - 1, SizeIncrement); + + // Each work-item will work on multiple elements. + size_t NumElemsPerWI = 1 + (SizeIncrement - 1) / NumWorkItems; + + for (size_t I = 0; I < NumIters; ++I) { + // Increment the size of the vector. 
+ size_t NewVecSize = (I + 1) * SizeIncrement; + Vec.reserve(NewVecSize); + assert(Vec.size() == NewVecSize); + + // Populate to the new memory + int *VecDataPtr = Vec.data(); + size_t StartOffset = I * SizeIncrement; + size_t IterWriteValueOffset = WriteValueOffset * (I + 1); + Q.parallel_for(sycl::range<1>{NumWorkItems}, [=](sycl::item<1> Idx) { + for (size_t J = 0; J < NumElemsPerWI; ++J) { + size_t LoopIdx = J * Idx.get_range(0) + Idx; + size_t OffsetIdx = StartOffset + LoopIdx; + if (OffsetIdx < NewVecSize) + VecDataPtr[OffsetIdx] = LoopIdx + IterWriteValueOffset; + } + }).wait_and_throw(); + + // Copy back the values and verify. + int *CopyBack = sycl::malloc_shared(NewVecSize, Q); + + // TODO: Level-zero (excluding on PVC) does not currently allow copy across + // virtual memory ranges, even if they are consequtive. + syclext::architecture DevArch = + Q.get_device().get_info(); + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + DevArch != syclext::architecture::intel_gpu_pvc && + DevArch != syclext::architecture::intel_gpu_pvc_vg) { + Q.parallel_for(sycl::range<1>{NewVecSize}, [=](sycl::id<1> Idx) { + CopyBack[Idx] = VecDataPtr[Idx]; + }).wait_and_throw(); + } else { + Q.copy(VecDataPtr, CopyBack, NewVecSize).wait_and_throw(); + } + + for (size_t J = 0; J < NewVecSize; ++J) { + int ExpectedVal = + J % SizeIncrement + WriteValueOffset * (J / SizeIncrement + 1); + if (CopyBack[J] != ExpectedVal) { + std::cout << "Comparison failed at index " << J << ": " << CopyBack[J] + << " != " << ExpectedVal << std::endl; + return 1; + } + } + sycl::free(CopyBack, Q); + } + + return 0; +} diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index d3047c6bb1cd0..e6b19e97d1b87 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo 
+piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index c83b4a4ba6122..530ad95722494 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index d6cc82870c669..93cd4c4de10bb 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -145,6 +145,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo 
+piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 850e6d22fdb72..c63f579ca6b53 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index daaf7bbee5de5..8807d1647ebdc 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -133,6 +133,9 @@ piextMemGetNativeHandle piextMemImageAllocate piextMemImageCopy piextMemImageCreateWithNativeHandle +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextMemImageFree piextMemImageGetInfo piextMemImportOpaqueFD @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2c97a01f87da7..99fb95d92fa72 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ 
b/sycl/test/abi/sycl_symbols_linux.dump @@ -2990,6 +2990,15 @@ _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC1ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC2ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental15get_access_modeEPKvmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental15set_access_modeEPKvmNS3_19address_access_modeERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental16free_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_6deviceERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19reserve_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental5unmapEPKvmRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_22bindless_image_samplerERKNS3_16image_descriptorERKNS0_5queueE @@ -3592,6 +3601,10 @@ _ZNK4sycl3_V114interop_handle16getNativeContextEv _ZNK4sycl3_V115device_selector13select_deviceEv _ZNK4sycl3_V116default_selectorclERKNS0_6deviceE _ZNK4sycl3_V120accelerator_selectorclERKNS0_6deviceE +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv 
+_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d02be89140c5a..c9eca1ecc8a25 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -609,6 +609,10 @@ ??0kernel_id@_V1@sycl@@AEAA@PEBD@Z ??0kernel_id@_V1@sycl@@QEAA@$$QEAV012@@Z ??0kernel_id@_V1@sycl@@QEAA@AEBV012@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVqueue@45@_K@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVdevice@45@AEBVcontext@45@_K@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z @@ -679,6 +683,7 @@ ??1kernel@_V1@sycl@@QEAA@XZ ??1kernel_bundle_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel_id@_V1@sycl@@QEAA@XZ +??1physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1node@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1platform@_V1@sycl@@QEAA@XZ @@ -696,6 +701,8 @@ ??4?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z 
+??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -761,6 +768,8 @@ ??4kernel_bundle_plain@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z @@ -782,6 +791,7 @@ ??8kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??8kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??8kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??8physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??8platform@_V1@sycl@@QEBA_NAEBV012@@Z ??8queue@_V1@sycl@@QEBA_NAEBV012@@Z ??8sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -794,6 +804,7 @@ ??9kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??9kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??9kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??9physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??9platform@_V1@sycl@@QEBA_NAEBV012@@Z ??9queue@_V1@sycl@@QEBA_NAEBV012@@Z ??9sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -4038,6 +4049,8 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel@34@@Z 
?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vkernel_id@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel_id@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@2oneapi@ext@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVphysical_mem@experimental@oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vplatform@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVplatform@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vqueue@_V1@sycl@@@2oneapi@ext@34@@Z @@ -4075,12 +4088,14 @@ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z -?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z +?free_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YAX_K0AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVqueue@45@@Z 
+?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z +?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z @@ -4170,6 +4185,7 @@ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ ?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z +?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ ?get_allocator_internal@buffer_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@image_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ @@ -4189,10 +4205,12 @@ ?get_context@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@kernel@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_context@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AVcontext@34@XZ +?get_context@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@queue@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_coordinate_normalization_mode@sampler@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@23@XZ ?get_count@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_descriptor@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAAEBUimage_descriptor@23456@XZ 
+?get_device@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@queue@_V1@sycl@@QEBA?AVdevice@23@XZ ?get_devices@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ @@ -4218,6 +4236,8 @@ ?get_kernel_ids@_V1@sycl@@YA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_kernel_ids@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVcontext@45@W4granularity_mode@12345@@Z +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVdevice@45@AEBVcontext@45@W4granularity_mode@12345@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z ?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z @@ -4327,6 +4347,7 @@ ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z +?map@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBAPEAX_K0W4address_access_mode@23456@0@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z 
?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4389,6 +4410,7 @@ ?remquo_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@0PEAH@Z ?remquo_impl@detail@_V1@sycl@@YAMMMPEAH@Z ?remquo_impl@detail@_V1@sycl@@YANNNPEAH@Z +?reserve_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YA_K_K0AEBVcontext@45@@Z ?reset@filter_selector@ONEAPI@_V1@sycl@@QEBAXXZ ?reset@filter_selector@oneapi@ext@_V1@sycl@@QEBAXXZ ?sampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@PEBXIAEBUcode_location@123@@Z @@ -4412,6 +4434,7 @@ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ ?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4427,6 +4450,7 @@ ?sincos_impl@detail@_V1@sycl@@YANNPEAN@Z ?single_task@handler@_V1@sycl@@QEAAXVkernel@23@@Z ?size@exception_list@_V1@sycl@@QEBA_KXZ +?size@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ ?size@stream@_V1@sycl@@QEBA_KXZ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ @@ -4442,6 +4466,7 @@ ?sycl_category@_V1@sycl@@YAAEBVerror_category@std@@XZ ?throwIfActionIsCreated@handler@_V1@sycl@@AEAAXXZ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ +?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z 
?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index ca29b9bd6aa1e..b7fea5aae4ff9 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1353,6 +1353,61 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( return PI_SUCCESS; } +inline pi_result +mock_piextVirtualMemGranularityGetInfo(pi_context, pi_device, + pi_virtual_mem_granularity_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextPhysicalMemCreate(pi_context, pi_device, size_t, + pi_physical_mem *ret_physical_mem) { + *ret_physical_mem = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRetain(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRelease(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemReserve(pi_context, const void *start, + size_t range_size, + void **ret_ptr) { + *ret_ptr = + start ? 
const_cast(start) : createDummyHandle(range_size); + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemFree(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemMap(pi_context, const void *, size_t, + pi_physical_mem, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemUnmap(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemSetAccess(pi_context, const void *, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemGetInfo(pi_context, const void *, size_t, + pi_virtual_mem_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + inline pi_result mock_piextPluginGetOpaqueData(void *opaque_data_param, void **opaque_data_return) { return PI_SUCCESS; From 719e8ef8c0269ab23cd46eaa1d2d2c751a2bcbce Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 1 Jul 2024 08:59:03 -0700 Subject: [PATCH 58/58] [SYCL] no exceptions leaking from destructors (#14273) Destructors are implicitly noexcept, so we must ensure they don't actually throw exceptions. No change to API or ABI with this PR. 
--- sycl/include/sycl/buffer.hpp | 8 ++- sycl/include/sycl/detail/common.hpp | 11 ++++ sycl/include/sycl/image.hpp | 13 ++++- sycl/include/syclcompat/device.hpp | 10 ++-- sycl/source/detail/context_impl.cpp | 35 +++++++------ sycl/source/detail/device_image_impl.hpp | 21 ++++---- sycl/source/detail/event_impl.cpp | 8 ++- sycl/source/detail/global_handler.cpp | 31 ++++++++---- sycl/source/detail/graph_impl.cpp | 58 ++++++++++++---------- sycl/source/detail/kernel_impl.cpp | 8 ++- sycl/source/detail/pi_utils.hpp | 11 ++-- sycl/source/detail/program_impl.cpp | 12 +++-- sycl/source/detail/queue_impl.hpp | 36 ++++++++------ sycl/source/detail/sampler_impl.cpp | 15 ++++-- sycl/source/detail/thread_pool.hpp | 8 ++- sycl/unittests/thread_safety/ThreadUtils.h | 8 ++- 16 files changed, 192 insertions(+), 101 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 5dde105b678e6..32588de22c980 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -472,7 +472,13 @@ class buffer : public detail::buffer_plain, buffer &operator=(buffer &&rhs) = default; - ~buffer() { buffer_plain::handleRelease(); } + ~buffer() { + try { + buffer_plain::handleRelease(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~buffer", e); + } + } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 1c940a21a7223..09c31ef76ef6d 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -368,6 +368,17 @@ static constexpr std::array RepeatValue(const T &Arg) { return RepeatValueHelper(Arg, std::make_index_sequence()); } +// to output exceptions caught in ~destructors +#ifndef NDEBUG +#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e) \ + { \ + std::cerr << str << " " << e.what() << std::endl; \ + assert(false); \ + } +#else +#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, 
e) +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/image.hpp b/sycl/include/sycl/image.hpp index 1239f65cdd259..2d0401764bbe9 100644 --- a/sycl/include/sycl/image.hpp +++ b/sycl/include/sycl/image.hpp @@ -954,7 +954,12 @@ class unsampled_image unsampled_image &operator=(unsampled_image &&rhs) = default; ~unsampled_image() { - common_base::unsampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::unsampledImageDestructorNotification( + (void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~unsampled_image", e); + } } bool operator==(const unsampled_image &rhs) const { @@ -1095,7 +1100,11 @@ class sampled_image sampled_image &operator=(sampled_image &&rhs) = default; ~sampled_image() { - common_base::sampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::sampledImageDestructorNotification((void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sampled_image", e); + } } bool operator==(const sampled_image &rhs) const { diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index ed16a9b32bfa4..3e3e6cb77e71d 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -339,9 +339,13 @@ class device_ext : public sycl::device { public: device_ext() : sycl::device(), _ctx(*this) {} ~device_ext() { - std::lock_guard lock(m_mutex); - sycl::event::wait(_events); - _queues.clear(); + try { + std::lock_guard lock(m_mutex); + sycl::event::wait(_events); + _queues.clear(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_ext", e); + } } device_ext(const sycl::device &base, bool print_on_async_exceptions = false, bool in_order = true) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 8ae13b345b250..910f731071837 100644 
--- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -136,23 +136,26 @@ cl_context context_impl::get() const { } context_impl::~context_impl() { - // Free all events associated with the initialization of device globals. - for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) - DeviceGlobalInitializer.second.ClearEvents(getPlugin()); - // Free all device_global USM allocations associated with this context. - for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { - DeviceGlobalMapEntry *DGEntry = - detail::ProgramManager::getInstance().getDeviceGlobalEntry( - DeviceGlobal); - DGEntry->removeAssociatedResources(this); - } - for (auto LibProg : MCachedLibPrograms) { - assert(LibProg.second && "Null program must not be kept in the cache"); - getPlugin()->call(LibProg.second); + try { + // Free all events associated with the initialization of device globals. + for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) + DeviceGlobalInitializer.second.ClearEvents(getPlugin()); + // Free all device_global USM allocations associated with this context. 
+ for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { + DeviceGlobalMapEntry *DGEntry = + detail::ProgramManager::getInstance().getDeviceGlobalEntry( + DeviceGlobal); + DGEntry->removeAssociatedResources(this); + } + for (auto LibProg : MCachedLibPrograms) { + assert(LibProg.second && "Null program must not be kept in the cache"); + getPlugin()->call(LibProg.second); + } + // TODO catch an exception and put it to list of asynchronous exceptions + getPlugin()->call(MContext); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~context_impl", e); } - - // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call_nocheck(MContext); } const async_handler &context_impl::get_async_handler() const { diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f21bf3ccd0185..eda511e340d10 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -300,15 +300,18 @@ class device_image_impl { } ~device_image_impl() { - - if (MProgram) { - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - Plugin->call(MProgram); - } - if (MSpecConstsBuffer) { - std::lock_guard Lock{MSpecConstAccessMtx}; - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - memReleaseHelper(Plugin, MSpecConstsBuffer); + try { + if (MProgram) { + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + Plugin->call(MProgram); + } + if (MSpecConstsBuffer) { + std::lock_guard Lock{MSpecConstAccessMtx}; + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + memReleaseHelper(Plugin, MSpecConstsBuffer); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_image_impl", e); } } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 85afb56fcaf9b..097cef03b4d66 100644 --- a/sycl/source/detail/event_impl.cpp +++ 
b/sycl/source/detail/event_impl.cpp @@ -43,8 +43,12 @@ void event_impl::initContextIfNeeded() { } event_impl::~event_impl() { - if (MEvent) - getPlugin()->call(MEvent); + try { + if (MEvent) + getPlugin()->call(MEvent); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~event_impl", e); + } } void event_impl::waitInternal(bool *Success) { diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 072a9628d6a6b..301125d9b5c93 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -54,14 +54,18 @@ class ObjectUsageCounter { MCounter++; } ~ObjectUsageCounter() { - if (!MModifyCounter) - return; - - LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); - MCounter--; - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - if (RTGlobalObjHandler) { - RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + try { + if (!MModifyCounter) + return; + + LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); + MCounter--; + GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); + if (RTGlobalObjHandler) { + RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } } @@ -234,12 +238,17 @@ void GlobalHandler::releaseDefaultContexts() { struct EarlyShutdownHandler { ~EarlyShutdownHandler() { + try { #ifdef _WIN32 - // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().releaseDefaultContexts(); + // on Windows we keep to the existing shutdown procedure + GlobalHandler::instance().releaseDefaultContexts(); #else - shutdown_early(); + shutdown_early(); #endif + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~EarlyShutdownHandler", + e); + } } }; diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 
09ccef30dacd2..9ef8ce262932f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -297,9 +297,13 @@ void exec_graph_impl::makePartitions() { } graph_impl::~graph_impl() { - clearQueues(); - for (auto &MemObj : MMemObjs) { - MemObj->markNoLongerBeingUsedInGraph(); + try { + clearQueues(); + for (auto &MemObj : MMemObjs) { + MemObj->markNoLongerBeingUsedInGraph(); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~graph_impl", e); } } @@ -784,34 +788,38 @@ exec_graph_impl::exec_graph_impl(sycl::context Context, } exec_graph_impl::~exec_graph_impl() { - const sycl::detail::PluginPtr &Plugin = - sycl::detail::getSyclObjImpl(MContext)->getPlugin(); - MSchedule.clear(); - // We need to wait on all command buffer executions before we can release - // them. - for (auto &Event : MExecutionEvents) { - Event->wait(Event); - } + try { + const sycl::detail::PluginPtr &Plugin = + sycl::detail::getSyclObjImpl(MContext)->getPlugin(); + MSchedule.clear(); + // We need to wait on all command buffer executions before we can release + // them. 
+ for (auto &Event : MExecutionEvents) { + Event->wait(Event); + } - for (const auto &Partition : MPartitions) { - Partition->MSchedule.clear(); - for (const auto &Iter : Partition->MPiCommandBuffers) { - if (auto CmdBuf = Iter.second; CmdBuf) { + for (const auto &Partition : MPartitions) { + Partition->MSchedule.clear(); + for (const auto &Iter : Partition->MPiCommandBuffers) { + if (auto CmdBuf = Iter.second; CmdBuf) { + pi_result Res = Plugin->call_nocheck< + sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + (void)Res; + assert(Res == pi_result::PI_SUCCESS); + } + } + } + + for (auto &Iter : MCommandMap) { + if (auto Command = Iter.second; Command) { pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); (void)Res; assert(Res == pi_result::PI_SUCCESS); } } - } - - for (auto &Iter : MCommandMap) { - if (auto Command = Iter.second; Command) { - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); - (void)Res; - assert(Res == pi_result::PI_SUCCESS); - } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~exec_graph_impl", e); } } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index b4ab6b232eef9..8502f3489b9c7 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -75,8 +75,12 @@ kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) : MContext(Context), MProgram(ProgramImpl->getHandleRef()) {} kernel_impl::~kernel_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call(MKernel); + try { + // TODO catch an exception and put it to list of asynchronous exceptions + getPlugin()->call(MKernel); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_impl", e); + } } bool 
kernel_impl::isCreatedFromSource() const { diff --git a/sycl/source/detail/pi_utils.hpp b/sycl/source/detail/pi_utils.hpp index 877cbd0d14e52..fa288c91fc583 100644 --- a/sycl/source/detail/pi_utils.hpp +++ b/sycl/source/detail/pi_utils.hpp @@ -31,9 +31,14 @@ struct OwnedPiEvent { MPlugin->call(*MEvent); } ~OwnedPiEvent() { - // Release the event if the ownership was not transferred. - if (MEvent.has_value()) - MPlugin->call(*MEvent); + try { + // Release the event if the ownership was not transferred. + if (MEvent.has_value()) + MPlugin->call(*MEvent); + + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~OwnedPiEvent", e); + } } OwnedPiEvent(OwnedPiEvent &&Other) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index f3ac2185627f9..ca5628fb1a8d6 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -203,10 +203,14 @@ program_impl::program_impl(ContextImplPtr Context, } program_impl::~program_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - if (MProgram != nullptr) { - const PluginPtr &Plugin = getPlugin(); - Plugin->call(MProgram); + try { + // TODO catch an exception and put it to list of asynchronous exceptions + if (MProgram != nullptr) { + const PluginPtr &Plugin = getPlugin(); + Plugin->call(MProgram); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~program_impl", e); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 123efc3d87af6..ccaf52cccd408 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -306,24 +306,28 @@ class queue_impl { } ~queue_impl() { - // The trace event created in the constructor should be active through the - // lifetime of the queue object as member variables when ABI breakage is - // allowed. This example shows MTraceEvent as a member variable. 
+ try { + // The trace event created in the constructor should be active through the + // lifetime of the queue object as member variables when ABI breakage is + // allowed. This example shows MTraceEvent as a member variable. #if XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - static_cast(xpti::trace_point_type_t::queue_destroy); - if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { - // Used cached information in member variables - xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, - (xpti::trace_event_data_t *)MTraceEvent, - MInstanceID, - static_cast("queue_destroy")); - xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); - } + constexpr uint16_t NotificationTraceType = + static_cast(xpti::trace_point_type_t::queue_destroy); + if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { + // Used cached information in member variables + xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, + (xpti::trace_event_data_t *)MTraceEvent, + MInstanceID, + static_cast("queue_destroy")); + xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); + } #endif - throw_asynchronous(); - cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); + throw_asynchronous(); + cleanup_fusion_cmd(); + getPlugin()->call(MQueues[0]); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); + } } /// \return an OpenCL interoperability queue handle. 
diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index c2af7884a164c..edca8eb1be025 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -40,11 +40,16 @@ sampler_impl::sampler_impl(cl_sampler clSampler, const context &syclContext) { } sampler_impl::~sampler_impl() { - std::lock_guard Lock(MMutex); - for (auto &Iter : MContextToSampler) { - // TODO catch an exception and add it to the list of asynchronous exceptions - const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); - Plugin->call(Iter.second); + try { + std::lock_guard Lock(MMutex); + for (auto &Iter : MContextToSampler) { + // TODO catch an exception and add it to the list of asynchronous + // exceptions + const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); + Plugin->call(Iter.second); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sampler_impl", e); } } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 35adb98e9d570..304045389b53b 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -74,7 +74,13 @@ class ThreadPool { start(); } - ~ThreadPool() { finishAndWait(); } + ~ThreadPool() { + try { + finishAndWait(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); + } + } void finishAndWait() { MStop.store(true); diff --git a/sycl/unittests/thread_safety/ThreadUtils.h b/sycl/unittests/thread_safety/ThreadUtils.h index ccbca98d44e3f..4b40123ba1bb7 100644 --- a/sycl/unittests/thread_safety/ThreadUtils.h +++ b/sycl/unittests/thread_safety/ThreadUtils.h @@ -48,7 +48,13 @@ class ThreadPool { enqueueHelper(std::forward(funcs)...); } - ~ThreadPool() { wait(); } + ~ThreadPool() { + try { + wait(); + } catch (std::exception &e) { + std::cerr << "exception in ~ThreadPool: " << e.what() << std::endl; + } + } private: template