From 5ff31ddf6e990a04e63c1d6ddc6781acad8381c6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 9 Apr 2024 05:04:16 -0700 Subject: [PATCH 1/5] Fix kernel shortcut path for inorder queue Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 63 ++++++++++++++++++------------- sycl/source/detail/queue_impl.hpp | 10 ++++- sycl/source/handler.cpp | 10 +++-- 3 files changed, 51 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 21dc5c4923b4a..2f646220f6ad4 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -322,35 +322,46 @@ void queue_impl::addSharedEvent(const event &Event) { MEventsShared.push_back(Event); } -static bool -areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context) { - auto CheckEvent = [&Context](const sycl::event &Event) { - const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event); - // Events that don't have an initialized context are throwaway events that - // don't represent actual dependencies. Calling getContextImpl() would set - // their context, which we wish to avoid as it is expensive. - // NOP events also don't represent actual dependencies. - if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || - SyclEventImplPtr->isNOP()) { - return true; - } - if (SyclEventImplPtr->is_host()) { - return SyclEventImplPtr->isCompleted(); - } - // Cross-context dependencies can't be passed to the backend directly. - if (SyclEventImplPtr->getContextImpl() != Context) - return false; +bool CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr) { + // Events that don't have an initialized context are throwaway events that + // don't represent actual dependencies. Calling getContextImpl() would set + // their context, which we wish to avoid as it is expensive. + // NOP events also don't represent actual dependencies. + if ((!SyclEventImplPtr->isContextInitialized() && + !SyclEventImplPtr->is_host()) || + SyclEventImplPtr->isNOP()) { + return true; + } + if (SyclEventImplPtr->is_host()) { + return SyclEventImplPtr->isCompleted(); + } + // Cross-context dependencies can't be passed to the backend directly. + if (SyclEventImplPtr->getContextImpl() != Context) + return false; - // A nullptr here means that the commmand does not produce a PI event or it - // hasn't been enqueued yet. - return SyclEventImplPtr->getHandleRef() != nullptr; - }; + // A nullptr here means that the commmand does not produce a PI event or it + // hasn't been enqueued yet. + return SyclEventImplPtr->getHandleRef() != nullptr; +}; + +bool queue_impl::areEventsSafeForSchedulerBypass( + const std::vector &DepEvents, ContextImplPtr Context) const { return std::all_of( - DepEvents.begin(), DepEvents.end(), - [&CheckEvent](const sycl::event &Event) { return CheckEvent(Event); }); + DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) { + const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event); + return CheckEventReadiness(Context, SyclEventImplPtr); + }); +} + +bool queue_impl::areEventsSafeForSchedulerBypass( + const std::vector &DepEvents, ContextImplPtr Context) const { + + return std::all_of(DepEvents.begin(), DepEvents.end(), + [&Context](const EventImplPtr &SyclEventImplPtr) { + return CheckEventReadiness(Context, SyclEventImplPtr); + }); } template diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4dfe4bd39d4d1..71561e5a98c9c 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -92,7 +92,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -745,6 +745,13 @@ class queue_impl { std::vector &MutableVec, std::unique_lock &QueueLock); + bool + areEventsSafeForSchedulerBypass(const std::vector &DepEvents, + ContextImplPtr Context) const; + bool + areEventsSafeForSchedulerBypass(const std::vector &DepEvents, + ContextImplPtr Context) const; + protected: event discard_or_return(const event &Event); // Hook to the scheduler to clean up any fusion command held on destruction. @@ -783,7 +790,6 @@ class queue_impl { EventRet = Handler.finalize(); } -protected: /// Performs command group submission to the queue. /// /// \param CGF is a function object containing command group. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 17cde0994b314..fbba3f9508f0e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -241,10 +241,12 @@ event handler::finalize() { } if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() && - !MQueue->is_in_fusion_mode() && - CGData.MRequirements.size() + CGData.MEvents.size() + - MStreamStorage.size() == - 0) { + !MQueue->is_in_fusion_mode() && !CGData.MRequirements.size() && + !MStreamStorage.size() && + (!CGData.MEvents.size() || + (MQueue->isInOrder() && + MQueue->areEventsSafeForSchedulerBypass( + CGData.MEvents, MQueue->getContextImplPtr())))) { // if user does not add a new dependency to the dependency graph, i.e. // the graph is not changed, and the queue is not in fusion mode, then // this faster path is used to submit kernel bypassing scheduler and From 29599f4ff81fbb09569e0ea60ac887dad130acac Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 9 Apr 2024 05:13:50 -0700 Subject: [PATCH 2/5] Move helper functions to the proper class Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.cpp | 46 +--------------------- sycl/source/detail/queue_impl.hpp | 7 ---- sycl/source/detail/scheduler/scheduler.cpp | 42 ++++++++++++++++++++ sycl/source/detail/scheduler/scheduler.hpp | 7 ++++ sycl/source/handler.cpp | 2 +- 5 files changed, 52 insertions(+), 52 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2f646220f6ad4..89e2f59db0ba3 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -322,48 +322,6 @@ void queue_impl::addSharedEvent(const event &Event) { MEventsShared.push_back(Event); } -bool CheckEventReadiness(const ContextImplPtr &Context, - const EventImplPtr &SyclEventImplPtr) { - // Events that don't have an initialized context are throwaway events that - // don't represent actual dependencies. Calling getContextImpl() would set - // their context, which we wish to avoid as it is expensive. - // NOP events also don't represent actual dependencies. - if ((!SyclEventImplPtr->isContextInitialized() && - !SyclEventImplPtr->is_host()) || - SyclEventImplPtr->isNOP()) { - return true; - } - if (SyclEventImplPtr->is_host()) { - return SyclEventImplPtr->isCompleted(); - } - // Cross-context dependencies can't be passed to the backend directly. - if (SyclEventImplPtr->getContextImpl() != Context) - return false; - - // A nullptr here means that the commmand does not produce a PI event or it - // hasn't been enqueued yet. - return SyclEventImplPtr->getHandleRef() != nullptr; -}; - -bool queue_impl::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) const { - - return std::all_of( - DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) { - const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event); - return CheckEventReadiness(Context, SyclEventImplPtr); - }); -} - -bool queue_impl::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) const { - - return std::all_of(DepEvents.begin(), DepEvents.end(), - [&Context](const EventImplPtr &SyclEventImplPtr) { - return CheckEventReadiness(Context, SyclEventImplPtr); - }); -} - template event queue_impl::submitWithHandler(const std::shared_ptr &Self, const std::vector &DepEvents, @@ -393,8 +351,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, // If we have a command graph set we need to capture the op through the // handler rather than by-passing the scheduler. - if (MGraph.expired() && - areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) { + if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( + ExpandedDepEvents, MContext)) { if (MSupportsDiscardingPiEvents) { MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 71561e5a98c9c..c7d865d388f76 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -745,13 +745,6 @@ class queue_impl { std::vector &MutableVec, std::unique_lock &QueueLock); - bool - areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context) const; - bool - areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context) const; - protected: event discard_or_return(const event &Event); // Hook to the scheduler to clean up any fusion command held on destruction. diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 9f25108db6d52..bceb810cf4df9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -716,6 +716,48 @@ EventImplPtr Scheduler::addCommandGraphUpdate( return NewCmdEvent; } +bool CheckEventReadiness(const ContextImplPtr &Context, + const EventImplPtr &SyclEventImplPtr) { + // Events that don't have an initialized context are throwaway events that + // don't represent actual dependencies. Calling getContextImpl() would set + // their context, which we wish to avoid as it is expensive. + // NOP events also don't represent actual dependencies. + if ((!SyclEventImplPtr->isContextInitialized() && + !SyclEventImplPtr->is_host()) || + SyclEventImplPtr->isNOP()) { + return true; + } + if (SyclEventImplPtr->is_host()) { + return SyclEventImplPtr->isCompleted(); + } + // Cross-context dependencies can't be passed to the backend directly. + if (SyclEventImplPtr->getContextImpl() != Context) + return false; + + // A nullptr here means that the commmand does not produce a PI event or it + // hasn't been enqueued yet. + return SyclEventImplPtr->getHandleRef() != nullptr; +}; + +bool Scheduler::areEventsSafeForSchedulerBypass( + const std::vector &DepEvents, ContextImplPtr Context) { + + return std::all_of( + DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) { + const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event); + return CheckEventReadiness(Context, SyclEventImplPtr); + }); +} + +bool Scheduler::areEventsSafeForSchedulerBypass( + const std::vector &DepEvents, ContextImplPtr Context) { + + return std::all_of(DepEvents.begin(), DepEvents.end(), + [&Context](const EventImplPtr &SyclEventImplPtr) { + return CheckEventReadiness(Context, SyclEventImplPtr); + }); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 8e619971fbc04..09437928f1d32 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -491,6 +491,13 @@ class Scheduler { const QueueImplPtr &Queue, std::vector Requirements, std::vector &Events); + static bool + areEventsSafeForSchedulerBypass(const std::vector &DepEvents, + ContextImplPtr Context); + static bool + areEventsSafeForSchedulerBypass(const std::vector &DepEvents, + ContextImplPtr Context); + protected: using RWLockT = std::shared_timed_mutex; using ReadLockT = std::shared_lock; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index fbba3f9508f0e..c2171f53ac999 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -245,7 +245,7 @@ event handler::finalize() { !MStreamStorage.size() && (!CGData.MEvents.size() || (MQueue->isInOrder() && - MQueue->areEventsSafeForSchedulerBypass( + detail::Scheduler::areEventsSafeForSchedulerBypass( CGData.MEvents, MQueue->getContextImplPtr())))) { // if user does not add a new dependency to the dependency graph, i.e. // the graph is not changed, and the queue is not in fusion mode, then From 294e86399ad72f2287d65d8c7a1f517c52f13283 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 9 Apr 2024 05:21:05 -0700 Subject: [PATCH 3/5] fix formatting Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/queue_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c7d865d388f76..231437cd67f19 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -92,7 +92,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. From 282a8e159a7235cf68764f2a4c9c311bb7c0bc1c Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 10 Apr 2024 04:38:19 -0700 Subject: [PATCH 4/5] fix build & test Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index bceb810cf4df9..4bf64cfcccbaf 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -737,7 +737,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, // A nullptr here means that the commmand does not produce a PI event or it // hasn't been enqueued yet. return SyclEventImplPtr->getHandleRef() != nullptr; -}; +} bool Scheduler::areEventsSafeForSchedulerBypass( const std::vector &DepEvents, ContextImplPtr Context) { diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index 6c8953c9e5d0c..2fcf81d303a5b 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -375,10 +375,7 @@ TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); auto EventLastWaitList = EventLastImpl->getWaitList(); - // Previous task is not a host task. Explicit dependency is still needed - // to properly handle blocked tasks (the event will be filtered out before - // submission to the backend). - ASSERT_EQ(EventLastWaitList.size(), 1lu); + ASSERT_EQ(EventLastWaitList.size(), 0lu); ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); } From 75e6a9bde88bd90fd9385673f1664f3f2adfe6a2 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 11 Apr 2024 07:34:02 -0700 Subject: [PATCH 5/5] Fix and extend test Signed-off-by: Tikhomirova, Kseniya --- .../Extensions/CommandGraph/InOrderQueue.cpp | 154 ++++++++++-------- 1 file changed, 89 insertions(+), 65 deletions(-) diff --git a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp index 2fcf81d303a5b..59d11d861dd2b 100644 --- a/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp +++ b/sycl/unittests/Extensions/CommandGraph/InOrderQueue.cpp @@ -312,71 +312,95 @@ TEST_F(CommandGraphTest, InOrderQueueWithPreviousHostTask) { } TEST_F(CommandGraphTest, InOrderQueueHostTaskAndGraph) { - sycl::property_list Properties{sycl::property::queue::in_order()}; - sycl::queue InOrderQueue{Dev, Properties}; - experimental::command_graph - InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; - - auto EventInitial = - InOrderQueue.submit([&](handler &CGH) { CGH.host_task([=]() {}); }); - auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); - - // Record in-order queue with three nodes. - InOrderGraph.begin_recording(InOrderQueue); - auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - - auto PtrNode1 = - sycl::detail::getSyclObjImpl(InOrderGraph) - ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); - ASSERT_NE(PtrNode1, nullptr); - ASSERT_TRUE(PtrNode1->MPredecessors.empty()); - - auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - - auto PtrNode2 = - sycl::detail::getSyclObjImpl(InOrderGraph) - ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); - ASSERT_NE(PtrNode2, nullptr); - ASSERT_NE(PtrNode2, PtrNode1); - ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); - ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); - ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); - ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); - - auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - - auto PtrNode3 = - sycl::detail::getSyclObjImpl(InOrderGraph) - ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); - ASSERT_NE(PtrNode3, nullptr); - ASSERT_NE(PtrNode3, PtrNode2); - ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); - ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); - ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); - ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); - - InOrderGraph.end_recording(InOrderQueue); - - auto InOrderGraphExec = InOrderGraph.finalize(); - auto EventGraph = InOrderQueue.submit( - [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); - - auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); - auto EventGraphWaitList = EventGraphImpl->getWaitList(); - // Previous task is a host task. Explicit dependency is needed to enforce the - // execution order. - ASSERT_EQ(EventGraphWaitList.size(), 1lu); - ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); - - auto EventLast = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); - auto EventLastWaitList = EventLastImpl->getWaitList(); - ASSERT_EQ(EventLastWaitList.size(), 0lu); - ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); + auto TestBody = [&](bool BlockHostTask) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Dev, Properties}; + experimental::command_graph + InOrderGraph{InOrderQueue.get_context(), InOrderQueue.get_device()}; + // Event dependency build depends on host task completion. Making it + // predictable with mutex in host task. + std::mutex HostTaskMutex; + std::unique_lock Lock(HostTaskMutex, std::defer_lock); + if (BlockHostTask) + Lock.lock(); + auto EventInitial = InOrderQueue.submit([&](handler &CGH) { + CGH.host_task([&HostTaskMutex]() { + std::lock_guard HostTaskLock(HostTaskMutex); + }); + }); + auto EventInitialImpl = sycl::detail::getSyclObjImpl(EventInitial); + + // Record in-order queue with three nodes. + InOrderGraph.begin_recording(InOrderQueue); + auto Node1Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode1 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode1, nullptr); + ASSERT_TRUE(PtrNode1->MPredecessors.empty()); + + auto Node2Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode2 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode2, nullptr); + ASSERT_NE(PtrNode2, PtrNode1); + ASSERT_EQ(PtrNode1->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode1->MSuccessors.front().lock(), PtrNode2); + ASSERT_EQ(PtrNode2->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); + + auto Node3Graph = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto PtrNode3 = + sycl::detail::getSyclObjImpl(InOrderGraph) + ->getLastInorderNode(sycl::detail::getSyclObjImpl(InOrderQueue)); + ASSERT_NE(PtrNode3, nullptr); + ASSERT_NE(PtrNode3, PtrNode2); + ASSERT_EQ(PtrNode2->MSuccessors.size(), 1lu); + ASSERT_EQ(PtrNode2->MSuccessors.front().lock(), PtrNode3); + ASSERT_EQ(PtrNode3->MPredecessors.size(), 1lu); + ASSERT_EQ(PtrNode3->MPredecessors.front().lock(), PtrNode2); + + InOrderGraph.end_recording(InOrderQueue); + + auto InOrderGraphExec = InOrderGraph.finalize(); + + if (!BlockHostTask) + EventInitial.wait(); + auto EventGraph = InOrderQueue.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(InOrderGraphExec); }); + + auto EventGraphImpl = sycl::detail::getSyclObjImpl(EventGraph); + auto EventGraphWaitList = EventGraphImpl->getWaitList(); + // Previous task is a host task. Explicit dependency is needed to enforce + // the execution order. + ASSERT_EQ(EventGraphWaitList.size(), 1lu); + ASSERT_EQ(EventGraphWaitList[0], EventInitialImpl); + + auto EventLast = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto EventLastImpl = sycl::detail::getSyclObjImpl(EventLast); + auto EventLastWaitList = EventLastImpl->getWaitList(); + // Previous task is not a host task. Explicit dependency is still needed + // to properly handle blocked tasks (the event will be filtered out before + // submission to the backend). + if (BlockHostTask) + Lock.unlock(); + ASSERT_EQ(EventLastWaitList.size(), size_t(BlockHostTask)); + if (EventLastWaitList.size()) { + ASSERT_EQ(EventLastWaitList[0], EventGraphImpl); + } + EventLast.wait(); + }; + + TestBody(false); + TestBody(true); } TEST_F(CommandGraphTest, InOrderQueueMemsetAndGraph) {