From 645a81d8f593962cc5036772622ef6354d08494a Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 26 Jun 2024 16:05:19 +0100 Subject: [PATCH] [SYCL][Graph] Refine barrier semantics Changes the semantics of [sycl_ext_oneapi_enqueue_barrier](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc) commands recorded from a queue into a SYCL-Graph. The current semantics are that a barrier without a wait-list will depend on all leaf nodes in the graph, and be a dependency of any subsequent node added to the graph. After discussion with users, this change updates the behavior of barriers to only depend on leaf nodes from the same recording queue, and only commands recorded from the same queue will have a dependency on the barrier. --- .../sycl_ext_oneapi_graph.asciidoc | 22 +- sycl/source/detail/graph_impl.cpp | 20 +- sycl/source/detail/graph_impl.hpp | 46 +- sycl/source/handler.cpp | 26 +- .../Extensions/CommandGraph/Barrier.cpp | 492 +++++++++++++++--- .../Extensions/CommandGraph/Exceptions.cpp | 35 ++ 6 files changed, 517 insertions(+), 124 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 77fab2ebe5fb1..3e5e46823dc7c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1736,15 +1736,23 @@ passed an invalid event. The new handler methods, and queue shortcuts, defined by link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier] can only be used in graph nodes created using the Record & Replay API, as -barriers rely on events to enforce dependencies. For barriers with an empty -wait list parameter, the semantics are that the barrier node being added to -will depend on all the existing graph leaf nodes, not only the leaf nodes -that were added from the queue being recorded. +barriers rely on events to enforce dependencies. A synchronous exception will be thrown with error code `invalid` if a user -tries to add them to a graph using the Explicit API. Empty nodes created with -the `node::depends_on_all_leaves` property can be used instead of barriers when -a user is building a graph with the explicit API. +tries to add a barrier command to a graph using the explicit API. Empty nodes +created with the `node::depends_on_all_leaves` property can be used instead of +barriers when a user is building a graph with the explicit API. + +The semantics of barriers are defined in `sycl_ext_oneapi_enqueue_barrier` for +a single command-queue, and correlate as follows to a graph that may contain +nodes that are recorded from multiple queues and/or added by the explicit API: + +* Barriers with an empty wait list parameter will only depend on the leaf nodes + that were added to the graph from the queue the barrier command is being + recorded from. + +* The only commands which have an implicit dependency on the barrier command + are those recorded from the same queue the barrier command was submitted to. ==== sycl_ext_oneapi_memcpy2d diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 3268a27fbb827..bd7b407d2014e 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -353,9 +353,6 @@ graph_impl::add(const std::shared_ptr &Impl, const std::shared_ptr &NodeImpl = std::make_shared(); - // Add any deps from the vector of extra dependencies - Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end()); - MNodeStorage.push_back(NodeImpl); addDepsToNode(NodeImpl, Deps); @@ -488,20 +485,12 @@ graph_impl::add(node_type NodeType, // list Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); - // Add any deps from the extra dependencies vector - Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end()); - const std::shared_ptr &NodeImpl = std::make_shared(NodeType, std::move(CommandGroup)); MNodeStorage.push_back(NodeImpl); addDepsToNode(NodeImpl, Deps); - // Set barrier nodes as prerequisites (new start points) for subsequent nodes - if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { - MExtraDependencies.push_back(NodeImpl); - } - return NodeImpl; } @@ -610,12 +599,17 @@ void graph_impl::makeEdge(std::shared_ptr Src, removeRoot(Dest); // remove receiver from root node list } -std::vector graph_impl::getExitNodesEvents() { +std::vector graph_impl::getExitNodesEvents( + std::weak_ptr RecordedQueue) { std::vector Events; + auto RecordedQueueSP = RecordedQueue.lock(); for (auto &Node : MNodeStorage) { if (Node->MSuccessors.empty()) { - Events.push_back(getEventForNode(Node)); + auto EventForNode = getEventForNode(Node); + if (EventForNode->getSubmittedQueue() == RecordedQueueSP) { + Events.push_back(getEventForNode(Node)); + } } } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 80837181ec056..9d13a87ed13e9 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -1184,26 +1184,26 @@ class graph_impl { size_t getNumberOfNodes() const { return MNodeStorage.size(); } /// Traverse the graph recursively to get the events associated with the - /// output nodes of this graph. + /// output nodes of this graph associated with a specific queue. + /// @param[in] Queue The queue exit nodes must have been recorded from. /// @return vector of events associated to exit nodes. - std::vector getExitNodesEvents(); - - /// Removes all Barrier nodes from the list of extra dependencies - /// MExtraDependencies. - /// @return vector of events associated to previous barrier nodes. std::vector - removeBarriersFromExtraDependencies() { - std::vector Events; - for (auto It = MExtraDependencies.begin(); - It != MExtraDependencies.end();) { - if ((*It)->MCGType == sycl::detail::CG::Barrier) { - Events.push_back(getEventForNode(*It)); - It = MExtraDependencies.erase(It); - } else { - ++It; - } - } - return Events; + getExitNodesEvents(std::weak_ptr Queue); + + /// Store the last barrier node that was submitted to the queue. + /// @param[in] Queue The queue the barrier was recorded from. + /// @param[in] BarrierNodeImpl The created barrier node. + void setBarrierDep(std::weak_ptr Queue, + std::shared_ptr BarrierNodeImpl) { + MBarrierDependencyMap[Queue] = BarrierNodeImpl; + } + + /// Get the last barrier node that was submitted to the queue. + /// @param[in] Queue The queue to find the last barrier node of. An empty + /// shared_ptr is returned if no barrier node has been recorded to the queue. + std::shared_ptr + getBarrierDep(std::weak_ptr Queue) { + return MBarrierDependencyMap[Queue]; } private: @@ -1281,11 +1281,11 @@ class graph_impl { /// presence of the assume_buffer_outlives_graph property. bool MAllowBuffers = false; - /// List of nodes that must be added as extra dependencies to new nodes when - /// added to this graph. - /// This list is mainly used by barrier nodes which must be considered - /// as predecessors for all nodes subsequently added to the graph. - std::list> MExtraDependencies; + /// Mapping from queues to barrier nodes. For each queue the last barrier + /// node recorded to the graph from the queue is stored. + std::map, std::shared_ptr, + std::owner_less>> + MBarrierDependencyMap; }; /// Class representing the implementation of command_graph. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b16441e4ff146..27af40344c8e7 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -403,19 +403,6 @@ event handler::finalize() { case detail::CG::Barrier: case detail::CG::BarrierWaitlist: { if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) { - // if no event to wait for was specified, we add all exit - // nodes/events of the graph - if (MEventsWaitWithBarrier.size() == 0) { - MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents(); - // Graph-wide barriers take precedence over previous one. - // We therefore remove the previous ones from ExtraDependencies list. - // The current barrier is then added to this list in the graph_impl. - std::vector EventsBarriers = - GraphImpl->removeBarriersFromExtraDependencies(); - MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier), - std::begin(EventsBarriers), - std::end(EventsBarriers)); - } CGData.MEvents.insert(std::end(CGData.MEvents), std::begin(MEventsWaitWithBarrier), std::end(MEventsWaitWithBarrier)); @@ -533,6 +520,7 @@ event handler::finalize() { // it to the graph to create a node, rather than submit it to the scheduler. if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) { auto EventImpl = std::make_shared(); + EventImpl->setSubmittedQueue(MQueue); std::shared_ptr NodeImpl = nullptr; @@ -564,7 +552,17 @@ event handler::finalize() { // queue. GraphImpl->setLastInorderNode(MQueue, NodeImpl); } else { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue); + if (LastBarrierRecordedFromQueue) { + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), + {LastBarrierRecordedFromQueue}); + } else { + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + } + + if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { + GraphImpl->setBarrierDep(MQueue, NodeImpl); + } } // Associate an event with this new node and return the event. diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 79f55dc226b62..790956d5aeff4 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -61,8 +61,8 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { auto Node3Graph = Queue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - auto Barrier = - Queue2.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + auto Barrier = Queue2.submit( + [&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({Node2Graph}); }); auto Node4Graph = Queue2.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); @@ -74,21 +74,44 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { // Check the graph structure // (1) (2) (3) - // \ | / - // \ | / + // | // (B) // / \ // (4) (5) ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); for (auto Root : GraphImpl->MRoots) { - auto Node = Root.lock(); - ASSERT_EQ(Node->MSuccessors.size(), 1lu); - auto BarrierNode = Node->MSuccessors.front().lock(); - ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); - ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), - sycl::detail::getSyclObjImpl(Barrier)); - ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); - ASSERT_EQ(BarrierNode->MSuccessors.size(), 2lu); + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2Graph)) { + + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 2lu); + + for (auto SuccSucc : SuccNode->MSuccessors) { + auto SuccSuccNode = SuccSucc.lock(); + + if (GraphImpl->getEventForNode(SuccSuccNode) == + sycl::detail::getSyclObjImpl(Node4Graph)) { + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(SuccSuccNode) == + sycl::detail::getSyclObjImpl(Node5Graph)) { + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected node"); + } + } + } else { + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + } } } @@ -434,10 +457,8 @@ TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { // Check the graph structure // (1) (2) - // \ / | - // (B) | - // | / - // (3) + // | | + // (B) (3) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); @@ -447,27 +468,28 @@ TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { if (GraphImpl->getEventForNode(RootNode) == sycl::detail::getSyclObjImpl(Node1)) { ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); - } else if (GraphImpl->getEventForNode(RootNode) == - sycl::detail::getSyclObjImpl(Node2)) { - ASSERT_EQ(RootNode->MSuccessors.size(), 2lu); - } else { - ASSERT_TRUE(false && "Unexpected root node"); - } - auto SuccNode = RootNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); - ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), - sycl::detail::getSyclObjImpl(Node3)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } } } @@ -487,7 +509,7 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { auto Node2 = OutOfOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier(); + auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier({Node1, Node2}); auto Node3 = OutOfOrderQueue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node2); @@ -500,8 +522,8 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { // (1) (2) // \ /| // (B) | - // | / - // (3) + // | + // (3) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); @@ -518,20 +540,21 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { ASSERT_TRUE(false && "Unexpected root node"); } - auto SuccNode = RootNode->MSuccessors.front().lock(); - - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); - - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); - - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); - - ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), - sycl::detail::getSyclObjImpl(Node3)); + for (auto Succ : RootNode->MSuccessors) { + auto SuccNode = Succ.lock(); + + if (GraphImpl->getEventForNode(SuccNode) == + sycl::detail::getSyclObjImpl(BarrierNode)) { + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(SuccNode) == + sycl::detail::getSyclObjImpl(Node3)) { + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } + } } } @@ -550,35 +573,370 @@ TEST_F(CommandGraphTest, BarrierBetweenExplicitNodes) { Graph.end_recording(); auto Node2 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1)}); + + // Check the graph structure + // (B) (1) + // | + // (2) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(BarrierNode)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + } else if (RootNode == sycl::detail::getSyclObjImpl(Node1)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(SuccNode, sycl::detail::getSyclObjImpl(Node2)); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleOOOQueue) { + sycl::queue Queue2{Queue.get_context(), Dev}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, Queue2}); + + auto Node1 = Queue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node3 = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node4 = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = Queue.ext_oneapi_submit_barrier(); + + auto Node5 = Queue2.submit([&](sycl::handler &cgh) { + cgh.depends_on({Node3, Node4}); + cgh.single_task>([]() {}); + }); + + auto Node6 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + // Check the graph structure - // (1) - // | - // (B) - // | - // (2) + // (1) (2) (3) (4) + // \ / \ / + // (B) (5) + // | + // (6) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); - ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ(GraphImpl->MRoots.size(), 4u); for (auto Root : GraphImpl->MRoots) { auto RootNode = Root.lock(); - auto Node1Impl = sycl::detail::getSyclObjImpl(Node1); - ASSERT_EQ(RootNode, Node1Impl); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if ((RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) || + (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2))) { - auto SuccNode = RootNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + + auto Node6Impl = sycl::detail::getSyclObjImpl(Node6); + ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), Node6Impl); + } else if ((RootNodeEvent == sycl::detail::getSyclObjImpl(Node3)) || + (RootNodeEvent == sycl::detail::getSyclObjImpl(Node4))) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node5)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleInOrderQueue) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Queue.get_context(), Dev, Properties}; + sycl::queue InOrderQueue2{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + InOrderQueue1}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = InOrderQueue1.ext_oneapi_submit_barrier(); - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + auto Node3 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleMixedOrderQueues) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, InOrderQueue}); + + auto Node1 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = Queue.ext_oneapi_submit_barrier(); + + auto Node3 = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleQueuesMultipleBarriers) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, InOrderQueue}); + + auto Barrier1 = Queue.ext_oneapi_submit_barrier(); + auto Barrier2 = InOrderQueue.ext_oneapi_submit_barrier(); + auto Barrier3 = InOrderQueue.ext_oneapi_submit_barrier(); + auto Barrier4 = Queue.ext_oneapi_submit_barrier(); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (4) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Barrier1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier4)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Barrier2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierWithInOrderCommands) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Dev, Properties}; + sycl::queue InOrderQueue2{Dev, Properties}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier1 = InOrderQueue1.ext_oneapi_submit_barrier(); + auto Barrier2 = InOrderQueue2.ext_oneapi_submit_barrier(); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node3 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node4 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier3 = InOrderQueue1.ext_oneapi_submit_barrier(); + auto Barrier4 = InOrderQueue2.ext_oneapi_submit_barrier(); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node5 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node6 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier5 = InOrderQueue1.ext_oneapi_submit_barrier({Node5, Node6}); + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B1) (B2) + // | | + // (3) (4) + // | | + // (B3) (B4) + // | | + // (5) (6) + // \ / + // (B5) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + bool EvenPath; + + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + EvenPath = true; + } else if (GraphImpl->getEventForNode(RootNode), + sycl::detail::getSyclObjImpl(Node1)) { + EvenPath = false; + } else { + ASSERT_TRUE(false); + } + + auto Succ1Node = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(Succ1Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ1Node), + sycl::detail::getSyclObjImpl(Barrier2)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ1Node), + sycl::detail::getSyclObjImpl(Barrier1)); + } + + auto Succ2Node = Succ1Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ2Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ2Node), + sycl::detail::getSyclObjImpl(Node4)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ2Node), + sycl::detail::getSyclObjImpl(Node3)); + } + + auto Succ3Node = Succ2Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ3Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ3Node), + sycl::detail::getSyclObjImpl(Barrier4)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ3Node), + sycl::detail::getSyclObjImpl(Barrier3)); + } + + auto Succ4Node = Succ3Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ4Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ4Node), + sycl::detail::getSyclObjImpl(Node6)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ4Node), + sycl::detail::getSyclObjImpl(Node5)); + } - auto Node2Impl = sycl::detail::getSyclObjImpl(Node2); - ASSERT_EQ(SuccSuccNode, Node2Impl); + auto Succ5Node = Succ4Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ5Node->MSuccessors.size(), 0lu); + ASSERT_EQ(Succ5Node->MPredecessors.size(), 2lu); + ASSERT_EQ(GraphImpl->getEventForNode(Succ5Node), + sycl::detail::getSyclObjImpl(Barrier5)); } } diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 28de1cf587504..04f306dc0cfb8 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -237,6 +237,41 @@ TEST_F(CommandGraphTest, ExplicitBarrierException) { ASSERT_EQ(Success, false); } +TEST_F(CommandGraphTest, ExplicitBarrierDependencyException) { + + experimental::command_graph Graph2{ + Queue}; + + Graph2.begin_recording({Queue}); + + auto Node = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph2.end_recording(); + + auto Event = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.begin_recording(Queue); + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + auto BarrierNode = Queue.ext_oneapi_submit_barrier({Node}); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + auto BarrierNode = Queue.ext_oneapi_submit_barrier({Event}); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + Graph2.end_recording(); +} + TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { device D; if (!D.get_info<