diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index 77fab2ebe5fb1..3e5e46823dc7c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -1736,15 +1736,23 @@ passed an invalid event. The new handler methods, and queue shortcuts, defined by link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier] can only be used in graph nodes created using the Record & Replay API, as -barriers rely on events to enforce dependencies. For barriers with an empty -wait list parameter, the semantics are that the barrier node being added to -will depend on all the existing graph leaf nodes, not only the leaf nodes -that were added from the queue being recorded. +barriers rely on events to enforce dependencies. A synchronous exception will be thrown with error code `invalid` if a user -tries to add them to a graph using the Explicit API. Empty nodes created with -the `node::depends_on_all_leaves` property can be used instead of barriers when -a user is building a graph with the explicit API. +tries to add a barrier command to a graph using the explicit API. Empty nodes +created with the `node::depends_on_all_leaves` property can be used instead of +barriers when a user is building a graph with the explicit API. + +The semantics of barriers are defined in `sycl_ext_oneapi_enqueue_barrier` for +a single command-queue, and correlate as follows to a graph that may contain +nodes that are recorded from multiple queues and/or added by the explicit API: + +* Barriers with an empty wait list parameter will only depend on the leaf nodes + that were added to the graph from the queue the barrier command is being + recorded from. + +* The only commands which have an implicit dependency on the barrier command + are those recorded from the same queue the barrier command was submitted to. ==== sycl_ext_oneapi_memcpy2d diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 3268a27fbb827..bd7b407d2014e 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -353,9 +353,6 @@ graph_impl::add(const std::shared_ptr &Impl, const std::shared_ptr &NodeImpl = std::make_shared(); - // Add any deps from the vector of extra dependencies - Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end()); - MNodeStorage.push_back(NodeImpl); addDepsToNode(NodeImpl, Deps); @@ -488,20 +485,12 @@ graph_impl::add(node_type NodeType, // list Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); - // Add any deps from the extra dependencies vector - Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end()); - const std::shared_ptr &NodeImpl = std::make_shared(NodeType, std::move(CommandGroup)); MNodeStorage.push_back(NodeImpl); addDepsToNode(NodeImpl, Deps); - // Set barrier nodes as prerequisites (new start points) for subsequent nodes - if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { - MExtraDependencies.push_back(NodeImpl); - } - return NodeImpl; } @@ -610,12 +599,17 @@ void graph_impl::makeEdge(std::shared_ptr Src, removeRoot(Dest); // remove receiver from root node list } -std::vector graph_impl::getExitNodesEvents() { +std::vector graph_impl::getExitNodesEvents( + std::weak_ptr RecordedQueue) { std::vector Events; + auto RecordedQueueSP = RecordedQueue.lock(); for (auto &Node : MNodeStorage) { if (Node->MSuccessors.empty()) { - Events.push_back(getEventForNode(Node)); + auto EventForNode = getEventForNode(Node); + if (EventForNode->getSubmittedQueue() == RecordedQueueSP) { + Events.push_back(getEventForNode(Node)); + } } } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 80837181ec056..9d13a87ed13e9 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -1184,26 +1184,26 @@ class graph_impl { size_t getNumberOfNodes() const { return MNodeStorage.size(); } /// Traverse the graph recursively to get the events associated with the - /// output nodes of this graph. + /// output nodes of this graph associated with a specific queue. + /// @param[in] Queue The queue exit nodes must have been recorded from. /// @return vector of events associated to exit nodes. - std::vector getExitNodesEvents(); - - /// Removes all Barrier nodes from the list of extra dependencies - /// MExtraDependencies. - /// @return vector of events associated to previous barrier nodes. std::vector - removeBarriersFromExtraDependencies() { - std::vector Events; - for (auto It = MExtraDependencies.begin(); - It != MExtraDependencies.end();) { - if ((*It)->MCGType == sycl::detail::CG::Barrier) { - Events.push_back(getEventForNode(*It)); - It = MExtraDependencies.erase(It); - } else { - ++It; - } - } - return Events; + getExitNodesEvents(std::weak_ptr Queue); + + /// Store the last barrier node that was submitted to the queue. + /// @param[in] Queue The queue the barrier was recorded from. + /// @param[in] BarrierNodeImpl The created barrier node. + void setBarrierDep(std::weak_ptr Queue, + std::shared_ptr BarrierNodeImpl) { + MBarrierDependencyMap[Queue] = BarrierNodeImpl; + } + + /// Get the last barrier node that was submitted to the queue. + /// @param[in] Queue The queue to find the last barrier node of. An empty + /// shared_ptr is returned if no barrier node has been recorded to the queue. + std::shared_ptr + getBarrierDep(std::weak_ptr Queue) { + return MBarrierDependencyMap[Queue]; } private: @@ -1281,11 +1281,11 @@ class graph_impl { /// presence of the assume_buffer_outlives_graph property. bool MAllowBuffers = false; - /// List of nodes that must be added as extra dependencies to new nodes when - /// added to this graph. - /// This list is mainly used by barrier nodes which must be considered - /// as predecessors for all nodes subsequently added to the graph. - std::list> MExtraDependencies; + /// Mapping from queues to barrier nodes. For each queue the last barrier + /// node recorded to the graph from the queue is stored. + std::map, std::shared_ptr, + std::owner_less>> + MBarrierDependencyMap; }; /// Class representing the implementation of command_graph. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b16441e4ff146..27af40344c8e7 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -403,19 +403,6 @@ event handler::finalize() { case detail::CG::Barrier: case detail::CG::BarrierWaitlist: { if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) { - // if no event to wait for was specified, we add all exit - // nodes/events of the graph - if (MEventsWaitWithBarrier.size() == 0) { - MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents(); - // Graph-wide barriers take precedence over previous one. - // We therefore remove the previous ones from ExtraDependencies list. - // The current barrier is then added to this list in the graph_impl. - std::vector EventsBarriers = - GraphImpl->removeBarriersFromExtraDependencies(); - MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier), - std::begin(EventsBarriers), - std::end(EventsBarriers)); - } CGData.MEvents.insert(std::end(CGData.MEvents), std::begin(MEventsWaitWithBarrier), std::end(MEventsWaitWithBarrier)); @@ -533,6 +520,7 @@ event handler::finalize() { // it to the graph to create a node, rather than submit it to the scheduler. if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) { auto EventImpl = std::make_shared(); + EventImpl->setSubmittedQueue(MQueue); std::shared_ptr NodeImpl = nullptr; @@ -564,7 +552,17 @@ event handler::finalize() { // queue. GraphImpl->setLastInorderNode(MQueue, NodeImpl); } else { - NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue); + if (LastBarrierRecordedFromQueue) { + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup), + {LastBarrierRecordedFromQueue}); + } else { + NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup)); + } + + if (NodeImpl->MCGType == sycl::detail::CG::Barrier) { + GraphImpl->setBarrierDep(MQueue, NodeImpl); + } } // Associate an event with this new node and return the event. diff --git a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp index 79f55dc226b62..790956d5aeff4 100644 --- a/sycl/unittests/Extensions/CommandGraph/Barrier.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Barrier.cpp @@ -61,8 +61,8 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { auto Node3Graph = Queue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - auto Barrier = - Queue2.submit([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); }); + auto Barrier = Queue2.submit( + [&](sycl::handler &cgh) { cgh.ext_oneapi_barrier({Node2Graph}); }); auto Node4Graph = Queue2.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); @@ -74,21 +74,44 @@ TEST_F(CommandGraphTest, EnqueueBarrierMultipleQueues) { // Check the graph structure // (1) (2) (3) - // \ | / - // \ | / + // | // (B) // / \ // (4) (5) ASSERT_EQ(GraphImpl->MRoots.size(), 3lu); for (auto Root : GraphImpl->MRoots) { - auto Node = Root.lock(); - ASSERT_EQ(Node->MSuccessors.size(), 1lu); - auto BarrierNode = Node->MSuccessors.front().lock(); - ASSERT_EQ(BarrierNode->MCGType, sycl::detail::CG::Barrier); - ASSERT_EQ(GraphImpl->getEventForNode(BarrierNode), - sycl::detail::getSyclObjImpl(Barrier)); - ASSERT_EQ(BarrierNode->MPredecessors.size(), 3lu); - ASSERT_EQ(BarrierNode->MSuccessors.size(), 2lu); + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2Graph)) { + + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 2lu); + + for (auto SuccSucc : SuccNode->MSuccessors) { + auto SuccSuccNode = SuccSucc.lock(); + + if (GraphImpl->getEventForNode(SuccSuccNode) == + sycl::detail::getSyclObjImpl(Node4Graph)) { + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(SuccSuccNode) == + sycl::detail::getSyclObjImpl(Node5Graph)) { + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected node"); + } + } + } else { + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + } } } @@ -434,10 +457,8 @@ TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { // Check the graph structure // (1) (2) - // \ / | - // (B) | - // | / - // (3) + // | | + // (B) (3) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); @@ -447,27 +468,28 @@ TEST_F(CommandGraphTest, InOrderQueuesWithEmptyBarrierWaitList) { if (GraphImpl->getEventForNode(RootNode) == sycl::detail::getSyclObjImpl(Node1)) { ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); - } else if (GraphImpl->getEventForNode(RootNode) == - sycl::detail::getSyclObjImpl(Node2)) { - ASSERT_EQ(RootNode->MSuccessors.size(), 2lu); - } else { - ASSERT_TRUE(false && "Unexpected root node"); - } - auto SuccNode = RootNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); - ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), - sycl::detail::getSyclObjImpl(Node3)); + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } } } @@ -487,7 +509,7 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { auto Node2 = OutOfOrderQueue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); - auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier(); + auto BarrierNode = InOrderQueue.ext_oneapi_submit_barrier({Node1, Node2}); auto Node3 = OutOfOrderQueue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node2); @@ -500,8 +522,8 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { // (1) (2) // \ /| // (B) | - // | / - // (3) + // | + // (3) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); @@ -518,20 +540,21 @@ TEST_F(CommandGraphTest, BarrierMixedQueueTypes) { ASSERT_TRUE(false && "Unexpected root node"); } - auto SuccNode = RootNode->MSuccessors.front().lock(); - - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); - - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); - - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 2lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); - - ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), - sycl::detail::getSyclObjImpl(Node3)); + for (auto Succ : RootNode->MSuccessors) { + auto SuccNode = Succ.lock(); + + if (GraphImpl->getEventForNode(SuccNode) == + sycl::detail::getSyclObjImpl(BarrierNode)) { + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (GraphImpl->getEventForNode(SuccNode) == + sycl::detail::getSyclObjImpl(Node3)) { + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false && "Unexpected root node"); + } + } } } @@ -550,35 +573,370 @@ TEST_F(CommandGraphTest, BarrierBetweenExplicitNodes) { Graph.end_recording(); auto Node2 = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, + {experimental::property::node::depends_on(Node1)}); + + // Check the graph structure + // (B) (1) + // | + // (2) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(BarrierNode)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 0lu); + } else if (RootNode == sycl::detail::getSyclObjImpl(Node1)) { + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(SuccNode, sycl::detail::getSyclObjImpl(Node2)); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleOOOQueue) { + sycl::queue Queue2{Queue.get_context(), Dev}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, Queue2}); + + auto Node1 = Queue.submit( [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node3 = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node4 = Queue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = Queue.ext_oneapi_submit_barrier(); + + auto Node5 = Queue2.submit([&](sycl::handler &cgh) { + cgh.depends_on({Node3, Node4}); + cgh.single_task>([]() {}); + }); + + auto Node6 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + // Check the graph structure - // (1) - // | - // (B) - // | - // (2) + // (1) (2) (3) (4) + // \ / \ / + // (B) (5) + // | + // (6) auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); - ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ(GraphImpl->MRoots.size(), 4u); for (auto Root : GraphImpl->MRoots) { auto RootNode = Root.lock(); - auto Node1Impl = sycl::detail::getSyclObjImpl(Node1); - ASSERT_EQ(RootNode, Node1Impl); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if ((RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) || + (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2))) { - auto SuccNode = RootNode->MSuccessors.front().lock(); + auto SuccNode = RootNode->MSuccessors.front().lock(); - ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), - sycl::detail::getSyclObjImpl(BarrierNode)); - ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); - ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 1lu); - auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + auto SuccSuccNode = SuccNode->MSuccessors.front().lock(); + + ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + + auto Node6Impl = sycl::detail::getSyclObjImpl(Node6); + ASSERT_EQ(GraphImpl->getEventForNode(SuccSuccNode), Node6Impl); + } else if ((RootNodeEvent == sycl::detail::getSyclObjImpl(Node3)) || + (RootNodeEvent == sycl::detail::getSyclObjImpl(Node4))) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node5)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 2lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleInOrderQueue) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Queue.get_context(), Dev, Properties}; + sycl::queue InOrderQueue2{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + InOrderQueue1}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = InOrderQueue1.ext_oneapi_submit_barrier(); - ASSERT_EQ(SuccSuccNode->MPredecessors.size(), 1lu); - ASSERT_EQ(SuccSuccNode->MSuccessors.size(), 0lu); + auto Node3 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleMixedOrderQueues) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, InOrderQueue}); + + auto Node1 = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto Node2 = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + auto BarrierNode = Queue.ext_oneapi_submit_barrier(); + + auto Node3 = InOrderQueue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(BarrierNode)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Node2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Node3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierMultipleQueuesMultipleBarriers) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue{Queue.get_context(), Dev, Properties}; + experimental::command_graph Graph{ + Queue}; + + Graph.begin_recording({Queue, InOrderQueue}); + + auto Barrier1 = Queue.ext_oneapi_submit_barrier(); + auto Barrier2 = InOrderQueue.ext_oneapi_submit_barrier(); + auto Barrier3 = InOrderQueue.ext_oneapi_submit_barrier(); + auto Barrier4 = Queue.ext_oneapi_submit_barrier(); + + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (4) (3) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2u); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + auto RootNodeEvent = GraphImpl->getEventForNode(RootNode); + if (RootNodeEvent == sycl::detail::getSyclObjImpl(Barrier1)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier4)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else if (RootNodeEvent == sycl::detail::getSyclObjImpl(Barrier2)) { + auto SuccNode = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(GraphImpl->getEventForNode(SuccNode), + sycl::detail::getSyclObjImpl(Barrier3)); + + ASSERT_EQ(SuccNode->MPredecessors.size(), 1lu); + ASSERT_EQ(SuccNode->MSuccessors.size(), 0lu); + } else { + ASSERT_TRUE(false); + } + } +} + +TEST_F(CommandGraphTest, BarrierWithInOrderCommands) { + sycl::property_list Properties{sycl::property::queue::in_order()}; + sycl::queue InOrderQueue1{Dev, Properties}; + sycl::queue InOrderQueue2{Dev, Properties}; + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node1 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node2 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier1 = InOrderQueue1.ext_oneapi_submit_barrier(); + auto Barrier2 = InOrderQueue2.ext_oneapi_submit_barrier(); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node3 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node4 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier3 = InOrderQueue1.ext_oneapi_submit_barrier(); + auto Barrier4 = InOrderQueue2.ext_oneapi_submit_barrier(); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Node5 = InOrderQueue1.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto Node6 = InOrderQueue2.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph.end_recording(); + + Graph.begin_recording({InOrderQueue1, InOrderQueue2}); + auto Barrier5 = InOrderQueue1.ext_oneapi_submit_barrier({Node5, Node6}); + Graph.end_recording(); + + // Check the graph structure + // (1) (2) + // | | + // (B1) (B2) + // | | + // (3) (4) + // | | + // (B3) (B4) + // | | + // (5) (6) + // \ / + // (B5) + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + ASSERT_EQ(GraphImpl->MRoots.size(), 2lu); + + for (auto Root : GraphImpl->MRoots) { + auto RootNode = Root.lock(); + bool EvenPath; + + ASSERT_EQ(RootNode->MSuccessors.size(), 1lu); + if (GraphImpl->getEventForNode(RootNode) == + sycl::detail::getSyclObjImpl(Node2)) { + EvenPath = true; + } else if (GraphImpl->getEventForNode(RootNode), + sycl::detail::getSyclObjImpl(Node1)) { + EvenPath = false; + } else { + ASSERT_TRUE(false); + } + + auto Succ1Node = RootNode->MSuccessors.front().lock(); + ASSERT_EQ(Succ1Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ1Node), + sycl::detail::getSyclObjImpl(Barrier2)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ1Node), + sycl::detail::getSyclObjImpl(Barrier1)); + } + + auto Succ2Node = Succ1Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ2Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ2Node), + sycl::detail::getSyclObjImpl(Node4)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ2Node), + sycl::detail::getSyclObjImpl(Node3)); + } + + auto Succ3Node = Succ2Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ3Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ3Node), + sycl::detail::getSyclObjImpl(Barrier4)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ3Node), + sycl::detail::getSyclObjImpl(Barrier3)); + } + + auto Succ4Node = Succ3Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ4Node->MSuccessors.size(), 1lu); + if (EvenPath) { + ASSERT_EQ(GraphImpl->getEventForNode(Succ4Node), + sycl::detail::getSyclObjImpl(Node6)); + } else { + ASSERT_EQ(GraphImpl->getEventForNode(Succ4Node), + sycl::detail::getSyclObjImpl(Node5)); + } - auto Node2Impl = sycl::detail::getSyclObjImpl(Node2); - ASSERT_EQ(SuccSuccNode, Node2Impl); + auto Succ5Node = Succ4Node->MSuccessors.front().lock(); + ASSERT_EQ(Succ5Node->MSuccessors.size(), 0lu); + ASSERT_EQ(Succ5Node->MPredecessors.size(), 2lu); + ASSERT_EQ(GraphImpl->getEventForNode(Succ5Node), + sycl::detail::getSyclObjImpl(Barrier5)); } } diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 28de1cf587504..04f306dc0cfb8 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -237,6 +237,41 @@ TEST_F(CommandGraphTest, ExplicitBarrierException) { ASSERT_EQ(Success, false); } +TEST_F(CommandGraphTest, ExplicitBarrierDependencyException) { + + experimental::command_graph Graph2{ + Queue}; + + Graph2.begin_recording({Queue}); + + auto Node = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Graph2.end_recording(); + + auto Event = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + Graph.begin_recording(Queue); + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + auto BarrierNode = Queue.ext_oneapi_submit_barrier({Node}); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + auto BarrierNode = Queue.ext_oneapi_submit_barrier({Event}); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + Graph2.end_recording(); +} + TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { device D; if (!D.get_info<