Skip to content

Commit 3e06221

Browse files
authored
[SYCL][Graph] Refine barrier semantics (#14363)
Changes the semantics of [sycl_ext_oneapi_enqueue_barrier](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_enqueue_barrier.asciidoc) commands recorded from a queue into a SYCL-Graph. The current semantics are that a barrier without a wait-list will depend on all leaf nodes in the graph, and be a dependency of any subsequent node added to the graph. After discussion with users, this change updates the behavior of barriers to only depend on leaf nodes from the same recording queue, and only commands recorded from the same queue will have a dependency on the barrier.
1 parent cb71841 commit 3e06221

File tree

6 files changed

+517
-124
lines changed

6 files changed

+517
-124
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,15 +1736,23 @@ passed an invalid event.
17361736
The new handler methods, and queue shortcuts, defined by
17371737
link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier]
17381738
can only be used in graph nodes created using the Record & Replay API, as
1739-
barriers rely on events to enforce dependencies. For barriers with an empty
1740-
wait list parameter, the semantics are that the barrier node being added to
1741-
will depend on all the existing graph leaf nodes, not only the leaf nodes
1742-
that were added from the queue being recorded.
1739+
barriers rely on events to enforce dependencies.
17431740

17441741
A synchronous exception will be thrown with error code `invalid` if a user
1745-
tries to add them to a graph using the Explicit API. Empty nodes created with
1746-
the `node::depends_on_all_leaves` property can be used instead of barriers when
1747-
a user is building a graph with the explicit API.
1742+
tries to add a barrier command to a graph using the explicit API. Empty nodes
1743+
created with the `node::depends_on_all_leaves` property can be used instead of
1744+
barriers when a user is building a graph with the explicit API.
1745+
1746+
The semantics of barriers are defined in `sycl_ext_oneapi_enqueue_barrier` for
1747+
a single command-queue, and correlate as follows to a graph that may contain
1748+
nodes that are recorded from multiple queues and/or added by the explicit API:
1749+
1750+
* Barriers with an empty wait list parameter will only depend on the leaf nodes
1751+
that were added to the graph from the queue the barrier command is being
1752+
recorded from.
1753+
1754+
* The only commands which have an implicit dependency on the barrier command
1755+
are those recorded from the same queue the barrier command was submitted to.
17481756

17491757
==== sycl_ext_oneapi_memcpy2d
17501758

sycl/source/detail/graph_impl.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -357,9 +357,6 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
357357

358358
const std::shared_ptr<node_impl> &NodeImpl = std::make_shared<node_impl>();
359359

360-
// Add any deps from the vector of extra dependencies
361-
Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end());
362-
363360
MNodeStorage.push_back(NodeImpl);
364361

365362
addDepsToNode(NodeImpl, Deps);
@@ -492,20 +489,12 @@ graph_impl::add(node_type NodeType,
492489
// list
493490
Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end());
494491

495-
// Add any deps from the extra dependencies vector
496-
Deps.insert(Deps.end(), MExtraDependencies.begin(), MExtraDependencies.end());
497-
498492
const std::shared_ptr<node_impl> &NodeImpl =
499493
std::make_shared<node_impl>(NodeType, std::move(CommandGroup));
500494
MNodeStorage.push_back(NodeImpl);
501495

502496
addDepsToNode(NodeImpl, Deps);
503497

504-
// Set barrier nodes as prerequisites (new start points) for subsequent nodes
505-
if (NodeImpl->MCGType == sycl::detail::CG::Barrier) {
506-
MExtraDependencies.push_back(NodeImpl);
507-
}
508-
509498
return NodeImpl;
510499
}
511500

@@ -614,12 +603,17 @@ void graph_impl::makeEdge(std::shared_ptr<node_impl> Src,
614603
removeRoot(Dest); // remove receiver from root node list
615604
}
616605

617-
std::vector<sycl::detail::EventImplPtr> graph_impl::getExitNodesEvents() {
606+
std::vector<sycl::detail::EventImplPtr> graph_impl::getExitNodesEvents(
607+
std::weak_ptr<sycl::detail::queue_impl> RecordedQueue) {
618608
std::vector<sycl::detail::EventImplPtr> Events;
619609

610+
auto RecordedQueueSP = RecordedQueue.lock();
620611
for (auto &Node : MNodeStorage) {
621612
if (Node->MSuccessors.empty()) {
622-
Events.push_back(getEventForNode(Node));
613+
auto EventForNode = getEventForNode(Node);
614+
if (EventForNode->getSubmittedQueue() == RecordedQueueSP) {
615+
Events.push_back(getEventForNode(Node));
616+
}
623617
}
624618
}
625619

sycl/source/detail/graph_impl.hpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,26 +1184,26 @@ class graph_impl {
11841184
size_t getNumberOfNodes() const { return MNodeStorage.size(); }
11851185

11861186
/// Traverse the graph recursively to get the events associated with the
1187-
/// output nodes of this graph.
1187+
/// output nodes of this graph associated with a specific queue.
1188+
/// @param[in] Queue The queue exit nodes must have been recorded from.
11881189
/// @return vector of events associated to exit nodes.
1189-
std::vector<sycl::detail::EventImplPtr> getExitNodesEvents();
1190-
1191-
/// Removes all Barrier nodes from the list of extra dependencies
1192-
/// MExtraDependencies.
1193-
/// @return vector of events associated to previous barrier nodes.
11941190
std::vector<sycl::detail::EventImplPtr>
1195-
removeBarriersFromExtraDependencies() {
1196-
std::vector<sycl::detail::EventImplPtr> Events;
1197-
for (auto It = MExtraDependencies.begin();
1198-
It != MExtraDependencies.end();) {
1199-
if ((*It)->MCGType == sycl::detail::CG::Barrier) {
1200-
Events.push_back(getEventForNode(*It));
1201-
It = MExtraDependencies.erase(It);
1202-
} else {
1203-
++It;
1204-
}
1205-
}
1206-
return Events;
1191+
getExitNodesEvents(std::weak_ptr<sycl::detail::queue_impl> Queue);
1192+
1193+
/// Store the last barrier node that was submitted to the queue.
1194+
/// @param[in] Queue The queue the barrier was recorded from.
1195+
/// @param[in] BarrierNodeImpl The created barrier node.
1196+
void setBarrierDep(std::weak_ptr<sycl::detail::queue_impl> Queue,
1197+
std::shared_ptr<node_impl> BarrierNodeImpl) {
1198+
MBarrierDependencyMap[Queue] = BarrierNodeImpl;
1199+
}
1200+
1201+
/// Get the last barrier node that was submitted to the queue.
1202+
/// @param[in] Queue The queue to find the last barrier node of. An empty
1203+
/// shared_ptr is returned if no barrier node has been recorded to the queue.
1204+
std::shared_ptr<node_impl>
1205+
getBarrierDep(std::weak_ptr<sycl::detail::queue_impl> Queue) {
1206+
return MBarrierDependencyMap[Queue];
12071207
}
12081208

12091209
private:
@@ -1281,11 +1281,11 @@ class graph_impl {
12811281
/// presence of the assume_buffer_outlives_graph property.
12821282
bool MAllowBuffers = false;
12831283

1284-
/// List of nodes that must be added as extra dependencies to new nodes when
1285-
/// added to this graph.
1286-
/// This list is mainly used by barrier nodes which must be considered
1287-
/// as predecessors for all nodes subsequently added to the graph.
1288-
std::list<std::shared_ptr<node_impl>> MExtraDependencies;
1284+
/// Mapping from queues to barrier nodes. For each queue the last barrier
1285+
/// node recorded to the graph from the queue is stored.
1286+
std::map<std::weak_ptr<sycl::detail::queue_impl>, std::shared_ptr<node_impl>,
1287+
std::owner_less<std::weak_ptr<sycl::detail::queue_impl>>>
1288+
MBarrierDependencyMap;
12891289
};
12901290

12911291
/// Class representing the implementation of command_graph<executable>.

sycl/source/handler.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -421,19 +421,6 @@ event handler::finalize() {
421421
case detail::CG::Barrier:
422422
case detail::CG::BarrierWaitlist: {
423423
if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) {
424-
// if no event to wait for was specified, we add all exit
425-
// nodes/events of the graph
426-
if (MEventsWaitWithBarrier.size() == 0) {
427-
MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
428-
// Graph-wide barriers take precedence over previous one.
429-
// We therefore remove the previous ones from ExtraDependencies list.
430-
// The current barrier is then added to this list in the graph_impl.
431-
std::vector<detail::EventImplPtr> EventsBarriers =
432-
GraphImpl->removeBarriersFromExtraDependencies();
433-
MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
434-
std::begin(EventsBarriers),
435-
std::end(EventsBarriers));
436-
}
437424
CGData.MEvents.insert(std::end(CGData.MEvents),
438425
std::begin(MEventsWaitWithBarrier),
439426
std::end(MEventsWaitWithBarrier));
@@ -551,6 +538,7 @@ event handler::finalize() {
551538
// it to the graph to create a node, rather than submit it to the scheduler.
552539
if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
553540
auto EventImpl = std::make_shared<detail::event_impl>();
541+
EventImpl->setSubmittedQueue(MQueue);
554542
std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
555543
nullptr;
556544

@@ -582,7 +570,17 @@ event handler::finalize() {
582570
// queue.
583571
GraphImpl->setLastInorderNode(MQueue, NodeImpl);
584572
} else {
585-
NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
573+
auto LastBarrierRecordedFromQueue = GraphImpl->getBarrierDep(MQueue);
574+
if (LastBarrierRecordedFromQueue) {
575+
NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup),
576+
{LastBarrierRecordedFromQueue});
577+
} else {
578+
NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
579+
}
580+
581+
if (NodeImpl->MCGType == sycl::detail::CG::Barrier) {
582+
GraphImpl->setBarrierDep(MQueue, NodeImpl);
583+
}
586584
}
587585

588586
// Associate an event with this new node and return the event.

0 commit comments

Comments
 (0)