Skip to content

Commit 7bb11ce

Browse files
committed
[SYCL] Use handler to execute graph
Update the API to match the spec change from #26, so that a graph is executed via the handler rather than via queue submit. This spec update also includes queue shortcut functions, for which I've added a new test.
1 parent 578692f commit 7bb11ce

File tree

7 files changed

+110
-22
lines changed

7 files changed

+110
-22
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include <sycl/sampler.hpp>
3131
#include <sycl/stl.hpp>
3232

33+
#include <sycl/ext/oneapi/experimental/graph.hpp>
34+
3335
#include <functional>
3436
#include <limits>
3537
#include <memory>
@@ -2516,6 +2518,13 @@ class __SYCL_EXPORT handler {
25162518
/// \param Advice is a device-defined advice for the specified allocation.
25172519
void mem_advise(const void *Ptr, size_t Length, int Advice);
25182520

2521+
/// Executes a command_graph.
2522+
///
2523+
/// \param Graph Executable command_graph to run
2524+
void exec_graph(ext::oneapi::experimental::command_graph<
2525+
ext::oneapi::experimental::graph_state::executable>
2526+
Graph);
2527+
25192528
private:
25202529
std::shared_ptr<detail::handler_impl> MImpl;
25212530
std::shared_ptr<detail::queue_impl> MQueue;

sycl/include/sycl/queue.hpp

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include <sycl/property_list.hpp>
2424
#include <sycl/stl.hpp>
2525

26-
#include <sycl/ext/oneapi/experimental/graph.hpp>
2726

2827
// Explicitly request format macros
2928
#ifndef __STDC_FORMAT_MACROS
@@ -1060,6 +1059,55 @@ class __SYCL_EXPORT queue {
10601059
// Clean KERNELFUNC macros.
10611060
#undef _KERNELFUNCPARAM
10621061

1062+
/// Shortcut for executing a graph of commands.
1063+
///
1064+
/// \param Graph the graph of commands to execute
1065+
/// \return an event representing graph execution operation.
1066+
event exec_graph(ext::oneapi::experimental::command_graph<
1067+
ext::oneapi::experimental::graph_state::executable>
1068+
Graph) {
1069+
const detail::code_location CodeLoc = {};
1070+
return submit([&](handler &CGH) { CGH.exec_graph(Graph); }, CodeLoc);
1071+
}
1072+
1073+
/// Shortcut for executing a graph of commands.
1074+
///
1075+
/// \param Graph the graph of commands to execute
1076+
/// \param DepEvent is an event that specifies the graph execution
1077+
/// dependencies.
1078+
/// \return an event representing graph execution operation.
1079+
event exec_graph(ext::oneapi::experimental::command_graph<
1080+
ext::oneapi::experimental::graph_state::executable>
1081+
Graph,
1082+
event DepEvent) {
1083+
const detail::code_location CodeLoc = {};
1084+
return submit(
1085+
[&](handler &CGH) {
1086+
CGH.depends_on(DepEvent);
1087+
CGH.exec_graph(Graph);
1088+
},
1089+
CodeLoc);
1090+
}
1091+
1092+
/// Shortcut for executing a graph of commands.
1093+
///
1094+
/// \param Graph the graph of commands to execute
1095+
/// \param DepEvents is a vector of events that specifies the graph
1096+
/// execution dependencies.
1097+
/// \return an event representing graph execution operation.
1098+
event exec_graph(ext::oneapi::experimental::command_graph<
1099+
ext::oneapi::experimental::graph_state::executable>
1100+
Graph,
1101+
const std::vector<event> &DepEvents) {
1102+
const detail::code_location CodeLoc = {};
1103+
return submit(
1104+
[&](handler &CGH) {
1105+
CGH.depends_on(DepEvents);
1106+
CGH.exec_graph(Graph);
1107+
},
1108+
CodeLoc);
1109+
}
1110+
10631111
/// Returns whether the queue is in order or OoO
10641112
///
10651113
/// Equivalent to has_property<property::queue::in_order>()
@@ -1070,14 +1118,6 @@ class __SYCL_EXPORT queue {
10701118
/// \return the backend associated with this queue.
10711119
backend get_backend() const noexcept;
10721120

1073-
public:
1074-
/// Submits an executable command_graph for execution on this queue
1075-
///
1076-
/// \return an event representing the execution of the command_graph
1077-
event submit(ext::oneapi::experimental::command_graph<
1078-
ext::oneapi::experimental::graph_state::executable>
1079-
graph);
1080-
10811121
private:
10821122
pi_native_handle getNative() const;
10831123

sycl/source/handler.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <detail/config.hpp>
1212
#include <detail/global_handler.hpp>
13+
#include <detail/graph_impl.hpp>
1314
#include <detail/handler_impl.hpp>
1415
#include <detail/kernel_bundle_impl.hpp>
1516
#include <detail/kernel_impl.hpp>
@@ -698,5 +699,12 @@ void handler::depends_on(const std::vector<event> &Events) {
698699
}
699700
}
700701

702+
/// Runs \p Graph by handing this handler's MQueue to the graph
/// implementation object.
///
/// \param Graph is the executable command_graph to run.
void handler::exec_graph(
    ext::oneapi::experimental::command_graph<
        ext::oneapi::experimental::graph_state::executable>
        Graph) {
  // NOTE(review): judging by its name, exec_and_wait() presumably blocks
  // until the graph has finished; confirm against graph_impl.
  detail::getSyclObjImpl(Graph)->exec_and_wait(MQueue);
}
708+
701709
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
702710
} // namespace sycl

sycl/source/queue.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#include <detail/backend_impl.hpp>
1010
#include <detail/event_impl.hpp>
11-
#include <detail/graph_impl.hpp>
1211
#include <detail/queue_impl.hpp>
1312
#include <sycl/event.hpp>
1413
#include <sycl/exception_list.hpp>
@@ -213,13 +212,5 @@ bool queue::device_has(aspect Aspect) const {
213212
// avoid creating sycl object from impl
214213
return impl->getDeviceImplPtr()->has(Aspect);
215214
}
216-
217-
event queue::submit(ext::oneapi::experimental::command_graph<
218-
ext::oneapi::experimental::graph_state::executable>
219-
Graph) {
220-
auto GraphImpl = detail::getSyclObjImpl(Graph);
221-
GraphImpl->exec_and_wait(this->impl);
222-
return {};
223-
}
224215
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
225216
} // namespace sycl

sycl/test/graph/graph-explicit-dotp.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,10 @@ int main() {
7878
},
7979
{node_a, node_b});
8080

81-
auto exec_graph = g.finalize(q.get_context());
81+
auto executable_graph = g.finalize(q.get_context());
8282

83-
q.submit(exec_graph).wait();
83+
// Using shortcut for executing a graph of commands
84+
q.exec_graph(executable_graph).wait();
8485

8586
if (*dotp != host_gold_result()) {
8687
std::cout << "Error unexpected result!\n";
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
#include <CL/sycl.hpp>
3+
#include <iostream>
4+
5+
#include <sycl/ext/oneapi/experimental/graph.hpp>
6+
7+
int main() {
8+
9+
sycl::property_list properties{
10+
sycl::property::queue::in_order{},
11+
sycl::ext::oneapi::property::queue::lazy_execution{}};
12+
13+
sycl::queue q{sycl::gpu_selector_v, properties};
14+
15+
sycl::ext::oneapi::experimental::command_graph g;
16+
17+
const size_t n = 10;
18+
float *arr = sycl::malloc_shared<float>(n, q);
19+
20+
g.add([&](sycl::handler &h) {
21+
h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) {
22+
size_t i = idx;
23+
arr[i] = 1;
24+
});
25+
});
26+
27+
auto executable_graph = g.finalize(q.get_context());
28+
29+
auto e1 = q.exec_graph(executable_graph);
30+
auto e2 = q.exec_graph(executable_graph, e1);
31+
auto e3 = q.exec_graph(executable_graph, e1);
32+
q.exec_graph(executable_graph, {e2, e3}).wait();
33+
34+
sycl::free(arr, q);
35+
36+
std::cout << "done " << arr[0] << std::endl;
37+
38+
return 0;
39+
}

sycl/test/graph/graph-explicit-simple.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ int main() {
2626

2727
auto result_before_exec1 = arr[0];
2828

29-
auto exec_graph = g.finalize(q.get_context());
29+
auto executable_graph = g.finalize(q.get_context());
3030

3131
auto result_before_exec2 = arr[0];
3232

33-
q.submit(exec_graph).wait();
33+
q.submit([&](sycl::handler &h) { h.exec_graph(executable_graph); });
3434

3535
auto result = arr[0];
3636

0 commit comments

Comments
 (0)