Skip to content

[SYCL][Graph] Fix memset queue shortcut when queue is recorded #12508

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 13, 2024
26 changes: 7 additions & 19 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,6 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
// Emit a begin/end scope for this call
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
#endif
if (MGraph.lock()) {
throw sycl::exception(make_error_code(errc::invalid),
"The memset feature is not yet available "
"for use with the SYCL Graph extension.");
}

return submitMemOpHelper(
Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
Expand Down Expand Up @@ -169,19 +164,14 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
// Emit a begin/end scope for this call
PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
#endif
// If we have a command graph set we need to capture the copy through normal
// queue submission rather than execute the copy directly.
auto HandlerFunc = [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

if ((!Src || !Dest) && Count != 0) {
report(CodeLoc);
throw runtime_error("NULL pointer argument in memory copy operation.",
PI_ERROR_INVALID_VALUE);
}
return submitMemOpHelper(
Self, DepEvents, HandlerFunc,
Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
[](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
Count, Dest);
}
Expand All @@ -190,14 +180,9 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
const void *Ptr, size_t Length,
pi_mem_advice Advice,
const std::vector<event> &DepEvents) {
// If we have a command graph set we need to capture the advise through normal
// queue submission.
auto HandlerFunc = [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); };
if (MGraph.lock())
return submitWithHandler(Self, DepEvents, HandlerFunc);

return submitMemOpHelper(
Self, DepEvents, HandlerFunc,
Self, DepEvents,
[&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
[](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
Self, Length, Advice);
}
Expand Down Expand Up @@ -353,7 +338,10 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &ExpandedDepEvents =
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);

if (areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
// If we have a command graph set we need to capture the op through the
// handler rather than by-passing the scheduler.
if (!MGraph.lock() &&
areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
if (MSupportsDiscardingPiEvents) {
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
Expand Down
48 changes: 48 additions & 0 deletions sycl/test-e2e/Graph/RecordReplay/usm_memset_shortcut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
// RUN: %if level_zero %{env UR_L0_LEAKS_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
//
// Tests adding a USM memset queue shortcut operation as a graph node.

#include "../graph_common.hpp"

int main() {

queue Queue;

if (!are_graphs_supported(Queue)) {
return 0;
}

exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};

const size_t N = 10;
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);

int Value = 77;
Graph.begin_recording(Queue);
auto Init = Queue.memset(Arr, Value, N);
Queue.submit([&](handler &CGH) {
CGH.depends_on(Init);
CGH.single_task<class double_dest>([=]() {
for (int i = 0; i < Size; i++)
Arr[i] = 2 * Arr[i];
});
});

Graph.end_recording();

auto ExecGraph = Graph.finalize();

Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); }).wait();

std::vector<unsigned char> Output(N);
Queue.memcpy(Output.data(), Arr, N).wait();
for (int i = 0; i < N; i++)
assert(Output[i] == (Value * 2));

sycl::free(Arr, Queue);

return 0;
}
19 changes: 0 additions & 19 deletions sycl/unittests/Extensions/CommandGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1848,25 +1848,6 @@ TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) {
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
}

TEST_F(CommandGraphTest, USMMemsetShortcutExceptionCheck) {

const size_t N = 10;
unsigned char *Arr = malloc_device<unsigned char>(N, Queue);
int Value = 77;

Graph.begin_recording(Queue);

std::error_code ExceptionCode = make_error_code(sycl::errc::success);
try {
Queue.memset(Arr, Value, N);
} catch (exception &Exception) {
ExceptionCode = Exception.code();
}
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);

Graph.end_recording(Queue);
}

TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) {
constexpr size_t RECT_WIDTH = 30;
constexpr size_t RECT_HEIGHT = 21;
Expand Down