From f3bf37a49fcfaace819149b78df72eed2299e580 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 26 Feb 2020 14:57:55 +0300 Subject: [PATCH 001/188] [SYCL] Tests for host/interop task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/accessor.hpp | 8 +- sycl/include/CL/sycl/detail/cg.hpp | 1 + sycl/include/CL/sycl/event.hpp | 6 + sycl/include/CL/sycl/handler.hpp | 54 ++++ sycl/include/CL/sycl/interop_handle.hpp | 53 ++++ sycl/include/CL/sycl/queue.hpp | 4 + .../host-task-through-event.cpp | 230 ++++++++++++++++++ sycl/test/host-interop-task/host-task.cpp | 73 ++++++ sycl/test/host-interop-task/interop-task.cpp | 74 ++++++ 9 files changed, 501 insertions(+), 2 deletions(-) create mode 100644 sycl/include/CL/sycl/interop_handle.hpp create mode 100644 sycl/test/host-interop-task/host-task-through-event.cpp create mode 100644 sycl/test/host-interop-task/host-task.cpp create mode 100644 sycl/test/host-interop-task/interop-task.cpp diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 74bb361bb96f2..78e65ba4880c1 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -813,9 +813,11 @@ class accessor : } #endif + // IsHostBuf is here for host/interop task template 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> + (Dims > 0) && (IsHostBuf || (!IsPlaceH && + (IsGlobalBuf || IsConstantBuf)))>> accessor(buffer &BufferRef, handler &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ @@ -853,9 +855,11 @@ class accessor : } #endif + // IsHostBuf is here for host/interop task template 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> + (Dims > 0) && (IsHostBuf || (!IsPlaceH && + (IsGlobalBuf || IsConstantBuf)))>> accessor(buffer &BufferRef, handler &CommandGroupHandler, range AccessRange, id AccessOffset = {}) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index c45e00643576c..ec6890807721c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ 
b/sycl/include/CL/sycl/detail/cg.hpp @@ -52,6 +52,7 @@ class interop_handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); } + private: cl_command_queue MQueue; std::vector MMemObjs; diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index ba6248a2b8908..fa6943442d180 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -112,6 +112,12 @@ class event { typename info::param_traits::return_type get_profiling_info() const; + template + void when_complete(FuncT &&Func) { + (void)Func; // eliminate possible compiler warning + throw std::runtime_error("Not implemented"); + } + private: event(shared_ptr_class EventImpl); diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index dd464de3d851c..083fa0f7bda0e 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,29 @@ template struct get_kernel_name_t { }; device getDeviceFromHandler(handler &); + +template +struct check_fn_signature { + static_assert(std::integral_constant::value, + "Second template parameter needs to be of function type"); +}; + +template +struct check_fn_signature { +private: + template + static constexpr auto check(T*) + -> typename std::is_same< + decltype(std::declval().operator()(std::declval()...)), + RetT>::type; + + template + static constexpr std::false_type check(...); + + typedef decltype(check(0)) type; +public: + static constexpr bool value = type::value; +}; } // namespace detail /// 4.8.3 Command group handler class @@ -568,6 +592,36 @@ class handler { MCGType = detail::CG::RUN_ON_HOST_INTEL; } + template + typename std::enable_if< + detail::check_fn_signature::value, + void>::type + codeplay_host_task(FuncT Func) { + (void)Func; // eliminate possible compiler warning + throw std::runtime_error("Not 
implemented"); + } + + template + typename std::enable_if< + detail::check_fn_signature::value, + void>::type + codeplay_host_task(FuncT Func) { + codeplay_host_task([Func](interop_handle &IH) { + (void)IH; // eliminate possible compiler warning + Func(); + }); + } + + template + typename std::enable_if< + detail::check_fn_signature::type, + void(event &)>::value, + void>::type + host_task(FuncT &&Func) { + (void)Func; // eliminate possible compiler warning + throw std::runtime_error("Not implemented"); + } + /// Defines and invokes a SYCL kernel function for the specified range and /// offset. /// diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp new file mode 100644 index 0000000000000..c181226ce5ae6 --- /dev/null +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -0,0 +1,53 @@ +//==-- interop_handler.hpp --- SYCL interoperability handler ---*- C++ -*---==// +// +// Copyright (C) 2018 Intel Corporation. All rights reserved. +// +// The information and source code contained herein is the exclusive property +// of Intel Corporation and may not be disclosed, examined or reproduced in +// whole or in part without explicit written authorization from the company. 
+// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template +class accessor; + +class interop_handle { +private: + interop_handle() { + assert(false && "Not implemented"); + } + +public: + template + cl_mem get_native_mem(const accessor &) const { + assert(false && "Not implemented"); + } + + cl_command_queue get_native_queue() const noexcept { + assert(false && "Not implemented"); + } + + cl_device_id get_native_device() const noexcept { + assert(false && "Not implemented"); + } + + cl_context get_native_context() const noexcept { + assert(false && "Not implemented"); + } +}; + +} // namespace sycl +} // namespace cl diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bb811decac723..a98e42bde47ef 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -448,6 +448,10 @@ class queue { /// Equivalent to has_property() bool is_in_order() const; + void set_host_task_thread_pool_size(unsigned int) { + throw std::runtime_error("Not implemented"); + } + private: shared_ptr_class impl; template diff --git a/sycl/test/host-interop-task/host-task-through-event.cpp b/sycl/test/host-interop-task/host-task-through-event.cpp new file mode 100644 index 0000000000000..db4ce0ecb91c5 --- /dev/null +++ b/sycl/test/host-interop-task/host-task-through-event.cpp @@ -0,0 +1,230 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +#include +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf; + std::mutex Mutex; + std::condition_variable CV; +}; + +void ThreadA1Fn(Context &Ctx) { + // T1.1. 
submit device-side kernel K1 + auto Event = Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + CGH.single_task(GeneratorKernel); + }); + + // T1.2. submit host task using event of K1 as a lock with callback to set + // flag F = true + Ctx.Queue.submit([&](S::handler &CGH) { + CGH.depends_on(Event); + + auto Callback = [&Ctx, Event] (const std::vector &E) -> void { + assert(E.size() == 1); + + // operator== of cl::sycl::event will only compare pointers to impls. + // We want to compare underlying impl objects, though. + assert(S::detail::getSyclObjImpl(Event)->get() == S::detail::getSyclObjImpl(E[0])->get()); + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + // The Callback is run on Queue-internal thread-pool or in backend's thread + // if thread pool size is explicitly set to 0 + CGH.host_task(Callback); + }); +} + +void ThreadB1Fn(Context &Ctx) { + // T1.1. submit device-side kernel K1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + CGH.single_task(GeneratorKernel); + }); + + // T1.2. 
submit host task using event of K1 as a lock with callback to set + // flag F = true + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor TestAcc(Ctx.Buf, CGH); + + auto Callback = [&Ctx, TestAcc] (const std::vector &E) -> void { + assert(E.size() == 1); + + for (size_t Idx = 0; Idx < TestAcc.get_count(); ++Idx) + assert(Idx == TestAcc[Idx]); + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + // The Callback is run on Queue-internal thread-pool or in backend's thread + // if thread pool size is explicitly set to 0 + CGH.host_task(Callback); + }); +} + +void ThreadC1Fn(Context &Ctx) { + // T1.1. submit device-side kernel K1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + CGH.single_task(GeneratorKernel); + }) + // T1.2. submit host task using event of K1 as a lock with callback to set + // flag F = true + .when_complete([&Ctx] (const S::event &E) { + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }); +} + +void Thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void testA() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional +// Queue.set_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}}; + + // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(ThreadA1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + std::cout << "Msg = " << Ctx.Message << std::endl; + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); +} + +void testB() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional +// Queue.set_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}}; + + // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(ThreadB1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); +} + +void testC() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional + Queue.set_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}}; + + // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(ThreadC1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); +} + +int main(void) { + testA(); +// testB(); +// testC(); + + return 0; +} diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp new file mode 100644 index 0000000000000..21741dd899499 --- /dev/null +++ b/sycl/test/host-interop-task/host-task.cpp @@ -0,0 +1,73 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include + +namespace S = cl::sycl; + +void test() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + +#define DATA_SIZE 10 + S::buffer Buf1(DATA_SIZE); + S::buffer Buf2(DATA_SIZE); + + // 0. initialize resulting buffer with apriori wrong result + { + S::accessor Acc(Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. submit host task writing from buf 1 to buf 2 + Queue.submit([&](S::handler &CGH) { + S::accessor CopierSrcAcc(Buf1, CGH); + S::accessor CopierDstAcc(Buf2, CGH); + + auto CopierKernel = [CopierSrcAcc, CopierDstAcc] () { + for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) + CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + }; + + CGH.codeplay_host_task(CopierKernel); + }); + + // 3. 
check via host accessor that buf 2 contains valid data + { + S::accessor ResultAcc(Buf2); + + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + } + } +} + +int main() { + test(); + + return 0; +} diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp new file mode 100644 index 0000000000000..b7aff104d43aa --- /dev/null +++ b/sycl/test/host-interop-task/interop-task.cpp @@ -0,0 +1,74 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include + +namespace S = cl::sycl; + +void test() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + +#define DATA_SIZE 10 + S::buffer Buf1(DATA_SIZE); + S::buffer Buf2(DATA_SIZE); + + // 0. initialize resulting buffer with apriori wrong result + { + S::accessor Acc(Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. submit host task writing from buf 1 to buf 2 + Queue.submit([&](S::handler &CGH) { + S::accessor CopierSrcAcc(Buf1, CGH); + S::accessor CopierDstAcc(Buf2, CGH); + + auto CopierKernel = [CopierSrcAcc, CopierDstAcc] (S::interop_handle &IH) { + (void)IH; // TODO use interop handle + for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) + CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + }; + + CGH.codeplay_host_task(CopierKernel); + }); + + // 3. 
check via host accessor that buf 2 contains valid data + { + S::accessor ResultAcc(Buf2); + + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + } + } +} + +int main() { + test(); + + return 0; +} From faded84e4a9721259b0b0bc96876f0727ae2ec0c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 2 Mar 2020 17:57:23 +0300 Subject: [PATCH 002/188] [SYCL] Remove unused parameter Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 3 +-- sycl/source/detail/scheduler/scheduler.cpp | 10 ++++------ sycl/source/detail/scheduler/scheduler.hpp | 4 ++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d112de043a99a..05969b7f8e6cb 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -339,8 +339,7 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { // The function implements SYCL host accessor logic: host accessor // should provide access to the buffer in user space. -Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, - const bool destructor) { +Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4b9ce7e017f29..c2a76c7272fa8 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -21,9 +21,8 @@ namespace sycl { namespace detail { EventImplPtr addHostAccessorToSchedulerInstance(Requirement *Req, - const bool destructor) { - return cl::sycl::detail::Scheduler::getInstance(). 
- addHostAccessor(Req, destructor); + const bool destructor) { + return cl::sycl::detail::Scheduler::getInstance().addHostAccessor(Req); } void Scheduler::waitForRecordToFinish(MemObjRecord *Record) { @@ -145,11 +144,10 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { MGraphBuilder.removeRecordForMemObj(MemObj); } -EventImplPtr Scheduler::addHostAccessor(Requirement *Req, - const bool destructor) { +EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { std::lock_guard lock(MGraphLock); - Command *NewCmd = MGraphBuilder.addHostAccessor(Req, destructor); + Command *NewCmd = MGraphBuilder.addHostAccessor(Req); if (!NewCmd) return nullptr; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 90000f6ab558c..dbdcf3257d801 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -86,7 +86,7 @@ class Scheduler { // operations with the same memory object that have side effects are blocked // until releaseHostAccessor is called. Returns an event which indicates // when these nodes are completed and host accessor is ready for using. - EventImplPtr addHostAccessor(Requirement *Req, const bool Destructor = false); + EventImplPtr addHostAccessor(Requirement *Req); // Unblocks operations with the memory object. void releaseHostAccessor(Requirement *Req); @@ -119,7 +119,7 @@ class Scheduler { QueueImplPtr HostQueue); Command *addCopyBack(Requirement *Req); - Command *addHostAccessor(Requirement *Req, const bool destructor = false); + Command *addHostAccessor(Requirement *Req); // [Provisional] Optimizes the whole graph. void optimize(); From b31cb8eeb117f303a13b5ecafa9c7f83e19f94b8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 2 Mar 2020 18:17:10 +0300 Subject: [PATCH 003/188] [SYCL] Allow for creating host accessor without blocked event. 
Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 27 ++++++++++--------- sycl/source/detail/scheduler/scheduler.cpp | 5 ++-- sycl/source/detail/scheduler/scheduler.hpp | 4 +-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 05969b7f8e6cb..66b6d677bdac0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -339,7 +339,8 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { // The function implements SYCL host accessor logic: host accessor // should provide access to the buffer in user space. -Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { +Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, + const bool WaitEvent) { const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); @@ -357,20 +358,22 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); - // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); - EmptyCmd->addDep( - DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); - UpdateHostAccCmd->addUser(EmptyCmd); + if (WaitEvent) { + // Need empty command to be blocked until host accessor is destructed + EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); + EmptyCmd->addDep( + DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); + UpdateHostAccCmd->addUser(EmptyCmd); - EmptyCmd->MIsBlockable = true; - EmptyCmd->MCanEnqueue = false; - EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; + EmptyCmd->MIsBlockable = true; + EmptyCmd->MCanEnqueue = false; + EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; - updateLeaves({UpdateHostAccCmd}, Record, 
Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - Req->MBlockedCmd = EmptyCmd; + Req->MBlockedCmd = EmptyCmd; + } if (MPrintOptionsArray[AfterAddHostAcc]) printGraphAsDot("after_addHostAccessor"); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c2a76c7272fa8..f8c40ab524e61 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -144,10 +144,11 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { MGraphBuilder.removeRecordForMemObj(MemObj); } -EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { +EventImplPtr Scheduler::addHostAccessor(Requirement *Req, + const bool WaitEvent) { std::lock_guard lock(MGraphLock); - Command *NewCmd = MGraphBuilder.addHostAccessor(Req); + Command *NewCmd = MGraphBuilder.addHostAccessor(Req, WaitEvent); if (!NewCmd) return nullptr; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index dbdcf3257d801..f523187304f48 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -86,7 +86,7 @@ class Scheduler { // operations with the same memory object that have side effects are blocked // until releaseHostAccessor is called. Returns an event which indicates // when these nodes are completed and host accessor is ready for using. - EventImplPtr addHostAccessor(Requirement *Req); + EventImplPtr addHostAccessor(Requirement *Req, const bool WaitEvent = true); // Unblocks operations with the memory object. 
void releaseHostAccessor(Requirement *Req); @@ -119,7 +119,7 @@ class Scheduler { QueueImplPtr HostQueue); Command *addCopyBack(Requirement *Req); - Command *addHostAccessor(Requirement *Req); + Command *addHostAccessor(Requirement *Req, const bool WaitEvent); // [Provisional] Optimizes the whole graph. void optimize(); From 253086f0a2cf83f9bffaecf1d32bad0346948e72 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 3 Mar 2020 17:02:09 +0300 Subject: [PATCH 004/188] [SYCL] Some syntax improvements Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 083fa0f7bda0e..4ee8b923d147c 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -594,18 +594,18 @@ class handler { template typename std::enable_if< - detail::check_fn_signature::value, - void>::type - codeplay_host_task(FuncT Func) { + detail::check_fn_signature::type, + void(interop_handle &)>::value>::type + codeplay_host_task(FuncT &&Func) { (void)Func; // eliminate possible compiler warning throw std::runtime_error("Not implemented"); } template typename std::enable_if< - detail::check_fn_signature::value, - void>::type - codeplay_host_task(FuncT Func) { + detail::check_fn_signature::type, + void()>::value>::type + codeplay_host_task(FuncT &&Func) { codeplay_host_task([Func](interop_handle &IH) { (void)IH; // eliminate possible compiler warning Func(); @@ -615,8 +615,7 @@ class handler { template typename std::enable_if< detail::check_fn_signature::type, - void(event &)>::value, - void>::type + void(event &)>::value>::type host_task(FuncT &&Func) { (void)Func; // eliminate possible compiler warning throw std::runtime_error("Not implemented"); From cacb056fe2280c390a531bf4fb4d8273e0027de8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 4 Mar 2020 14:09:53 +0300 Subject: [PATCH 005/188] [SYCL] Sample 
impl of host task through event Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 30 ++++++++++++- sycl/include/CL/sycl/handler.hpp | 14 ++++-- sycl/source/detail/scheduler/commands.cpp | 53 ++++++++++++++++++++++- sycl/source/handler.cpp | 6 +++ 4 files changed, 98 insertions(+), 5 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index ec6890807721c..aa6537a9e4e21 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -183,6 +183,16 @@ class InteropTask { void call(cl::sycl::interop_handler &h) { MFunc(h); } }; +class HostTask { + std::function &)> MHostTask; + +public: + HostTask(function_class &)> &&Func) + : MHostTask(Func) {} + + void call(const vector_class &Event) { MHostTask(Event); } +}; + // Class which stores specific lambda object. template class HostKernel : public HostKernelBase { @@ -360,7 +370,8 @@ class CG { COPY_USM, FILL_USM, PREFETCH_USM, - INTEROP_TASK_CODEPLAY + INTEROP_TASK_CODEPLAY, + HOST_TASK }; CG(CGTYPE Type, vector_class> ArgsStorage, @@ -576,6 +587,23 @@ class CGInteropTask : public CG { MInteropTask(std::move(InteropTask)) {} }; +class CGHostTask : public CG { +public: + std::unique_ptr MHostTask; + + CGHostTask(std::unique_ptr HostTask, + std::vector> ArgsStorage, + std::vector AccStorage, + std::vector> SharedPtrStorage, + std::vector Requirements, + std::vector Events, CGTYPE Type) + : CG(Type, std::move(ArgsStorage), std::move(AccStorage), + std::move(SharedPtrStorage), std::move(Requirements), + std::move(Events)), + MHostTask(std::move(HostTask)) + {} +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 4ee8b923d147c..bdd716e2dbc93 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -615,10 +615,16 @@ class handler { template typename std::enable_if< 
detail::check_fn_signature::type, - void(event &)>::value>::type + void(const vector_class &)>::value>::type host_task(FuncT &&Func) { - (void)Func; // eliminate possible compiler warning - throw std::runtime_error("Not implemented"); + throwIfActionIsCreated(); + + MNDRDesc.set(range<1>{1}); + MArgs = std::move(MAssociatedAccesors); + + MHostTask.reset(new detail::HostTask(Func)); + + MCGType = detail::CG::HOST_TASK; } /// Defines and invokes a SYCL kernel function for the specified range and @@ -1333,6 +1339,8 @@ class handler { detail::OSModuleHandle MOSModuleHandle; // Storage for a lambda or function when using InteropTasks std::unique_ptr MInteropTask; + // Storage for a lambda/function when using HostTask + std::unique_ptr MHostTask; /// The list of events that order this operation. vector_class MEvents; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3b97de5673a92..26ae2fbbcfede 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -753,11 +753,46 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } +struct HostTaskContext { + CGHostTask *ExecHost; + const size_t RequiredAmount; + // It could be a mere counter. Though we employ the collection to retrieve + // a set of events for callback. 
+ std::unordered_set Completed; + + ContextImplPtr Context; +}; + +void DispatchHostTask(pi_event Event, pi_int32 EventStatus, void *UD) { + HostTaskContext *Ctx = reinterpret_cast(UD); + + if (EventStatus == PI_EVENT_COMPLETE) + Ctx->Completed.insert(Event); + + if (Ctx->Completed.size() < Ctx->RequiredAmount) + return; + + std::vector Events; + Events.reserve(Ctx->Completed.size()); + + auto Context = createSyclObjFromImpl(Ctx->Context); + + for (const pi_event &E : Ctx->Completed) { + detail::EventImplPtr Impl(new detail::event_impl(E, Context)); + + Events.emplace_back(createSyclObjFromImpl(Impl)); + } + + Ctx->ExecHost->MHostTask->call(Events); + + delete Ctx; +} + cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); - auto RawEvents = getPiEvents(EventImpls); + std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1041,6 +1076,22 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(reinterpret_cast(interop_queue)); return CL_SUCCESS; } + case CG::CGTYPE::HOST_TASK: { + auto *Ctx = new HostTaskContext{ + static_cast(MCommandGroup.get()), + RawEvents.size(), + {}, + MQueue->getContextImplPtr() + }; + + const detail::plugin &Plugin = MQueue->getPlugin(); + + for (const RT::PiEvent &Event : RawEvents) + Plugin.call(Event, PI_EVENT_COMPLETE, + DispatchHostTask, Ctx); + + return CL_SUCCESS; + } case CG::CGTYPE::NONE: default: throw runtime_error("CG type not implemented."); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 973d7262da6df..73d25018f3acf 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -77,6 +77,12 @@ event handler::finalize() { std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents))); break; + case detail::CG::HOST_TASK: + CommandGroup.reset(new detail::CGHostTask( + std::move(MHostTask), std::move(MArgsStorage), + std::move(MAccStorage), std::move(MSharedPtrStorage), + 
std::move(MRequirements), std::move(MEvents), MCGType)); + break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " "explicit memory operation."); From 1a79e3fb3caad665d726aef26198064ff3aa395d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 6 Mar 2020 17:10:36 +0300 Subject: [PATCH 006/188] [SYCL] Sample implementation without OpenCL specific calls. Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/event.hpp | 7 ++-- sycl/source/CMakeLists.txt | 4 +++ sycl/source/detail/event_impl.cpp | 16 +++++++++ sycl/source/detail/event_impl.hpp | 3 ++ sycl/source/detail/queue_impl.hpp | 7 ++++ sycl/source/detail/scheduler/commands.cpp | 41 +++++++---------------- sycl/source/detail/thread_pool.hpp | 31 +++++++++++++++++ sycl/source/event.cpp | 4 +++ 8 files changed, 80 insertions(+), 33 deletions(-) create mode 100644 sycl/source/detail/thread_pool.hpp diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index fa6943442d180..51e0b02d978c8 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -13,6 +13,7 @@ #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -112,11 +113,7 @@ class event { typename info::param_traits::return_type get_profiling_info() const; - template - void when_complete(FuncT &&Func) { - (void)Func; // eliminate possible compiler warning - throw std::runtime_error("Not implemented"); - } + void when_complete(std::function Func); private: event(shared_ptr_class EventImpl); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 6c2243fec3ed3..00f6164d0503f 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -29,10 +29,14 @@ function(add_sycl_rt_library LIB_NAME) target_include_directories( ${LIB_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} "${sycl_inc_dir}") + + find_package(Threads REQUIRED) + target_link_libraries(${LIB_NAME} PRIVATE OpenCL::Headers PRIVATE ${OpenCL_LIBRARIES} PRIVATE 
${CMAKE_DL_LIBS} + ${CMAKE_THREAD_LIBS_INIT} ) add_common_options(${LIB_NAME}) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 3340f6b488891..ab48cad29eee8 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -179,6 +179,22 @@ event_impl::get_info() const { return info::event_command_status::complete; } +void event_impl::setComplete() { + throw std::runtime_error("Not implemented"); +} + +void event_impl::when_complete(std::shared_ptr Self, + std::function Func) { + if (auto Queue = MQueue.lock()) + Queue->getHostTaskThreadPool().submit([Self, Func] () { + Self->wait_and_throw(Self); + + Func(); + }); + else + throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); +} + static uint64_t getTimestamp() { auto TimeStamp = std::chrono::high_resolution_clock::now().time_since_epoch(); return std::chrono::duration_cast(TimeStamp) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 2593ad762dfb8..7dbe1521e72c1 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -145,6 +145,9 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } + void when_complete(std::shared_ptr Self, + std::function Func); + private: RT::PiEvent MEvent = nullptr; ContextImplPtr MContext; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 701c1f6941694..ccb037e6d9a99 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -21,6 +21,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -344,6 +345,10 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } + ThreadPool &getHostTaskThreadPool() { + return MHostTaskThreadPool; + } + private: /// Performs command group submission to the queue. 
/// @@ -385,6 +390,8 @@ class queue_impl { const bool MOpenCLInterop = false; // Assume OOO support by default. bool MSupportOOO = true; + + ThreadPool MHostTaskThreadPool; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 0a3b44ffcf369..1f11b780e7df5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -755,36 +755,26 @@ void DispatchNativeKernel(void *Blob) { struct HostTaskContext { CGHostTask *ExecHost; - const size_t RequiredAmount; - // It could be a mere counter. Though we employ the collection to retrieve - // a set of events for callback. - std::unordered_set Completed; - ContextImplPtr Context; + std::vector WaitList; + EventImplPtr SelfEvent; }; -void DispatchHostTask(pi_event Event, pi_int32 EventStatus, void *UD) { - HostTaskContext *Ctx = reinterpret_cast(UD); - - if (EventStatus == PI_EVENT_COMPLETE) - Ctx->Completed.insert(Event); - - if (Ctx->Completed.size() < Ctx->RequiredAmount) - return; - +void DispatchHostTask(HostTaskContext *Ctx) { std::vector Events; - Events.reserve(Ctx->Completed.size()); - - auto Context = createSyclObjFromImpl(Ctx->Context); + Events.reserve(Ctx->WaitList.size()); - for (const pi_event &E : Ctx->Completed) { - detail::EventImplPtr Impl(new detail::event_impl(E, Context)); + for (const EventImplPtr &Event : Ctx->WaitList) { + Event->wait_and_throw(Event); - Events.emplace_back(createSyclObjFromImpl(Impl)); + Events.emplace_back(createSyclObjFromImpl(Event)); } Ctx->ExecHost->MHostTask->call(Events); + if (Ctx->SelfEvent) + Ctx->SelfEvent->setComplete(); + delete Ctx; } @@ -1080,16 +1070,11 @@ cl_int ExecCGCommand::enqueueImp() { case CG::CGTYPE::HOST_TASK: { auto *Ctx = new HostTaskContext{ static_cast(MCommandGroup.get()), - RawEvents.size(), - {}, - MQueue->getContextImplPtr() + EventImpls, + MEvent }; - const detail::plugin &Plugin = MQueue->getPlugin(); - - for (const RT::PiEvent 
&Event : RawEvents) - Plugin.call(Event, PI_EVENT_COMPLETE, - DispatchHostTask, Ctx); + MQueue->getHostTaskThreadPool().submit(DispatchHostTask, Ctx); return CL_SUCCESS; } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp new file mode 100644 index 0000000000000..1fc345ccecd40 --- /dev/null +++ b/sycl/source/detail/thread_pool.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +class ThreadPool { + std::list MLaunchedThreads; +public: + ThreadPool() {} + ~ThreadPool() { + for (std::thread &Thr : MLaunchedThreads) { + if (Thr.joinable()) + Thr.join(); + } + } + + template + void submit(FuncT &&Func, ArgsT... Args) { + MLaunchedThreads.emplace_back(Func, Args...); + } +}; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index bf0b0780edca9..5f40f05d098c4 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -60,6 +60,10 @@ vector_class event::get_wait_list() { return Result; } +void event::when_complete(std::function Func) { + impl->when_complete(impl, std::move(Func)); +} + event::event(shared_ptr_class event_impl) : impl(event_impl) {} From 93cffcc8ea757bc7a7ff68a9c03a9180e27dcd1f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 6 Mar 2020 17:58:17 +0300 Subject: [PATCH 007/188] [SYCL] Only leave event-callback Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/accessor.hpp | 8 +- sycl/include/CL/sycl/detail/cg.hpp | 31 +-- sycl/include/CL/sycl/handler.hpp | 61 ----- sycl/include/CL/sycl/interop_handle.hpp | 53 ---- sycl/include/CL/sycl/queue.hpp | 4 +- sycl/source/detail/event_impl.cpp | 4 - sycl/source/detail/scheduler/commands.cpp | 38 +-- .../source/detail/scheduler/graph_builder.cpp | 26 +- sycl/source/detail/scheduler/scheduler.cpp | 7 +- sycl/source/detail/scheduler/scheduler.hpp | 4 +- sycl/source/handler.cpp | 6 - 
.../test/host-interop-task/event-callback.cpp | 92 +++++++ .../host-task-through-event.cpp | 230 ------------------ sycl/test/host-interop-task/host-task.cpp | 73 ------ sycl/test/host-interop-task/interop-task.cpp | 74 ------ 15 files changed, 116 insertions(+), 595 deletions(-) delete mode 100644 sycl/include/CL/sycl/interop_handle.hpp create mode 100644 sycl/test/host-interop-task/event-callback.cpp delete mode 100644 sycl/test/host-interop-task/host-task-through-event.cpp delete mode 100644 sycl/test/host-interop-task/host-task.cpp delete mode 100644 sycl/test/host-interop-task/interop-task.cpp diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index dc0156c58bfeb..baa869fc3eaf3 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -814,11 +814,9 @@ class accessor : } #endif - // IsHostBuf is here for host/interop task template 0) && (IsHostBuf || (!IsPlaceH && - (IsGlobalBuf || IsConstantBuf)))>> + (Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> accessor(buffer &BufferRef, handler &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ @@ -856,11 +854,9 @@ class accessor : } #endif - // IsHostBuf is here for host/interop task template 0) && (IsHostBuf || (!IsPlaceH && - (IsGlobalBuf || IsConstantBuf)))>> + (Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> accessor(buffer &BufferRef, handler &CommandGroupHandler, range AccessRange, id AccessOffset = {}) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index a88d0571247fc..e4d0a0d30967c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -52,7 +52,6 @@ class interop_handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); } - private: cl_command_queue MQueue; std::vector MMemObjs; @@ -183,16 +182,6 @@ class InteropTask { void call(cl::sycl::interop_handler &h) { 
MFunc(h); } }; -class HostTask { - std::function &)> MHostTask; - -public: - HostTask(function_class &)> &&Func) - : MHostTask(Func) {} - - void call(const vector_class &Event) { MHostTask(Event); } -}; - // Class which stores specific lambda object. template class HostKernel : public HostKernelBase { @@ -372,8 +361,7 @@ class CG { COPY_USM, FILL_USM, PREFETCH_USM, - INTEROP_TASK_CODEPLAY, - HOST_TASK + INTEROP_TASK_CODEPLAY }; CG(CGTYPE Type, vector_class> ArgsStorage, @@ -589,23 +577,6 @@ class CGInteropTask : public CG { MInteropTask(std::move(InteropTask)) {} }; -class CGHostTask : public CG { -public: - std::unique_ptr MHostTask; - - CGHostTask(std::unique_ptr HostTask, - std::vector> ArgsStorage, - std::vector AccStorage, - std::vector> SharedPtrStorage, - std::vector Requirements, - std::vector Events, CGTYPE Type) - : CG(Type, std::move(ArgsStorage), std::move(AccStorage), - std::move(SharedPtrStorage), std::move(Requirements), - std::move(Events)), - MHostTask(std::move(HostTask)) - {} -}; - } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 641a827fbb67b..a753190079f49 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -107,29 +106,6 @@ template struct get_kernel_name_t { }; device getDeviceFromHandler(handler &); - -template -struct check_fn_signature { - static_assert(std::integral_constant::value, - "Second template parameter needs to be of function type"); -}; - -template -struct check_fn_signature { -private: - template - static constexpr auto check(T*) - -> typename std::is_same< - decltype(std::declval().operator()(std::declval()...)), - RetT>::type; - - template - static constexpr std::false_type check(...); - - typedef decltype(check(0)) type; -public: - static constexpr bool value = type::value; -}; } // namespace 
detail /// 4.8.3 Command group handler class @@ -594,41 +570,6 @@ class handler { MCGType = detail::CG::RUN_ON_HOST_INTEL; } - template - typename std::enable_if< - detail::check_fn_signature::type, - void(interop_handle &)>::value>::type - codeplay_host_task(FuncT &&Func) { - (void)Func; // eliminate possible compiler warning - throw std::runtime_error("Not implemented"); - } - - template - typename std::enable_if< - detail::check_fn_signature::type, - void()>::value>::type - codeplay_host_task(FuncT &&Func) { - codeplay_host_task([Func](interop_handle &IH) { - (void)IH; // eliminate possible compiler warning - Func(); - }); - } - - template - typename std::enable_if< - detail::check_fn_signature::type, - void(const vector_class &)>::value>::type - host_task(FuncT &&Func) { - throwIfActionIsCreated(); - - MNDRDesc.set(range<1>{1}); - MArgs = std::move(MAssociatedAccesors); - - MHostTask.reset(new detail::HostTask(Func)); - - MCGType = detail::CG::HOST_TASK; - } - /// Defines and invokes a SYCL kernel function for the specified range and /// offset. /// @@ -1341,8 +1282,6 @@ class handler { detail::OSModuleHandle MOSModuleHandle; // Storage for a lambda or function when using InteropTasks std::unique_ptr MInteropTask; - // Storage for a lambda/function when using HostTask - std::unique_ptr MHostTask; /// The list of events that order this operation. vector_class MEvents; diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp deleted file mode 100644 index c181226ce5ae6..0000000000000 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ /dev/null @@ -1,53 +0,0 @@ -//==-- interop_handler.hpp --- SYCL interoperability handler ---*- C++ -*---==// -// -// Copyright (C) 2018 Intel Corporation. All rights reserved. 
-// -// The information and source code contained herein is the exclusive property -// of Intel Corporation and may not be disclosed, examined or reproduced in -// whole or in part without explicit written authorization from the company. -// -// ===--------------------------------------------------------------------=== // - -#pragma once - -#include - -#include -#include - -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { - -template -class accessor; - -class interop_handle { -private: - interop_handle() { - assert(false && "Not implemented"); - } - -public: - template - cl_mem get_native_mem(const accessor &) const { - assert(false && "Not implemented"); - } - - cl_command_queue get_native_queue() const noexcept { - assert(false && "Not implemented"); - } - - cl_device_id get_native_device() const noexcept { - assert(false && "Not implemented"); - } - - cl_context get_native_context() const noexcept { - assert(false && "Not implemented"); - } -}; - -} // namespace sycl -} // namespace cl diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index a98e42bde47ef..84c72c8588001 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -448,8 +448,8 @@ class queue { /// Equivalent to has_property() bool is_in_order() const; - void set_host_task_thread_pool_size(unsigned int) { - throw std::runtime_error("Not implemented"); + void set_event_cb_thread_pool_size(unsigned int) { + /* Not implemented yet */ } private: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index ab48cad29eee8..b72c9932b9c19 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -179,10 +179,6 @@ event_impl::get_info() const { return info::event_command_status::complete; } -void event_impl::setComplete() { - throw std::runtime_error("Not implemented"); -} - void event_impl::when_complete(std::shared_ptr Self, std::function Func) { if (auto Queue = MQueue.lock()) diff --git 
a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1f11b780e7df5..6343ddecc670a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -753,36 +753,11 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } -struct HostTaskContext { - CGHostTask *ExecHost; - - std::vector WaitList; - EventImplPtr SelfEvent; -}; - -void DispatchHostTask(HostTaskContext *Ctx) { - std::vector Events; - Events.reserve(Ctx->WaitList.size()); - - for (const EventImplPtr &Event : Ctx->WaitList) { - Event->wait_and_throw(Event); - - Events.emplace_back(createSyclObjFromImpl(Event)); - } - - Ctx->ExecHost->MHostTask->call(Events); - - if (Ctx->SelfEvent) - Ctx->SelfEvent->setComplete(); - - delete Ctx; -} - cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); - std::vector RawEvents = getPiEvents(EventImpls); + auto RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1067,17 +1042,6 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(reinterpret_cast(interop_queue)); return CL_SUCCESS; } - case CG::CGTYPE::HOST_TASK: { - auto *Ctx = new HostTaskContext{ - static_cast(MCommandGroup.get()), - EventImpls, - MEvent - }; - - MQueue->getHostTaskThreadPool().submit(DispatchHostTask, Ctx); - - return CL_SUCCESS; - } case CG::CGTYPE::NONE: default: throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index aa4b11839aa5a..62db768e40dfc 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -340,7 +340,7 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { // The function implements SYCL host accessor logic: host accessor // should provide access to 
the buffer in user space. Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, - const bool WaitEvent) { + const bool destructor) { const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); @@ -358,22 +358,20 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); - if (WaitEvent) { - // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); - EmptyCmd->addDep( - DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); - UpdateHostAccCmd->addUser(EmptyCmd); + // Need empty command to be blocked until host accessor is destructed + EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); + EmptyCmd->addDep( + DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); + UpdateHostAccCmd->addUser(EmptyCmd); - EmptyCmd->MIsBlockable = true; - EmptyCmd->MCanEnqueue = false; - EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; + EmptyCmd->MIsBlockable = true; + EmptyCmd->MCanEnqueue = false; + EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; - updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - Req->MBlockedCmd = EmptyCmd; - } + Req->MBlockedCmd = EmptyCmd; if (MPrintOptionsArray[AfterAddHostAcc]) printGraphAsDot("after_addHostAccessor"); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 58d2ac038b458..862507c1839cc 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -22,7 +22,8 @@ namespace detail { EventImplPtr addHostAccessorToSchedulerInstance(Requirement *Req, const bool destructor) { - return 
cl::sycl::detail::Scheduler::getInstance().addHostAccessor(Req); + return cl::sycl::detail::Scheduler::getInstance(). + addHostAccessor(Req, destructor); } void Scheduler::waitForRecordToFinish(MemObjRecord *Record) { @@ -145,10 +146,10 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { } EventImplPtr Scheduler::addHostAccessor(Requirement *Req, - const bool WaitEvent) { + const bool destructor) { std::lock_guard lock(MGraphLock); - Command *NewCmd = MGraphBuilder.addHostAccessor(Req, WaitEvent); + Command *NewCmd = MGraphBuilder.addHostAccessor(Req, destructor); if (!NewCmd) return nullptr; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index f523187304f48..90000f6ab558c 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -86,7 +86,7 @@ class Scheduler { // operations with the same memory object that have side effects are blocked // until releaseHostAccessor is called. Returns an event which indicates // when these nodes are completed and host accessor is ready for using. - EventImplPtr addHostAccessor(Requirement *Req, const bool WaitEvent = true); + EventImplPtr addHostAccessor(Requirement *Req, const bool Destructor = false); // Unblocks operations with the memory object. void releaseHostAccessor(Requirement *Req); @@ -119,7 +119,7 @@ class Scheduler { QueueImplPtr HostQueue); Command *addCopyBack(Requirement *Req); - Command *addHostAccessor(Requirement *Req, const bool WaitEvent); + Command *addHostAccessor(Requirement *Req, const bool destructor = false); // [Provisional] Optimizes the whole graph. 
void optimize(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1b4a46c78ed04..9d49f0c3e8efa 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -77,12 +77,6 @@ event handler::finalize() { std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents))); break; - case detail::CG::HOST_TASK: - CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), std::move(MArgsStorage), - std::move(MAccStorage), std::move(MSharedPtrStorage), - std::move(MRequirements), std::move(MEvents), MCGType)); - break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " "explicit memory operation.", diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp new file mode 100644 index 0000000000000..4b677a177289e --- /dev/null +++ b/sycl/test/host-interop-task/event-callback.cpp @@ -0,0 +1,92 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include + +#include +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf; + std::mutex Mutex; + std::condition_variable CV; +}; + +void Thread1Fn(Context &Ctx) { + // T1.1. submit device-side kernel K1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + CGH.single_task(GeneratorKernel); + }) + // T1.2. 
submit host task using event of K1 as a lock with callback to set + // flag F = true + .when_complete([&Ctx] (const S::event &E) { + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }); +} + +void Thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void test() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional + Queue.set_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}}; + + // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); +} + +int main(void) { + test(); + + return 0; +} diff --git a/sycl/test/host-interop-task/host-task-through-event.cpp b/sycl/test/host-interop-task/host-task-through-event.cpp deleted file mode 100644 index db4ce0ecb91c5..0000000000000 --- a/sycl/test/host-interop-task/host-task-through-event.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include -#include -#include -#include -#include - -#include -#include - -namespace S = cl::sycl; - -struct Context { - std::atomic_bool Flag; - S::queue &Queue; - std::string Message; - S::buffer Buf; - std::mutex Mutex; - std::condition_variable CV; -}; - -void ThreadA1Fn(Context &Ctx) { - // T1.1. 
submit device-side kernel K1 - auto Event = Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Ctx.Buf, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - CGH.single_task(GeneratorKernel); - }); - - // T1.2. submit host task using event of K1 as a lock with callback to set - // flag F = true - Ctx.Queue.submit([&](S::handler &CGH) { - CGH.depends_on(Event); - - auto Callback = [&Ctx, Event] (const std::vector &E) -> void { - assert(E.size() == 1); - - // operator== of cl::sycl::event will only compare pointers to impls. - // We want to compare underlying impl objects, though. - assert(S::detail::getSyclObjImpl(Event)->get() == S::detail::getSyclObjImpl(E[0])->get()); - - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }; - - // The Callback is run on Queue-internal thread-pool or in backend's thread - // if thread pool size is explicitly set to 0 - CGH.host_task(Callback); - }); -} - -void ThreadB1Fn(Context &Ctx) { - // T1.1. submit device-side kernel K1 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Ctx.Buf, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - CGH.single_task(GeneratorKernel); - }); - - // T1.2. 
submit host task using event of K1 as a lock with callback to set - // flag F = true - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor TestAcc(Ctx.Buf, CGH); - - auto Callback = [&Ctx, TestAcc] (const std::vector &E) -> void { - assert(E.size() == 1); - - for (size_t Idx = 0; Idx < TestAcc.get_count(); ++Idx) - assert(Idx == TestAcc[Idx]); - - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }; - - // The Callback is run on Queue-internal thread-pool or in backend's thread - // if thread pool size is explicitly set to 0 - CGH.host_task(Callback); - }); -} - -void ThreadC1Fn(Context &Ctx) { - // T1.1. submit device-side kernel K1 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Ctx.Buf, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - CGH.single_task(GeneratorKernel); - }) - // T1.2. submit host task using event of K1 as a lock with callback to set - // flag F = true - .when_complete([&Ctx] (const S::event &E) { - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }); -} - -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); - - // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); - - assert(Ctx.Flag.load()); - - // T2.2. print some "hello, world" message - Ctx.Message = "Hello, world"; -} - -void testA() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - // optional -// Queue.set_host_task_thread_pool_size(4); - - Context Ctx{{false}, Queue, "", {10}}; - - // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(ThreadA1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - std::cout << "Msg = " << Ctx.Message << std::endl; - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); -} - -void testB() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - // optional -// Queue.set_host_task_thread_pool_size(4); - - Context Ctx{{false}, Queue, "", {10}}; - - // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(ThreadB1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); -} - -void testC() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - // optional - Queue.set_host_task_thread_pool_size(4); - - Context Ctx{{false}, Queue, "", {10}}; - - // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(ThreadC1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); -} - -int main(void) { - testA(); -// testB(); -// testC(); - - return 0; -} diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp deleted file mode 100644 index 21741dd899499..0000000000000 --- a/sycl/test/host-interop-task/host-task.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include - -namespace S = cl::sycl; - -void test() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - -#define DATA_SIZE 10 - S::buffer Buf1(DATA_SIZE); - S::buffer Buf2(DATA_SIZE); - - // 0. initialize resulting buffer with apriori wrong result - { - S::accessor Acc(Buf2); - - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - Acc[Idx] = -1; - } - - // 1. submit task writing to buffer 1 - Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Buf1, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - - CGH.single_task(GeneratorKernel); - }); - - // 2. submit host task writing from buf 1 to buf 2 - Queue.submit([&](S::handler &CGH) { - S::accessor CopierSrcAcc(Buf1, CGH); - S::accessor CopierDstAcc(Buf2, CGH); - - auto CopierKernel = [CopierSrcAcc, CopierDstAcc] () { - for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) - CopierDstAcc[Idx] = CopierSrcAcc[Idx]; - }; - - CGH.codeplay_host_task(CopierKernel); - }); - - // 3. 
check via host accessor that buf 2 contains valid data - { - S::accessor ResultAcc(Buf2); - - for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); - } - } -} - -int main() { - test(); - - return 0; -} diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp deleted file mode 100644 index b7aff104d43aa..0000000000000 --- a/sycl/test/host-interop-task/interop-task.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include - -namespace S = cl::sycl; - -void test() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - -#define DATA_SIZE 10 - S::buffer Buf1(DATA_SIZE); - S::buffer Buf2(DATA_SIZE); - - // 0. initialize resulting buffer with apriori wrong result - { - S::accessor Acc(Buf2); - - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - Acc[Idx] = -1; - } - - // 1. submit task writing to buffer 1 - Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Buf1, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - - CGH.single_task(GeneratorKernel); - }); - - // 2. submit host task writing from buf 1 to buf 2 - Queue.submit([&](S::handler &CGH) { - S::accessor CopierSrcAcc(Buf1, CGH); - S::accessor CopierDstAcc(Buf2, CGH); - - auto CopierKernel = [CopierSrcAcc, CopierDstAcc] (S::interop_handle &IH) { - (void)IH; // TODO use interop handle - for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) - CopierDstAcc[Idx] = CopierSrcAcc[Idx]; - }; - - CGH.codeplay_host_task(CopierKernel); - }); - - // 3. 
check via host accessor that buf 2 contains valid data - { - S::accessor ResultAcc(Buf2); - - for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); - } - } -} - -int main() { - test(); - - return 0; -} From 3790b3a8a69faae7d75296d8d9fc6e1c1f5ad02d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 10 Mar 2020 10:48:34 +0300 Subject: [PATCH 008/188] [SYCL] Fix typo Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/event-callback.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp index 4b677a177289e..90583d8c715cf 100644 --- a/sycl/test/host-interop-task/event-callback.cpp +++ b/sycl/test/host-interop-task/event-callback.cpp @@ -35,7 +35,7 @@ void Thread1Fn(Context &Ctx) { }) // T1.2. submit host task using event of K1 as a lock with callback to set // flag F = true - .when_complete([&Ctx] (const S::event &E) { + .when_complete([&Ctx] () { bool Expected = false; bool Desired = true; assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); @@ -70,7 +70,7 @@ void test() { S::queue Queue(EH); // optional - Queue.set_host_task_thread_pool_size(4); + Queue.set_event_cb_thread_pool_size(4); Context Ctx{{false}, Queue, "", {10}}; From dc0ab02ae549151677e58b23a8d5ac43b88b4e6d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 10 Mar 2020 17:20:12 +0300 Subject: [PATCH 009/188] [SYCL] Allow for running lit-tests with threads. 
Signed-off-by: Sergey Kanaev --- sycl/test/CMakeLists.txt | 3 +++ sycl/test/host-interop-task/event-callback.cpp | 2 +- sycl/test/lit.cfg.py | 1 + sycl/test/lit.site.cfg.py.in | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index 68feb07fc2d76..5ed26c0323a8c 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -7,6 +7,9 @@ set(SYCL_INCLUDE "${dst_dir}") set(RT_TEST_ARGS ${RT_TEST_ARGS} "-v") set(DEPLOY_RT_TEST_ARGS ${DEPLOY_RT_TEST_ARGS} "-v -D SYCL_TOOLS_DIR=${CMAKE_INSTALL_PREFIX}/bin -D SYCL_LIBS_DIR=${CMAKE_INSTALL_PREFIX}/lib${LLVM_LIBDIR_SUFFIX} -D SYCL_INCLUDE=${dst_deploy_dir}") +find_package(Threads REQUIRED) +set(SYCL_THREADS_LIB ${CMAKE_THREAD_LIBS_INIT}) + configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp index 90583d8c715cf..74454cd0c2d21 100644 --- a/sycl/test/host-interop-task/event-callback.cpp +++ b/sycl/test/host-interop-task/event-callback.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out #include diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 18cf02cd52745..4f880f3d58aa9 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -67,6 +67,7 @@ if 'OCL_ICD_FILENAMES' in os.environ: config.environment['OCL_ICD_FILENAMES'] = os.environ['OCL_ICD_FILENAMES'] +config.substitutions.append( ('%threads_lib', config.sycl_threads_lib) ) config.substitutions.append( ('%sycl_libs_dir', config.sycl_libs_dir ) ) config.substitutions.append( ('%sycl_include', config.sycl_include ) ) config.substitutions.append( ('%opencl_libs_dir', config.opencl_libs_dir) ) diff --git a/sycl/test/lit.site.cfg.py.in b/sycl/test/lit.site.cfg.py.in index 4ce4d38cdc914..a04c075524c0c 100644 --- 
a/sycl/test/lit.site.cfg.py.in +++ b/sycl/test/lit.site.cfg.py.in @@ -17,6 +17,7 @@ config.cuda_toolkit_include = "@CUDA_TOOLKIT_INCLUDE@" config.llvm_enable_projects = "@LLVM_ENABLE_PROJECTS@" +config.sycl_threads_lib = '@SYCL_THREADS_LIB@' import lit.llvm lit.llvm.initialize(lit_config, config) From f0df34971b072317bd7106d29a9e41b689e113fc Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 11 Mar 2020 14:38:06 +0300 Subject: [PATCH 010/188] [SYCL] A more sophisticated thread pool impl Signed-off-by: Sergey Kanaev --- sycl/source/detail/thread_pool.hpp | 76 +++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 1fc345ccecd40..4c9c2537c3b06 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -1,7 +1,12 @@ #pragma once -#include +#include +#include +#include +#include +#include #include +#include #include @@ -10,19 +15,68 @@ namespace sycl { namespace detail { class ThreadPool { - std::list MLaunchedThreads; -public: - ThreadPool() {} - ~ThreadPool() { - for (std::thread &Thr : MLaunchedThreads) { - if (Thr.joinable()) - Thr.join(); + std::vector MLaunchedThreads; + + size_t MThreadCount; + std::queue> MJobQueue; + std::mutex MJobQueueMutex; + std::condition_variable MDoSmthOrStop; + std::atomic_bool MStop; + + void worker() { + std::unique_lock Lock(MJobQueueMutex); + + for (;;) { + MDoSmthOrStop.wait( + Lock, [this]() { return !MJobQueue.empty() || MStop.load(); }); + + if (MStop.load()) + break; + + std::function Job = std::move(MJobQueue.front()); + MJobQueue.pop(); + Lock.unlock(); + + Job(); + + Lock.lock(); } } - template - void submit(FuncT &&Func, ArgsT... 
Args) { - MLaunchedThreads.emplace_back(Func, Args...); +public: + ThreadPool(unsigned int ThreadCount = std::max( + 1L, + static_cast(std::thread::hardware_concurrency()) - 1)) + : MThreadCount(ThreadCount) {} + + ~ThreadPool() { finishAndWait(); } + + void start() { + MLaunchedThreads.reserve(MThreadCount); + + MStop.store(false); + + for (size_t Idx = 0; Idx < MThreadCount; ++Idx) + MLaunchedThreads.emplace_back(&ThreadPool::worker, this); + } + + void finishAndWait() { + MStop.store(true); + + MDoSmthOrStop.notify_all(); + + for (std::thread &Thread : MLaunchedThreads) + if (Thread.joinable()) + Thread.join(); + } + + void submit(std::function &&Func) { + { + std::lock_guard Lock(MJobQueueMutex); + MJobQueue.emplace(Func); + } + + MDoSmthOrStop.notify_one(); } }; From ea577f62688ded9c749ff8fd8a2ca5485f88dba9 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 12 Mar 2020 11:12:35 +0300 Subject: [PATCH 011/188] [SYCL] Proper use of fork() in assertion test Signed-off-by: Sergey Kanaev --- sycl/test/devicelib/assert.cpp | 122 ++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 47 deletions(-) diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index 9170d2557e283..2e3b0a83fca14 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -1,4 +1,5 @@ // REQUIRES: cpu,linux +// RUN: %clangxx %s -DPARENT_PROCESS -o %t.parent.bin // RUN: %clangxx -fsycl -c %s -o %t.o // RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-glibc.o -o %t.out // (see the other RUN lines below; it is a bit complicated) @@ -75,12 +76,12 @@ // // Overall this sounds stable enough. What could possibly go wrong? 
// -// RUN: env SYCL_PI_TRACE=1 SHOULD_CRASH=1 CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGABRT SKIP_IF_NO_EXT=1 %t.out 2>%t.stderr.native >%t.stdout.native +// RUN: env SYCL_PI_TRACE=1 SHOULD_CRASH=1 CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGABRT SKIP_IF_NO_EXT=1 %t.parent.bin %t.out 2>%t.stderr.native >%t.stdout.native // RUN: FileCheck %s --input-file %t.stdout.native --check-prefixes=CHECK-NATIVE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // RUN: FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // -// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.out >%t.stdout.pi.fallback -// RUN: env SHOULD_CRASH=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.out >%t.stdout.msg.fallback +// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.parent.bin %t.out >%t.stdout.pi.fallback +// RUN: env SHOULD_CRASH=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.parent.bin %t.out >%t.stdout.msg.fallback // RUN: FileCheck %s --input-file %t.stdout.pi.fallback --check-prefixes=CHECK-FALLBACK // RUN: FileCheck %s --input-file %t.stdout.msg.fallback --check-prefixes=CHECK-MESSAGE // @@ -107,13 +108,79 @@ #include #include +const int EXIT_SKIP_TEST = 42; + +#ifdef PARENT_PROCESS +int main(int argc, char *argv[]) { + assert(argc > 1); + + char **ChildArgv = new char*; + ChildArgv[0] = argv[1]; + + int Child = fork(); + + if (Child < 0) { + perror("Fork failed"); + return 1; + } + + if (!Child) { + int 
ExecFailed = execve(argv[1], ChildArgv, environ); + + if (ExecFailed) { + perror("Execve failed"); + return 1; + } + + assert(false && "Unreachanble reached"); + } + + int status = 0; + waitpid(Child, &status, 0); + if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SKIP_TEST) { + return 0; + } + if (getenv("SHOULD_CRASH")) { + if (!WIFSIGNALED(status)) { + fprintf(stderr, "error: process did not terminate by a signal\n"); + return 1; + } + } else { + if (WIFSIGNALED(status)) { + fprintf(stderr, "error: process should not terminate\n"); + return 1; + } + // We should not check anything if the child finished successful and this + // was expected. + return 0; + } + int sig = WTERMSIG(status); + int expected = 0; + if (const char *env = getenv("EXPECTED_SIGNAL")) { + if (0 == strcmp(env, "SIGABRT")) { + expected = SIGABRT; + } else if (0 == strcmp(env, "SIGSEGV")) { + expected = SIGSEGV; + } + if (!expected) { + fprintf(stderr, "EXPECTED_SIGNAL should be set to either \"SIGABRT\", " + "or \"SIGSEGV\"!\n"); + return 1; + } + } + if (sig != expected) { + fprintf(stderr, "error: expected signal %d, got %d\n", expected, sig); + return 1; + } + + return 0; +} +#else using namespace cl::sycl; constexpr auto sycl_read = cl::sycl::access::mode::read; constexpr auto sycl_write = cl::sycl::access::mode::write; -const int EXIT_SKIP_TEST = 42; - template void simple_vadd(const std::array &VA, const std::array &VB, std::array &VC) { @@ -164,48 +231,6 @@ void simple_vadd(const std::array &VA, const std::array &VB, } int main() { - int child = fork(); - if (child) { - int status = 0; - waitpid(child, &status, 0); - if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SKIP_TEST) { - return 0; - } - if (getenv("SHOULD_CRASH")) { - if (!WIFSIGNALED(status)) { - fprintf(stderr, "error: process did not terminate by a signal\n"); - return 1; - } - } else { - if (WIFSIGNALED(status)) { - fprintf(stderr, "error: process should not terminate\n"); - return 1; - } - // We should not check 
anything if the child finished successful and this - // was expected. - return 0; - } - int sig = WTERMSIG(status); - int expected = 0; - if (const char *env = getenv("EXPECTED_SIGNAL")) { - if (0 == strcmp(env, "SIGABRT")) { - expected = SIGABRT; - } else if (0 == strcmp(env, "SIGSEGV")) { - expected = SIGSEGV; - } - if (!expected) { - fprintf(stderr, "EXPECTED_SIGNAL should be set to either \"SIGABRT\", " - "or \"SIGSEGV\"!\n"); - return 1; - } - } - if (sig != expected) { - fprintf(stderr, "error: expected signal %d, got %d\n", expected, sig); - return 1; - } - return 0; - } - // Turn the bufferization off to not loose the assert message if it is written // to stdout. if (setvbuf(stdout, NULL, _IONBF, 0)) { @@ -218,4 +243,7 @@ int main() { std::array C = {0, 0, 0}; simple_vadd(A, B, C); + + return 0; } +#endif // PARENT_PROCESS From 99f64fb895731305b961d67470c8d1fbe7442902 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 12 Mar 2020 13:40:50 +0300 Subject: [PATCH 012/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/test/devicelib/assert.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index 2e3b0a83fca14..1b6724b5ffa7d 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -114,7 +114,7 @@ const int EXIT_SKIP_TEST = 42; int main(int argc, char *argv[]) { assert(argc > 1); - char **ChildArgv = new char*; + char **ChildArgv = new char *; ChildArgv[0] = argv[1]; int Child = fork(); From 529d4dc7bcb4d05f753ee2cc69106a64dd83df5d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 12 Mar 2020 17:41:44 +0300 Subject: [PATCH 013/188] [SYCL] Fix some typos. Lazy initialization of thread pool. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 4 +--- sycl/source/detail/event_impl.cpp | 8 ++++---- sycl/source/detail/queue_impl.cpp | 10 ++++++++++ sycl/source/detail/queue_impl.hpp | 20 ++++++++++++++++--- sycl/source/queue.cpp | 4 ++++ .../test/host-interop-task/event-callback.cpp | 2 +- 6 files changed, 37 insertions(+), 11 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 5f3b6a0b36ffd..b268717e26c5b 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -637,9 +637,7 @@ class queue { /// Equivalent to has_property() bool is_in_order() const; - void set_event_cb_thread_pool_size(unsigned int) { - /* Not implemented yet */ - } + void set_event_cb_and_host_task_thread_pool_size(size_t Threads); private: shared_ptr_class impl; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 69e389feb81d3..88eb80d54705a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -252,13 +252,13 @@ event_impl::get_info() const { void event_impl::when_complete(std::shared_ptr Self, std::function Func) { if (auto Queue = MQueue.lock()) - Queue->getHostTaskThreadPool().submit([Self, Func] () { - Self->wait_and_throw(Self); + Queue->getHostTaskAndEventCallbackThreadPool().submit([Self, Func] () { + Self->wait_and_throw(Self); - Func(); + Func(); }); else - throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); + throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); } static uint64_t getTimestamp() { diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index ddc9883cb610e..fdc09a7b00e7f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -185,6 +185,16 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { #endif } +void queue_impl::initHostTaskAndEventCallbackThreadPool() { + if (MHostTaskAndEventCallbackThreadPoolThreadsCount) + 
MHostTaskAndEventCallbackThreadPool.reset( + new ThreadPool(MHostTaskAndEventCallbackThreadPoolThreadsCount)); + else + MHostTaskAndEventCallbackThreadPool.reset(new ThreadPool); + + MHostTaskAndEventCallbackThreadPool->start(); +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4d47f0ad5b789..35e0fda6c8642 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -347,8 +347,15 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - ThreadPool &getHostTaskThreadPool() { - return MHostTaskThreadPool; + void set_event_cb_and_host_task_thread_pool_size(size_t Threads) { + MHostTaskAndEventCallbackThreadPoolThreadsCount = Threads; + } + + ThreadPool &getHostTaskAndEventCallbackThreadPool() { + if (!MHostTaskAndEventCallbackThreadPool) + initHostTaskAndEventCallbackThreadPool(); + + return *MHostTaskAndEventCallbackThreadPool; } private: @@ -382,6 +389,8 @@ class queue_impl { /// \param Event is the event to be stored void addEvent(event Event); + void initHostTaskAndEventCallbackThreadPool(); + /// Protects all the fields that can be changed by class' methods. mutex_class MMutex; @@ -404,7 +413,12 @@ class queue_impl { // Assume OOO support by default. bool MSupportOOO = true; - ThreadPool MHostTaskThreadPool; + size_t MHostTaskAndEventCallbackThreadPoolThreadsCount = 0; + + // Thread pool for host task and event callbacks execution. 
+ // The thread pool is instntiated upon the very first call to + // getHostTaskAndEventCallbackThreadPool + std::unique_ptr MHostTaskAndEventCallbackThreadPool; }; } // namespace detail diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index e20b8e6016725..026f145e77c26 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -149,5 +149,9 @@ queue::get_property() const; bool queue::is_in_order() const { return impl->has_property(); } + +void queue::set_event_cb_and_host_task_thread_pool_size(size_t Threads) { + impl->set_event_cb_and_host_task_thread_pool_size(Threads); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp index 74454cd0c2d21..8d676a62cd625 100644 --- a/sycl/test/host-interop-task/event-callback.cpp +++ b/sycl/test/host-interop-task/event-callback.cpp @@ -70,7 +70,7 @@ void test() { S::queue Queue(EH); // optional - Queue.set_event_cb_thread_pool_size(4); + Queue.set_event_cb_and_host_task_thread_pool_size(4); Context Ctx{{false}, Queue, "", {10}}; From 4d04655b9a54ed04982b75f405e2fecaf2b16a95 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 13 Mar 2020 09:56:16 +0300 Subject: [PATCH 014/188] [SYCL] Employ event::when_complete instead of piEventSetCallback Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e9425a3e47783..bb5cf72858020 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -190,11 +190,13 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); Plugin.call(Context->getHandleRef(), &GlueEventHandle); - EventImplPtr *GlueEventCopy = - new EventImplPtr(GlueEvent); // To increase the reference 
count by 1. - Plugin.call( - Event->getHandleRef(), CL_COMPLETE, EventCompletionClbk, - /*void *data=*/(GlueEventCopy)); + + Event->when_complete(Event, [GlueEvent] () { + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + const detail::plugin &Plugin = GlueEvent->getPlugin(); + Plugin.call(GlueEventHandle, CL_COMPLETE); + }); + GlueEvents.push_back(GlueEvent); Result.push_back(std::move(GlueEvent)); continue; From 1e3bfe48d5ec482b631abd71489ee351e32a6665 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 13 Mar 2020 13:29:50 +0300 Subject: [PATCH 015/188] [SYCL] Host-task test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 118 ++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 sycl/test/host-interop-task/host-task.cpp diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp new file mode 100644 index 0000000000000..7344d9f0062c1 --- /dev/null +++ b/sycl/test/host-interop-task/host-task.cpp @@ -0,0 +1,118 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf1; + S::buffer Buf2; + std::mutex Mutex; + std::condition_variable CV; +}; + +void thread1Fn(Context &Ctx) { + // 0. initialize resulting buffer with apriori wrong result + { + S::accessor Acc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. 
submit host task writing from buf 1 to buf 2 + Queue.submit([&](S::handler &CGH) { + S::accessor CopierSrcAcc(Ctx.Buf1, CGH); + S::accessor CopierDstAcc(Ctx.Buf2, CGH); + + auto CopierKernel = [CopierSrcAcc, CopierDstAcc, &Ctx] () { + for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) + CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + CGH.codeplay_host_task(CopierKernel); + }); +} + +void thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void test() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional + Queue.set_event_cb_and_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}, {10}}; + + // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); + + // 3. 
check via host accessor that buf 2 contains valid data + { + S::accessor ResultAcc(Buf2); + + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + } + } +} + +int main() { + test(); + + return 0; +} From fc70b037be92ea11b9a97506daf0b583c55504e6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 13 Mar 2020 13:30:59 +0300 Subject: [PATCH 016/188] [SYCL] Remove unwanted include Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/event-callback.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp index 8d676a62cd625..6e522bf3d2296 100644 --- a/sycl/test/host-interop-task/event-callback.cpp +++ b/sycl/test/host-interop-task/event-callback.cpp @@ -8,7 +8,6 @@ #include #include -#include namespace S = cl::sycl; From eaaefd3ed589f8b526db819fa45cf81eb5a414b0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 18 Mar 2020 15:43:28 +0300 Subject: [PATCH 017/188] [SYCL] Worked on host-task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/accessor.hpp | 4 +- sycl/include/CL/sycl/detail/cg.hpp | 40 +++++++++++++- sycl/include/CL/sycl/handler.hpp | 41 ++++++++++++++ sycl/source/detail/scheduler/commands.cpp | 67 +++++++++++++++++++++++ sycl/source/handler.cpp | 19 +++++++ sycl/test/host-interop-task/host-task.cpp | 17 ++++-- 6 files changed, 179 insertions(+), 9 deletions(-) diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 1fe3a597d430f..c3ef9b571537a 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -777,7 +777,7 @@ class accessor : template + (!IsPlaceH && (IsGlobalBuf || IsConstantBuf || IsHostBuf))> > accessor(buffer &BufferRef, handler &CommandGroupHandler) @@ -818,7 +818,7 @@ class accessor : template 0) && (Dims == Dimensions) && (!IsPlaceH && - (IsGlobalBuf || IsConstantBuf))>> + (IsGlobalBuf || 
IsConstantBuf || IsHostBuf))>> accessor(buffer &BufferRef, handler &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index a87daa3e8e154..1ccd8ecf21c90 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -28,6 +28,10 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace detail { +class queue_impl; +} + // Interoperability handler // class interop_handler { @@ -182,6 +186,16 @@ class InteropTask { void call(cl::sycl::interop_handler &h) { MFunc(h); } }; +class HostTask { + std::function MHostTask; + +public: + HostTask(std::function &&Func) + : MHostTask(Func) {} + + void call() { MHostTask(); } +}; + // Class which stores specific lambda object. template class HostKernel : public HostKernelBase { @@ -361,7 +375,8 @@ class CG { COPY_USM, FILL_USM, PREFETCH_USM, - INTEROP_TASK_CODEPLAY + INTEROP_TASK_CODEPLAY, + HOST_TASK }; CG(CGTYPE Type, vector_class> ArgsStorage, @@ -600,6 +615,29 @@ class CGInteropTask : public CG { MInteropTask(std::move(InteropTask)) {} }; +class CGHostTask : public CG { +public: + std::unique_ptr MHostTask; + shared_ptr_class MQueue; + vector_class MArgs; + + CGHostTask(std::unique_ptr HostTask, + std::shared_ptr Queue, + vector_class Args, + std::vector> ArgsStorage, + std::vector AccStorage, + std::vector> SharedPtrStorage, + std::vector Requirements, + std::vector Events, CGTYPE Type, + detail::code_location loc = {}) + : CG(Type, std::move(ArgsStorage), std::move(AccStorage), + std::move(SharedPtrStorage), std::move(Requirements), + std::move(Events), std::move(loc)), + MHostTask(std::move(HostTask)), MQueue(std::move(Queue)), + MArgs(std::move(Args)) + {} +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 2ea4e4e83db66..869250c6a6561 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ 
b/sycl/include/CL/sycl/handler.hpp @@ -105,6 +105,30 @@ template struct get_kernel_name_t { }; device getDeviceFromHandler(handler &); + +template +struct check_fn_signature { + static_assert(std::integral_constant::value, + "Second template parameter is required to be of function type"); +}; + +template +struct check_fn_signature { +private: + template + static constexpr auto check(T*) + -> typename std::is_same< + decltype(std::declval().operator()(std::declval()...)), + RetT>::type; + + template + static constexpr std::false_type check(...); + + typedef decltype(check(0)) type; + +public: + static constexpr bool value = type::value; +}; } // namespace detail /// 4.8.3 Command group handler class @@ -570,6 +594,21 @@ class handler { MCGType = detail::CG::RUN_ON_HOST_INTEL; } + template + typename std::enable_if< + detail::check_fn_signature::type, + void()>::value>::type + codeplay_host_task(FuncT &&Func) { + throwIfActionIsCreated(); + + MNDRDesc.set(range<1>(1)); + MArgs = std::move(MAssociatedAccesors); + + MHostTask.reset(new detail::HostTask(Func, MQueue)); + + MCGType = detail::CG::HOST_TASK; + } + /// Defines and invokes a SYCL kernel function for the specified range and /// offset. /// @@ -1277,6 +1316,8 @@ class handler { vector_class MPattern; /// Storage for a lambda or function object. 
unique_ptr_class MHostKernel; + /// Storage for lambda/function when using HostTask + unique_ptr_class MHostTask; detail::OSModuleHandle MOSModuleHandle; // Storage for a lambda or function when using InteropTasks std::unique_ptr MInteropTask; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 06ea8e77afedd..9c7f85a7cb6bf 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1462,6 +1462,41 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } +struct HostTaskContext { + CGHostTask *HostTask; + + // TODO events dependencies + // TODO buffer dependencies, though a buffer dependency may be expressed via event + + std::mutex RequirementsMutex; + std::condition_variable AnotherRequirementFulfilledCV; +}; + +bool CheckHostTaskRequirements(const std::shared_ptr &Ctx) { + (void)Ctx; + // TODO check if all the requirements are fullfiled i.e: + // - event: use clGetEventInfo + // - buffer: ??? maybe use copy_acc_to_acc? 
+ return true; +} + +void DispatchHostTask(const std::shared_ptr &Ctx) { + { + std::unique_lock Lock(Ctx->RequirementsMutex); + + Ctx->AnotherRequirementFulfilledCV.wait(Lock, [Ctx] () { + return CheckHostTaskRequirements(Ctx); + }); + } + + const QueueImplPtr &Queue = Scheduler::getInstance().getDefaultHostQueue(); + Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { + Ctx->HostTask->MHostTask->call(); + }); + + // Ctx will be deleted automatically by shared_ptr +} + cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); @@ -1751,6 +1786,38 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(reinterpret_cast(interop_queue)); return CL_SUCCESS; } + case CG::CGTYPE::HOST_TASK: { + CGHostTask *HostTask = static_cast(MCommandGroup.get()); + const QueueImplPtr &Queue = HostTask->MQueue; + + std::shared_ptr Ctx{new HostTaskContext{HostTask}}; + + size_t ArgIdx = 0, ReqIdx = 0; + while (ArgIdx < HostTask->MArgs.size()) { + ArgDesc &Arg = HostTask->MArgs[ArgIdx]; + + switch (Arg.MType) { + case kernel_param_kind_t::kind_accessor: { + Requirement *Req = static_cast(Arg.MPtr); + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + + detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; + TaskReq->MData = AllocaCmd->getMemAllocation(); + ++ReqIdx; + break; + } + default: + throw std::runtime_error("Yet unsupported arg type"); + } + + ++ArgIdx; + } + + Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { + DispatchHostTask(Ctx); + }); + return CL_SUCCESS; + } case CG::CGTYPE::NONE: default: throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ac6baf67bfd25..5c7efee047e81 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -76,6 +76,25 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { std::move(MSharedPtrStorage), 
std::move(MRequirements), std::move(MEvents), Payload)); break; + case detail::CG::HOST_TASK: +// CommandGroup.reset(new detail::CGHostTask( +// std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), +// std::move(MArgsStorage), std::move(MAccStorage), +// std::move(MSharedPtrStorage), std::move(MRequirements), +// std::move(MEvents), std::move(MArgs), std::move(MKernelName), +// std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType, +// Payload)); + CommandGroup.reset(new detail::CGHostTask( + std::move(MHostTask), MQueue, std::move(MArgs), std::move(MArgsStorage), + std::move(MAccStorage), std::move(MSharedPtrStorage), + std::move(MRequirements), std::move(MEvents), MCGType, Payload)); +// CommandGroup.reset(new detail::CGHostTask( +// std::move(MHostTask), std::move(MArgsStorage), std::move(MAccStorage), +// std::move(MSharedPtrStorage), std::move(MRequirements), +// std::move(MEvents), std::move(MArgs), std::move(MKernelName), +// std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType, +// Payload)); + break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " "explicit memory operation.", diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index 7344d9f0062c1..a5cba2d304839 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -1,6 +1,11 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out +#include +#include +#include +#include + #include namespace S = cl::sycl; @@ -15,7 +20,7 @@ struct Context { std::condition_variable CV; }; -void thread1Fn(Context &Ctx) { +void Thread1Fn(Context &Ctx) { // 0. initialize resulting buffer with apriori wrong result { S::accessor GeneratorAcc(Ctx.Buf1, CGH); @@ -39,7 +44,7 @@ void thread1Fn(Context &Ctx) { }); // 2. 
submit host task writing from buf 1 to buf 2 - Queue.submit([&](S::handler &CGH) { + Ctx.Queue.submit([&](S::handler &CGH) { S::accessor CopierSrcAcc(Ctx.Buf1, CGH); S::accessor Lock(Ctx.Mutex); // T2.1. Wait until flag F is set eq true. @@ -103,7 +108,7 @@ void test() { // 3. check via host accessor that buf 2 contains valid data { S::accessor ResultAcc(Buf2); + S::access::target::host_buffer> ResultAcc(Ctx.Buf2); for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); From 9f3d2d47a505b054e6cb38fa3b2b2058932809ab Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 23 Mar 2020 12:30:26 +0300 Subject: [PATCH 018/188] [SYCL] Reimplement event::when_complete through host_task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 2 +- sycl/source/detail/event_impl.cpp | 18 +++++++++++------- sycl/source/detail/event_impl.hpp | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 869250c6a6561..9d970af9e0c48 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -604,7 +604,7 @@ class handler { MNDRDesc.set(range<1>(1)); MArgs = std::move(MAssociatedAccesors); - MHostTask.reset(new detail::HostTask(Func, MQueue)); + MHostTask.reset(new detail::HostTask(std::move(Func))); MCGType = detail::CG::HOST_TASK; } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 65588a47e62e9..d7caea162479a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -249,13 +249,17 @@ event_impl::get_info() const { } void event_impl::when_complete(std::shared_ptr Self, - std::function Func) { - if (auto Queue = MQueue.lock()) - Queue->getHostTaskAndEventCallbackThreadPool().submit([Self, Func] () { - Self->wait_and_throw(Self); - - Func(); - }); + std::function &&Func) { + if (auto Queue = MQueue.lock()) { + const 
detail::code_location &CodeLoc = {}; + auto Lambda = [Func, Self] (handler &CGH) mutable { + auto SelfEvent = createSyclObjFromImpl(Self); + CGH.depends_on(SelfEvent); + + CGH.codeplay_host_task(std::move(Func)); + }; + Queue->submit(Lambda, Queue, CodeLoc); + } else throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 5e307ec764cc4..acce6fcbcccc3 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -146,7 +146,7 @@ class event_impl { HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } void when_complete(std::shared_ptr Self, - std::function Func); + std::function &&Func); private: // When instrumentation is enabled emits trace event for event wait begin and From 92917a0fc333276180dd7061aebd8bc61db4b652 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 26 Mar 2020 22:29:00 +0300 Subject: [PATCH 019/188] [SYCL] Worked on host task Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 6 ++ sycl/source/detail/scheduler/commands.cpp | 65 ++++++++++++++++++- .../source/detail/scheduler/graph_builder.cpp | 4 ++ sycl/source/detail/thread_pool.hpp | 4 +- 4 files changed, 76 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index d7caea162479a..b02bbdd3fbc0e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -250,7 +250,11 @@ event_impl::get_info() const { void event_impl::when_complete(std::shared_ptr Self, std::function &&Func) { +#if 1 if (auto Queue = MQueue.lock()) { +#else + auto Queue = Scheduler::getInstance().getDefaultHostQueue(); +#endif const detail::code_location &CodeLoc = {}; auto Lambda = [Func, Self] (handler &CGH) mutable { auto SelfEvent = createSyclObjFromImpl(Self); @@ -259,9 +263,11 @@ void event_impl::when_complete(std::shared_ptr Self, 
CGH.codeplay_host_task(std::move(Func)); }; Queue->submit(Lambda, Queue, CodeLoc); +#if 1 } else throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); +#endif } static uint64_t getTimestamp() { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b9a8fe0c2b0de..f64f86cfdbea1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -191,11 +191,19 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { Plugin.call(Context->getHandleRef(), &GlueEventHandle); +#if 1 + EventImplPtr *GlueEventCopy = + new EventImplPtr(GlueEvent); // To increase the reference count by 1. + Plugin.call( + Event->getHandleRef(), CL_COMPLETE, EventCompletionClbk, + /*void *data=*/(GlueEventCopy)); +#else Event->when_complete(Event, [GlueEvent] () { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); const detail::plugin &Plugin = GlueEvent->getPlugin(); Plugin.call(GlueEventHandle, CL_COMPLETE); }); +#endif GlueEvents.push_back(GlueEvent); Result.push_back(std::move(GlueEvent)); @@ -1466,12 +1474,34 @@ struct HostTaskContext { CGHostTask *HostTask; // TODO events dependencies + const size_t RequiredAmount; + size_t CompletedAmount; + // TODO buffer dependencies, though a buffer dependency may be expressed via event std::mutex RequirementsMutex; std::condition_variable AnotherRequirementFulfilledCV; + + ContextImplPtr Context; }; +void DispatchHostTask2(pi_event Event, pi_int32 EventStatus, void *UD) { + HostTaskContext *Ctx = reinterpret_cast(UD); + + if (EventStatus == PI_EVENT_COMPLETE) + ++Ctx->CompletedAmount; + + assert(Ctx->CompletedAmount <= Ctx->RequiredAmount && + "Invalid event completion reported"); + + if (Ctx->CompletedAmount < Ctx->RequiredAmount) + return; + + Ctx->HostTask->MHostTask->call(); + + delete Ctx; +} + bool CheckHostTaskRequirements(const std::shared_ptr &Ctx) { (void)Ctx; // TODO check if all the requirements are fullfiled i.e: @@ -1789,8 
+1819,18 @@ cl_int ExecCGCommand::enqueueImp() { case CG::CGTYPE::HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); const QueueImplPtr &Queue = HostTask->MQueue; - +#if 0 + auto *Ctx = new HostTaskContext{ + static_cast(MCommandGroup.get()), + RawEvents.size(), + 0, + {}, + {}, + MQueue->getContextImplPtr() + }; +#else std::shared_ptr Ctx{new HostTaskContext{HostTask}}; +#endif size_t ArgIdx = 0, ReqIdx = 0; while (ArgIdx < HostTask->MArgs.size()) { @@ -1812,10 +1852,33 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } +#if 0 + const detail::plugin &Plugin = MQueue->getPlugin(); + ContextImplPtr Context = MQueue->getContextImplPtr(); + EventImplPtr HostTaskEvent(new detail::event_impl()); + HostTaskEvent->setContextImpl(Context); + RT::PiEvent &HostTaskEventHandle = HostTaskEvent->getHandleRef(); + Plugin.call(Context->getHandleRef(), + &HostTaskEventHandle); + // Increment refcount for pi_event + EventImplPtr *HostTaskEventCopy = new EventImplPtr(HostTaskEvent); + + // set callback for each and every dependency event + Plugin.call(); +#endif + +#if 0 + const detail::plugin &Plugin = MQueue->getPlugin(); + for (const RT::PiEvent &Event : RawEvents) + Plugin.call(Event, PI_EVENT_COMPLETE, + DispatchHostTask2, Ctx); +#else + // TODO create user event and set its callback to dispatch host task Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { DispatchHostTask(Ctx); }); +#endif return CL_SUCCESS; } case CG::CGTYPE::NONE: diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 62db768e40dfc..537483796dfa6 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -588,6 +588,10 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { const std::vector &Reqs = CommandGroup->MRequirements; const std::vector &Events = CommandGroup->MEvents; + + if (CommandGroup->getType() == 
CG::CGTYPE::HOST_TASK) + Queue = Scheduler::getInstance().getDefaultHostQueue(); + std::unique_ptr NewCmd( new ExecCGCommand(std::move(CommandGroup), Queue)); if (!NewCmd) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 4c9c2537c3b06..edbdf00f411de 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -44,9 +44,9 @@ class ThreadPool { } public: - ThreadPool(unsigned int ThreadCount = std::max( + ThreadPool(unsigned int ThreadCount = 2 /*std::max( 1L, - static_cast(std::thread::hardware_concurrency()) - 1)) + static_cast(std::thread::hardware_concurrency()) - 1)*/) : MThreadCount(ThreadCount) {} ~ThreadPool() { finishAndWait(); } From 8e3dcd83a9a5dea512bb21fcdbcddd015e96c4f3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 27 Mar 2020 14:07:19 +0300 Subject: [PATCH 020/188] [SYCL] Fix merge issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3e96f415b9013..917241c497966 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -198,8 +198,6 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { &GlueEventHandle); #if 1 - EventImplPtr *GlueEventCopy = - new EventImplPtr(GlueEvent); // To increase the reference count by 1. 
DepPlugin.call( DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, /*void *data=*/(GlueEventCopy)); From 14458e31271f12b55a5c4f97204f5412a6a4cc8e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 29 Mar 2020 11:44:03 +0300 Subject: [PATCH 021/188] [SYCL] Fix assert test Signed-off-by: Sergey Kanaev --- sycl/test/devicelib/assert.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index 1b6724b5ffa7d..351b3a861deab 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -100,10 +100,18 @@ // Note that the work-item that hits the assert first may vary, since the order // of execution is undefined. We catch only the first one (whatever id it is). +#ifndef PARENT_PROCESS #include +#endif + #include #include #include + +#ifdef PARENT_PROCESS +#include +#endif + #include #include #include From dde6af5de374cbb9eb5a5ed0d36c3dbcf31bbf01 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 30 Mar 2020 16:37:20 +0300 Subject: [PATCH 022/188] [SYCL] Event for host-task Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 37 +++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 917241c497966..e730294f620f6 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -186,8 +186,10 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { if (DepEventContext != Context && !Context->is_host()) { EventImplPtr GlueEvent(new detail::event_impl()); GlueEvent->setContextImpl(Context); +#if 1 EventImplPtr *GlueEventCopy = new EventImplPtr(GlueEvent); // To increase the reference count by 1. 
+#endif RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); auto Plugin = Context->getPlugin(); @@ -202,7 +204,7 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, /*void *data=*/(GlueEventCopy)); #else - Event->when_complete(Event, [GlueEvent] () { + DepEvent->when_complete(DepEvent, [GlueEvent] () { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); const detail::plugin &Plugin = GlueEvent->getPlugin(); Plugin.call(GlueEventHandle, CL_COMPLETE); @@ -1492,6 +1494,8 @@ struct HostTaskContext { std::condition_variable AnotherRequirementFulfilledCV; ContextImplPtr Context; + + EventImplPtr SelfEvent; }; void DispatchHostTask2(pi_event Event, pi_int32 EventStatus, void *UD) { @@ -1533,6 +1537,9 @@ void DispatchHostTask(const std::shared_ptr &Ctx) { Ctx->HostTask->MHostTask->call(); }); + const detail::plugin &Plugin = Ctx->SelfEvent->getPlugin(); + Plugin.call(Ctx->SelfEvent->getHandleRef(), + CL_COMPLETE); // Ctx will be deleted automatically by shared_ptr } @@ -1825,6 +1832,8 @@ cl_int ExecCGCommand::enqueueImp() { } case CG::CGTYPE::HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); + // MQueue is host queue here thus we'll employ the one host task is + // submitted to const QueueImplPtr &Queue = HostTask->MQueue; #if 0 auto *Ctx = new HostTaskContext{ @@ -1839,6 +1848,16 @@ cl_int ExecCGCommand::enqueueImp() { std::shared_ptr Ctx{new HostTaskContext{HostTask}}; #endif + if (true /*false*/) { + Ctx->SelfEvent = MEvent; + RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); + + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call(ContextRef, &Event); + + Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); + } + size_t ArgIdx = 0, ReqIdx = 0; while (ArgIdx < HostTask->MArgs.size()) { ArgDesc &Arg = HostTask->MArgs[ArgIdx]; @@ -1859,23 +1878,9 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } -#if 0 - const 
detail::plugin &Plugin = MQueue->getPlugin(); - ContextImplPtr Context = MQueue->getContextImplPtr(); - EventImplPtr HostTaskEvent(new detail::event_impl()); - HostTaskEvent->setContextImpl(Context); - RT::PiEvent &HostTaskEventHandle = HostTaskEvent->getHandleRef(); - Plugin.call(Context->getHandleRef(), - &HostTaskEventHandle); - // Increment refcount for pi_event - EventImplPtr *HostTaskEventCopy = new EventImplPtr(HostTaskEvent); - - // set callback for each and every dependency event - Plugin.call(); -#endif #if 0 - const detail::plugin &Plugin = MQueue->getPlugin(); + const detail::plugin &Plugin = Queue->getPlugin(); for (const RT::PiEvent &Event : RawEvents) Plugin.call(Event, PI_EVENT_COMPLETE, From e4c676446bd4cd542eab75948fb8bdd44ce395d6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 30 Mar 2020 22:47:31 +0300 Subject: [PATCH 023/188] [SYCL] Add test stub Signed-off-by: Sergey Kanaev --- .../host-task-dependency.cpp | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 sycl/test/host-interop-task/host-task-dependency.cpp diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp new file mode 100644 index 0000000000000..2127ad4d5007c --- /dev/null +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -0,0 +1,153 @@ +// RUN: %clangxx -fsycl %s -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_PI_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER + +#include +#include +#include +#include + +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf1; + S::buffer Buf2; + S::buffer Buf3; + std::mutex Mutex; + std::condition_variable CV; +}; + +void Thread1Fn(Context &Ctx) { + // 0. 
initialize resulting buffer with apriori wrong result + { + S::accessor Acc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor GeneratorAcc(Ctx.Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc] () { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. submit host task writing from buf 1 to buf 2 + auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor CopierSrcAcc(Ctx.Buf1, CGH); + S::accessor CopierDstAcc(Ctx.Buf2, CGH); + + auto CopierKernel = [CopierSrcAcc, CopierDstAcc, &Ctx] () { + for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) + CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + CGH.codeplay_host_task(CopierKernel); + }); + + // 3. submit simple task to move data between two buffers + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor SrcAcc(Ctx.Buf2, CGH); + S::accessor DstAcc(Ctx.Buf3, CGH); + + CGH.depends_on(HostTaskEvent); + + auto CopierKernel = [SrcAcc, DstAcc] () { + for (size_t Idx = 0; Idx < DstAcc.get_count(); ++Idx) + DstAcc[Idx] = SrcAcc[Idx]; + }; + + CGH.single_task(CopierKernel); + }); + + // 4. check data in buffer #3 + { + S::accessor Acc(Ctx.Buf3); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + assert(Acc[Idx] == Idx && "Invalid data in third buffer"); + } +} + +void Thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. 
print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void test() { + auto EH = [] (S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + // optional + Queue.set_event_cb_and_host_task_thread_pool_size(4); + + Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; + + // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); + + // 3. check via host accessor that buf 2 contains valid data + { + S::accessor ResultAcc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + } + } +} + +int main() { + test(); + + return 0; +} + +// CHECK:---> xxx From eb71004d03690d7317585aea3e36a049530610ea Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 31 Mar 2020 18:15:57 +0300 Subject: [PATCH 024/188] [SYCL] Distinct command for host task representation. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 167 ++++++++++++------ sycl/source/detail/scheduler/commands.hpp | 18 +- .../source/detail/scheduler/graph_builder.cpp | 54 ++++++ sycl/source/detail/scheduler/scheduler.cpp | 4 + sycl/source/detail/scheduler/scheduler.hpp | 2 + 5 files changed, 193 insertions(+), 52 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e730294f620f6..41d9533f7fa81 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1271,6 +1271,9 @@ static std::string cgTypeToString(detail::CG::CGTYPE Type) { case detail::CG::PREFETCH_USM: return "prefetch usm"; break; + case detail::CG::HOST_TASK: + return "host task"; + break; default: return "unknown"; break; @@ -1830,73 +1833,135 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(reinterpret_cast(interop_queue)); return CL_SUCCESS; } - case CG::CGTYPE::HOST_TASK: { - CGHostTask *HostTask = static_cast(MCommandGroup.get()); - // MQueue is host queue here thus we'll employ the one host task is - // submitted to - const QueueImplPtr &Queue = HostTask->MQueue; + case CG::CGTYPE::NONE: + default: + throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); + } +} + +HostTaskCommand::HostTaskCommand(std::unique_ptr CommandGroup, + QueueImplPtr Queue) + : Command(CommandType::HOST_TASK, std::move(Queue)), + MCommandGroup(std::move(CommandGroup)) { + + emitInstrumentationDataProxy(); +} + +void HostTaskCommand::printDot(std::ostream &Stream) const { + Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; + + Stream << "ID = " << this << "\\n"; + Stream << "EXEC HOST TASK ON " << deviceToString(MQueue->get_device()) << "\\n"; + + switch (MCommandGroup->getType()) { + case detail::CG::KERNEL: { + auto KernelCG = + reinterpret_cast(MCommandGroup.get()); + Stream << "Kernel name: "; + if (KernelCG->MSyclKernel && 
KernelCG->MSyclKernel->isCreatedFromSource()) + Stream << "created from source"; + else + Stream << demangleKernelName(KernelCG->getKernelName()); + Stream << "\\n"; + break; + } + default: + Stream << "CG type: " << cgTypeToString(MCommandGroup->getType()) << "\\n"; + break; + } + + Stream << "\"];" << std::endl; + + for (const auto &Dep : MDeps) { + Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\"" + << " [ label = \"Access mode: " + << accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n" + << "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]" + << std::endl; + } +} + +void HostTaskCommand::emitInstrumentationData() { + // TODO +} + +cl_int HostTaskCommand::enqueueImp() { + std::vector EventImpls = + Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + + auto RawEvents = getPiEvents(EventImpls); + + RT::PiEvent &Event = MEvent->getHandleRef(); + + CGHostTask *HostTask = static_cast(MCommandGroup.get()); + // MQueue is host queue here thus we'll employ the one host task is + // submitted to + const QueueImplPtr &Queue = HostTask->MQueue; #if 0 - auto *Ctx = new HostTaskContext{ - static_cast(MCommandGroup.get()), - RawEvents.size(), - 0, - {}, - {}, - MQueue->getContextImplPtr() - }; + auto *Ctx = new HostTaskContext{ + static_cast(MCommandGroup.get()), + RawEvents.size(), + 0, + {}, + {}, + MQueue->getContextImplPtr() + }; #else - std::shared_ptr Ctx{new HostTaskContext{HostTask}}; + std::shared_ptr Ctx{new HostTaskContext{HostTask}}; #endif - if (true /*false*/) { - Ctx->SelfEvent = MEvent; - RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); - - const detail::plugin &Plugin = Queue->getPlugin(); - Plugin.call(ContextRef, &Event); + if (true /*false*/) { + Ctx->SelfEvent = MEvent; + RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); - Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); - } + const detail::plugin &Plugin = Queue->getPlugin(); + 
Plugin.call(ContextRef, &Event); - size_t ArgIdx = 0, ReqIdx = 0; - while (ArgIdx < HostTask->MArgs.size()) { - ArgDesc &Arg = HostTask->MArgs[ArgIdx]; + Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); + } - switch (Arg.MType) { - case kernel_param_kind_t::kind_accessor: { - Requirement *Req = static_cast(Arg.MPtr); - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + size_t ArgIdx = 0, ReqIdx = 0; + while (ArgIdx < HostTask->MArgs.size()) { + ArgDesc &Arg = HostTask->MArgs[ArgIdx]; - detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; - TaskReq->MData = AllocaCmd->getMemAllocation(); - ++ReqIdx; - break; - } - default: - throw std::runtime_error("Yet unsupported arg type"); - } + switch (Arg.MType) { + case kernel_param_kind_t::kind_accessor: { + Requirement *Req = static_cast(Arg.MPtr); + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - ++ArgIdx; + detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; + TaskReq->MData = AllocaCmd->getMemAllocation(); + ++ReqIdx; + break; + } + default: + throw std::runtime_error("Yet unsupported arg type"); } + ++ArgIdx; + } + #if 0 - const detail::plugin &Plugin = Queue->getPlugin(); + const detail::plugin &Plugin = Queue->getPlugin(); - for (const RT::PiEvent &Event : RawEvents) - Plugin.call(Event, PI_EVENT_COMPLETE, - DispatchHostTask2, Ctx); + for (const RT::PiEvent &Event : RawEvents) + Plugin.call(Event, PI_EVENT_COMPLETE, + DispatchHostTask2, Ctx); #else - // TODO create user event and set its callback to dispatch host task - Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { - DispatchHostTask(Ctx); - }); + // TODO create user event and set its callback to dispatch host task + Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { + DispatchHostTask(Ctx); + }); #endif - return CL_SUCCESS; - } - case CG::CGTYPE::NONE: - default: - throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); + return CL_SUCCESS; +} + +AllocaCommandBase 
*HostTaskCommand::getAllocaForReq(Requirement *Req) { + for (const DepDesc &Dep : MDeps) { + if (Dep.MDepRequirement == Req) + return Dep.MAllocaCmd; } + throw runtime_error("Alloca for command not found", PI_INVALID_OPERATION); } } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 13763cc77a8d3..cf8584cd95707 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -88,7 +88,8 @@ class Command { MAP_MEM_OBJ, UNMAP_MEM_OBJ, UPDATE_REQUIREMENT, - EMPTY_TASK + EMPTY_TASK, + HOST_TASK }; Command(CommandType Type, QueueImplPtr Queue); @@ -429,6 +430,21 @@ class UpdateHostRequirementCommand : public Command { void **MDstPtr = nullptr; }; +class HostTaskCommand : public Command { +public: + HostTaskCommand(std::unique_ptr CommandGroup, QueueImplPtr Queue); + + void printDot(std::ostream &Stream) const final; + void emitInstrumentationData() final; + +private: + cl_int enqueueImp() final; + + AllocaCommandBase *getAllocaForReq(Requirement *Req); + + std::unique_ptr MCommandGroup; +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index c68d345db8d50..4ad19adae0939 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -386,6 +386,60 @@ Command *Scheduler::GraphBuilder::addCGUpdateHost( return insertMemoryMove(Record, Req, HostQueue); } +Command *Scheduler::GraphBuilder::addCGHostTask( + std::unique_ptr CommandGroup, QueueImplPtr HostQueue) { + const std::vector &Reqs = CommandGroup->MRequirements; + const std::vector &Events = CommandGroup->MEvents; + + std::unique_ptr NewCmd( + new HostTaskCommand(std::move(CommandGroup), HostQueue)); + if (!NewCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + + if 
(MPrintOptionsArray[BeforeAddCG]) + printGraphAsDot("before_addCGHostTask"); + + for (Requirement *Req : Reqs) { + MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); + markModifiedIfWrite(Record, Req); + + AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, HostQueue); + // If there is alloca command we need to check if the latest memory is in + // required context. + if (!sameCtx(HostQueue->getContextImplPtr(), Record->MCurContext)) { + // Cannot directly copy memory from OpenCL device to OpenCL device - + // create two copies: device->host and host->device. + if (!HostQueue->is_host() && !Record->MCurContext->is_host()) + insertMemoryMove(Record, Req, + Scheduler::getInstance().getDefaultHostQueue()); + insertMemoryMove(Record, Req, HostQueue); + } + std::set Deps = + findDepsForReq(Record, Req, HostQueue->getContextImplPtr()); + + for (Command *Dep : Deps) + NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); + } + + // Set new command as user for dependencies and update leaves. + for (DepDesc &Dep : NewCmd->MDeps) { + Dep.MDepCommand->addUser(NewCmd.get()); + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); + addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); + } + + // Register all the events as dependencies + for (detail::EventImplPtr e : Events) { + NewCmd->addDep(e); + } + + if (MPrintOptionsArray[AfterAddCG]) + printGraphAsDot("after_addCGHostTask"); + return NewCmd.release(); +} + // The functions finds dependencies for the requirement. 
It starts searching // from list of "leaf" commands for the record and check if the examining // command can be executed in parallel with new one with regard to the memory diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 9a5f02fab02c8..d1ebd96a3ca04 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -79,6 +79,10 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), DefaultHostQueue); break; + case CG::HOST_TASK: + NewCmd = MGraphBuilder.addCGHostTask(std::move(CommandGroup), + DefaultHostQueue); + break; default: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 90000f6ab558c..fd96436bd3ec5 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -115,6 +115,8 @@ class Scheduler { Command *addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue); + Command *addCGHostTask(std::unique_ptr CommandGroup, + QueueImplPtr Queue); Command *addCGUpdateHost(std::unique_ptr CommandGroup, QueueImplPtr HostQueue); From 90cac7c3987b72923e65a8f84ca3af42c4a8e12c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 1 Apr 2020 21:47:23 +0300 Subject: [PATCH 025/188] [SYCL] Depend device alloca cmd on cg's operating with linked host alloca cmd Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 4ad19adae0939..3f63e369a4ae5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -608,6 +608,15 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { 
LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); + + std::set Deps = + findDepsForReq(Record, Req, Queue->getContextImplPtr()); + for (Command *Dep : Deps) { + AllocaCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); + Dep->addUser(AllocaCmd); + } + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } From 1473f5d891ae3783b5681ea07a2ba22370c52a51 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 1 Apr 2020 21:48:08 +0300 Subject: [PATCH 026/188] [SYCL] Remove spare code. Wait for dependency events. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 103 +++++----------------- 1 file changed, 24 insertions(+), 79 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 41d9533f7fa81..80c7c1e35009a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -186,10 +186,8 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { if (DepEventContext != Context && !Context->is_host()) { EventImplPtr GlueEvent(new detail::event_impl()); GlueEvent->setContextImpl(Context); -#if 1 EventImplPtr *GlueEventCopy = new EventImplPtr(GlueEvent); // To increase the reference count by 1. 
-#endif RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); auto Plugin = Context->getPlugin(); @@ -199,17 +197,9 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { Plugin.call(Context->getHandleRef(), &GlueEventHandle); -#if 1 DepPlugin.call( DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, /*void *data=*/(GlueEventCopy)); -#else - DepEvent->when_complete(DepEvent, [GlueEvent] () { - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - const detail::plugin &Plugin = GlueEvent->getPlugin(); - Plugin.call(GlueEventHandle, CL_COMPLETE); - }); -#endif GlueEvents.push_back(GlueEvent); Result.push_back(std::move(GlueEvent)); @@ -1487,62 +1477,30 @@ void DispatchNativeKernel(void *Blob) { struct HostTaskContext { CGHostTask *HostTask; - // TODO events dependencies - const size_t RequiredAmount; - size_t CompletedAmount; - - // TODO buffer dependencies, though a buffer dependency may be expressed via event - - std::mutex RequirementsMutex; - std::condition_variable AnotherRequirementFulfilledCV; + // events dependencies + std::map> RequiredEventsPerPlugin; ContextImplPtr Context; EventImplPtr SelfEvent; }; -void DispatchHostTask2(pi_event Event, pi_int32 EventStatus, void *UD) { - HostTaskContext *Ctx = reinterpret_cast(UD); - - if (EventStatus == PI_EVENT_COMPLETE) - ++Ctx->CompletedAmount; - - assert(Ctx->CompletedAmount <= Ctx->RequiredAmount && - "Invalid event completion reported"); - - if (Ctx->CompletedAmount < Ctx->RequiredAmount) - return; - - Ctx->HostTask->MHostTask->call(); - - delete Ctx; -} - -bool CheckHostTaskRequirements(const std::shared_ptr &Ctx) { - (void)Ctx; - // TODO check if all the requirements are fullfiled i.e: - // - event: use clGetEventInfo - // - buffer: ??? maybe use copy_acc_to_acc? 
- return true; -} - void DispatchHostTask(const std::shared_ptr &Ctx) { - { - std::unique_lock Lock(Ctx->RequirementsMutex); - - Ctx->AnotherRequirementFulfilledCV.wait(Lock, [Ctx] () { - return CheckHostTaskRequirements(Ctx); - }); + // wait for dependency events + // FIXME introduce a more sophisticated wait mechanism + for (auto &PluginWithEvents : Ctx->RequiredEventsPerPlugin) { + auto RawEvents = getPiEvents(PluginWithEvents.second); + PluginWithEvents.first->call(RawEvents.size(), + RawEvents.data()); } - const QueueImplPtr &Queue = Scheduler::getInstance().getDefaultHostQueue(); - Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { - Ctx->HostTask->MHostTask->call(); - }); + // we're ready to call the user-defined lambda now + Ctx->HostTask->MHostTask->call(); const detail::plugin &Plugin = Ctx->SelfEvent->getPlugin(); Plugin.call(Ctx->SelfEvent->getHandleRef(), CL_COMPLETE); + // Ctx will be deleted automatically by shared_ptr } @@ -1897,29 +1855,24 @@ cl_int HostTaskCommand::enqueueImp() { // MQueue is host queue here thus we'll employ the one host task is // submitted to const QueueImplPtr &Queue = HostTask->MQueue; -#if 0 - auto *Ctx = new HostTaskContext{ - static_cast(MCommandGroup.get()), - RawEvents.size(), - 0, - {}, - {}, - MQueue->getContextImplPtr() - }; -#else std::shared_ptr Ctx{new HostTaskContext{HostTask}}; -#endif - if (true /*false*/) { - Ctx->SelfEvent = MEvent; - RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); + // Init self-event + Ctx->SelfEvent = MEvent; + RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); - const detail::plugin &Plugin = Queue->getPlugin(); - Plugin.call(ContextRef, &Event); + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call(ContextRef, &Event); + + Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); - Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); + // init dependency events in Ctx + for (EventImplPtr &Event : EventImpls) { 
+ const detail::plugin &Plugin = Event->getPlugin(); + Ctx->RequiredEventsPerPlugin[&Plugin].push_back(Event); } + size_t ArgIdx = 0, ReqIdx = 0; while (ArgIdx < HostTask->MArgs.size()) { ArgDesc &Arg = HostTask->MArgs[ArgIdx]; @@ -1941,18 +1894,10 @@ cl_int HostTaskCommand::enqueueImp() { ++ArgIdx; } -#if 0 - const detail::plugin &Plugin = Queue->getPlugin(); - - for (const RT::PiEvent &Event : RawEvents) - Plugin.call(Event, PI_EVENT_COMPLETE, - DispatchHostTask2, Ctx); -#else - // TODO create user event and set its callback to dispatch host task Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { DispatchHostTask(Ctx); }); -#endif + return CL_SUCCESS; } From bfb0572035dfff0c0940999b1318540534efd2a1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 1 Apr 2020 21:48:35 +0300 Subject: [PATCH 027/188] [SYCL] Remove spare code Signed-off-by: Sergey Kanaev --- sycl/source/handler.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5c7efee047e81..949f4c35b17ce 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -77,23 +77,10 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { std::move(MEvents), Payload)); break; case detail::CG::HOST_TASK: -// CommandGroup.reset(new detail::CGHostTask( -// std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel), -// std::move(MArgsStorage), std::move(MAccStorage), -// std::move(MSharedPtrStorage), std::move(MRequirements), -// std::move(MEvents), std::move(MArgs), std::move(MKernelName), -// std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType, -// Payload)); CommandGroup.reset(new detail::CGHostTask( std::move(MHostTask), MQueue, std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), MCGType, Payload)); -// CommandGroup.reset(new detail::CGHostTask( -// std::move(MHostTask), 
std::move(MArgsStorage), std::move(MAccStorage), -// std::move(MSharedPtrStorage), std::move(MRequirements), -// std::move(MEvents), std::move(MArgs), std::move(MKernelName), -// std::move(MOSModuleHandle), std::move(MStreamStorage), MCGType, -// Payload)); break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " From 9f68320ca9780da512f571c40d1576d7d305caaf Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 1 Apr 2020 22:36:10 +0300 Subject: [PATCH 028/188] [SYCL] Fix code-style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 80c7c1e35009a..eede767839ae4 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1478,7 +1478,8 @@ struct HostTaskContext { CGHostTask *HostTask; // events dependencies - std::map> RequiredEventsPerPlugin; + std::map> RequiredEventsPerPlugin; ContextImplPtr Context; From 442f905d6675c9b7dbcdb54c9c50da7aade77ae7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 2 Apr 2020 17:40:49 +0300 Subject: [PATCH 029/188] [SYCL] Move handling of multiple contexts out of Command::prepareEvents() Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 113 +++++++++++++--------- sycl/source/detail/scheduler/commands.hpp | 5 + 2 files changed, 74 insertions(+), 44 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index eede767839ae4..17e3b3891b780 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -168,7 +168,6 @@ void EventCompletionClbk(RT::PiEvent, pi_int32, void *data) { // Method prepares PI event's from list sycl::event's std::vector Command::prepareEvents(ContextImplPtr Context) { std::vector Result; - std::vector 
GlueEvents; for (EventImplPtr &DepEvent : MDepsEvents) { // Async work is not supported for host device. if (DepEvent->is_host()) { @@ -180,34 +179,17 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { if (DepEvent->getHandleRef() == nullptr) { continue; } + ContextImplPtr DepEventContext = DepEvent->getContextImpl(); - // If contexts don't match - connect them using user event + // If contexts don't match the events are already connected in addDep if (DepEventContext != Context && !Context->is_host()) { - EventImplPtr GlueEvent(new detail::event_impl()); - GlueEvent->setContextImpl(Context); - EventImplPtr *GlueEventCopy = - new EventImplPtr(GlueEvent); // To increase the reference count by 1. - - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - auto Plugin = Context->getPlugin(); - auto DepPlugin = DepEventContext->getPlugin(); - // Add an event on the current context that - // is triggered when the DepEvent is complete - Plugin.call(Context->getHandleRef(), - &GlueEventHandle); - - DepPlugin.call( - DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, - /*void *data=*/(GlueEventCopy)); - - GlueEvents.push_back(GlueEvent); - Result.push_back(std::move(GlueEvent)); continue; } + Result.push_back(DepEvent); } - MDepsEvents.insert(MDepsEvents.end(), GlueEvents.begin(), GlueEvents.end()); + return Result; } @@ -403,9 +385,51 @@ void Command::makeTraceEventEpilog() { #endif } +void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { + // Async work is not supported for host device. 
+ if (DepEvent->is_host()) { + // call to waitInternal() is in prepareEvents() as it's called from + // enqueue process functions + return; + } + + if (DepEvent->getHandleRef() == nullptr) { + return; + } + + ContextImplPtr DepEventContext = DepEvent->getContextImpl(); + // If contexts don't match - connect them using user event + if (DepEventContext != Context && !Context->is_host()) { + EventImplPtr GlueEvent(new detail::event_impl()); + GlueEvent->setContextImpl(Context); + EventImplPtr *GlueEventCopy = + new EventImplPtr(GlueEvent); // To increase the reference count by 1. + + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + auto Plugin = Context->getPlugin(); + auto DepPlugin = DepEventContext->getPlugin(); + // Add an event on the current context that + // is triggered when the DepEvent is complete + Plugin.call(Context->getHandleRef(), + &GlueEventHandle); + + DepPlugin.call( + DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, + /*void *data=*/(GlueEventCopy)); + + MDepsEvents.push_back(std::move(GlueEvent)); + } +} + +ContextImplPtr Command::getContext() const { + return detail::getSyclObjImpl(MQueue->get_context()); +} + void Command::addDep(DepDesc NewDep) { - if (NewDep.MDepCommand) + if (NewDep.MDepCommand) { MDepsEvents.push_back(NewDep.MDepCommand->getEvent()); + addDepSub(NewDep.MDepCommand->getEvent(), getContext()); + } MDeps.push_back(NewDep); #ifdef XPTI_ENABLE_INSTRUMENTATION emitEdgeEventForCommandDependence( @@ -424,7 +448,8 @@ void Command::addDep(EventImplPtr Event) { emitEdgeEventForEventDependence(Cmd, PiEventAddr); #endif - MDepsEvents.push_back(std::move(Event)); + MDepsEvents.push_back(Event); + addDepSub(std::move(Event), getContext()); } void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) { @@ -608,8 +633,7 @@ void AllocaCommand::emitInstrumentationData() { } cl_int AllocaCommand::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + 
std::vector EventImpls = Command::prepareEvents(getContext()); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -686,8 +710,7 @@ void AllocaSubBufCommand::emitInstrumentationData() { } cl_int AllocaSubBufCommand::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); RT::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( @@ -746,8 +769,7 @@ void ReleaseCommand::emitInstrumentationData() { } cl_int ReleaseCommand::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); std::vector RawEvents = getPiEvents(EventImpls); bool SkipRelease = false; @@ -855,8 +877,7 @@ void MapMemObject::emitInstrumentationData() { } cl_int MapMemObject::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -912,8 +933,7 @@ void UnMapMemObject::emitInstrumentationData() { } cl_int UnMapMemObject::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -978,11 +998,15 @@ void MemCpyCommand::emitInstrumentationData() { #endif } +ContextImplPtr MemCpyCommand::getContext() const { + QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; + return detail::getSyclObjImpl(Queue->get_context()); +} + cl_int MemCpyCommand::enqueueImp() { std::vector EventImpls; QueueImplPtr Queue = MQueue->is_host() ? 
MSrcQueue : MQueue; - EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(Queue->get_context())); + EventImpls = Command::prepareEvents(getContext()); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1046,8 +1070,7 @@ void ExecCGCommand::flushStreams() { cl_int UpdateHostRequirementCommand::enqueueImp() { std::vector EventImpls; - EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + EventImpls = Command::prepareEvents(getContext()); RT::PiEvent &Event = MEvent->getHandleRef(); Command::waitForEvents(MQueue, EventImpls, Event); @@ -1118,10 +1141,14 @@ void MemCpyCommandHost::emitInstrumentationData() { #endif } +ContextImplPtr MemCpyCommandHost::getContext() const { + QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; + return detail::getSyclObjImpl(Queue->get_context()); +} + cl_int MemCpyCommandHost::enqueueImp() { QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(Queue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1506,8 +1533,7 @@ void DispatchHostTask(const std::shared_ptr &Ctx) { } cl_int ExecCGCommand::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); auto RawEvents = getPiEvents(EventImpls); @@ -1845,8 +1871,7 @@ void HostTaskCommand::emitInstrumentationData() { } cl_int HostTaskCommand::enqueueImp() { - std::vector EventImpls = - Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector EventImpls = Command::prepareEvents(getContext()); auto RawEvents = getPiEvents(EventImpls); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index cf8584cd95707..0c8d36bccd0fc 100644 --- 
a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -165,6 +165,9 @@ class Command { void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, RT::PiEvent &Event); std::vector prepareEvents(ContextImplPtr Context); + void addDepSub(EventImplPtr DepEvent, ContextImplPtr Context); + + virtual ContextImplPtr getContext() const; // Private interface. Derived classes should implement this method. virtual cl_int enqueueImp() = 0; @@ -365,6 +368,7 @@ class MemCpyCommand : public Command { void emitInstrumentationData(); private: + ContextImplPtr getContext() const final; cl_int enqueueImp() final; QueueImplPtr MSrcQueue; @@ -386,6 +390,7 @@ class MemCpyCommandHost : public Command { void emitInstrumentationData(); private: + ContextImplPtr getContext() const final; cl_int enqueueImp() final; QueueImplPtr MSrcQueue; From 380b009c748c163703f5e614082a7233660ca89e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 3 Apr 2020 12:44:22 +0300 Subject: [PATCH 030/188] [SYCL] Output values in test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 2127ad4d5007c..f0eec7f5aba6b 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -138,9 +138,14 @@ void test() { S::accessor ResultAcc(Ctx.Buf2); + bool failure = false; for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); + + failure |= (ResultAcc[Idx] != Idx); } + + assert(!failure && "Invalid data in result buffer"); } } From cee7e47e50f95b9cb37a07711e044769533a165f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 3 Apr 2020 13:10:39 +0300 
Subject: [PATCH 031/188] [SYCL] Add CHECK-sequence to test Signed-off-by: Sergey Kanaev --- .../host-interop-task/host-task-dependency.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index f0eec7f5aba6b..173d9b0779ab1 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -155,4 +155,19 @@ int main() { return 0; } -// CHECK:---> xxx +// launch of GeneratorTask kernel +// CHECK:---> piKernelCreate( +// CHECK: GeneratorTask +// CHECK:---> piEnqueueKernelLaunch( +// prepare for host task +// CHECK:---> piEnqueueMemBufferMap( +// creation of host task self-event +// CHECK:---> piEventCreate( +// wait on dependencies of host task +// CHECK:---> piEventsWait( +// host task is done, set status of self-event +// CHECK:---> piEventSetStatus( +// launch of CopierTask kernel +// CHECK:---> piKernelCreate( +// CHECK: CopierTask +// CHECK:---> piEnqueueKernelLaunch( From b9003dc8ea0b034e243e6cce983483b4f9157ff0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 3 Apr 2020 13:17:53 +0300 Subject: [PATCH 032/188] [SYCL] Eliminate use of event callback during glue-ing of events from multiple contexts. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.hpp | 4 +++ sycl/source/detail/scheduler/commands.cpp | 39 +++++++++++++--------- sycl/source/detail/scheduler/scheduler.hpp | 2 ++ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index acce6fcbcccc3..86ac8b2081223 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -148,6 +148,10 @@ class event_impl { void when_complete(std::shared_ptr Self, std::function &&Func); + QueueImplWPtr getQueueWPtr() const { + return MQueue; + } + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 17e3b3891b780..d7e81478a5687 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -156,15 +156,6 @@ getPiEvents(const std::vector &EventImpls) { return RetPiEvents; } -void EventCompletionClbk(RT::PiEvent, pi_int32, void *data) { - // TODO: Handle return values. Store errors to async handler. - EventImplPtr *Event = (reinterpret_cast(data)); - RT::PiEvent &EventHandle = (*Event)->getHandleRef(); - const detail::plugin &Plugin = (*Event)->getPlugin(); - Plugin.call(EventHandle, PI_EVENT_COMPLETE); - delete (Event); -} - // Method prepares PI event's from list sycl::event's std::vector Command::prepareEvents(ContextImplPtr Context) { std::vector Result; @@ -402,20 +393,38 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { if (DepEventContext != Context && !Context->is_host()) { EventImplPtr GlueEvent(new detail::event_impl()); GlueEvent->setContextImpl(Context); - EventImplPtr *GlueEventCopy = - new EventImplPtr(GlueEvent); // To increase the reference count by 1. 
RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); auto Plugin = Context->getPlugin(); - auto DepPlugin = DepEventContext->getPlugin(); // Add an event on the current context that // is triggered when the DepEvent is complete + // TODO eliminate creation of user-event Plugin.call(Context->getHandleRef(), &GlueEventHandle); - DepPlugin.call( - DepEvent->getHandleRef(), PI_EVENT_COMPLETE, EventCompletionClbk, - /*void *data=*/(GlueEventCopy)); + // enqueue GlueCmd + std::function Func = [GlueEvent] () { + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + const detail::plugin &Plugin = GlueEvent->getPlugin(); + Plugin.call(GlueEventHandle, CL_COMPLETE); + }; + + std::unique_ptr HT(new detail::HostTask(std::move(Func))); + + std::unique_ptr GlueCG(new detail::CGHostTask( + std::move(HT), DepEvent->getQueueWPtr().lock(), + /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, + /* SharedPtrStorage = */ {}, /* Requirements = */ {}, + /* DepEvents = */{DepEvent}, CG::HOST_TASK, /* Payload */ {})); + + Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCGHostTask( + std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); + + EnqueueResultT Res; + bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed for glue command.", + PI_INVALID_OPERATION); MDepsEvents.push_back(std::move(GlueEvent)); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index fd96436bd3ec5..4845a065d1dea 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -234,6 +234,8 @@ class Scheduler { std::mutex MGraphLock; QueueImplPtr DefaultHostQueue; + + friend class Command; }; } // namespace detail From 667729db9c20cba5dff8e59a942fec9006df8e97 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 3 Apr 2020 14:59:21 +0300 
Subject: [PATCH 033/188] [SYCL] Fix some comments Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 2 +- sycl/include/CL/sycl/event.hpp | 2 - sycl/include/CL/sycl/handler.hpp | 2 +- sycl/include/CL/sycl/queue.hpp | 2 - sycl/source/detail/event_impl.cpp | 22 ----- sycl/source/detail/event_impl.hpp | 3 - sycl/source/detail/queue_impl.cpp | 15 +-- sycl/source/detail/queue_impl.hpp | 12 +-- sycl/source/detail/scheduler/commands.cpp | 28 ++---- .../source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/thread_pool.hpp | 4 +- sycl/source/event.cpp | 4 - sycl/source/handler.cpp | 2 +- sycl/source/queue.cpp | 3 - .../test/host-interop-task/event-callback.cpp | 91 ------------------- .../host-task-dependency.cpp | 3 - sycl/test/host-interop-task/host-task.cpp | 3 - 18 files changed, 24 insertions(+), 178 deletions(-) delete mode 100644 sycl/test/host-interop-task/event-callback.cpp diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 1ccd8ecf21c90..6c9cdd477b3d1 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -376,7 +376,7 @@ class CG { FILL_USM, PREFETCH_USM, INTEROP_TASK_CODEPLAY, - HOST_TASK + CODEPLAY_HOST_TASK }; CG(CGTYPE Type, vector_class> ArgsStorage, diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 2b7154cbf3bd7..58f1e7317c6c7 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -113,8 +113,6 @@ class event { typename info::param_traits::return_type get_profiling_info() const; - void when_complete(std::function Func); - private: event(shared_ptr_class EventImpl); diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 7eabb4a043fb2..f0d32f5748ff9 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -607,7 +607,7 @@ class handler { MHostTask.reset(new 
detail::HostTask(std::move(Func))); - MCGType = detail::CG::HOST_TASK; + MCGType = detail::CG::CODEPLAY_HOST_TASK; } /// Defines and invokes a SYCL kernel function for the specified range and diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 11b920deb73e4..870854c682686 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -637,8 +637,6 @@ class queue { /// Equivalent to has_property() bool is_in_order() const; - void set_event_cb_and_host_task_thread_pool_size(size_t Threads); - private: shared_ptr_class impl; template diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index b02bbdd3fbc0e..b4678181df937 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -248,28 +248,6 @@ event_impl::get_info() const { return info::event_command_status::complete; } -void event_impl::when_complete(std::shared_ptr Self, - std::function &&Func) { -#if 1 - if (auto Queue = MQueue.lock()) { -#else - auto Queue = Scheduler::getInstance().getDefaultHostQueue(); -#endif - const detail::code_location &CodeLoc = {}; - auto Lambda = [Func, Self] (handler &CGH) mutable { - auto SelfEvent = createSyclObjFromImpl(Self); - CGH.depends_on(SelfEvent); - - CGH.codeplay_host_task(std::move(Func)); - }; - Queue->submit(Lambda, Queue, CodeLoc); -#if 1 - } - else - throw runtime_error("Queue not available", PI_ERROR_UNKNOWN); -#endif -} - static uint64_t getTimestamp() { auto TimeStamp = std::chrono::high_resolution_clock::now().time_since_epoch(); return std::chrono::duration_cast(TimeStamp) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 86ac8b2081223..52ccb0a554b0b 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -145,9 +145,6 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. 
HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } - void when_complete(std::shared_ptr Self, - std::function &&Func); - QueueImplWPtr getQueueWPtr() const { return MQueue; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5188343cfc8a6..0f3c709f5749d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -187,13 +187,16 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { } void queue_impl::initHostTaskAndEventCallbackThreadPool() { - if (MHostTaskAndEventCallbackThreadPoolThreadsCount) - MHostTaskAndEventCallbackThreadPool.reset( - new ThreadPool(MHostTaskAndEventCallbackThreadPoolThreadsCount)); - else - MHostTaskAndEventCallbackThreadPool.reset(new ThreadPool); + if (MHostTaskThreadPool) + return; + + int Size = 1; + + if (const char *val = std::getenv("SYCL_QUEUE_THREAD_POOL_SIZE")) + Size = std::stoi(val); - MHostTaskAndEventCallbackThreadPool->start(); + MHostTaskThreadPool.reset(new ThreadPool(Size)); + MHostTaskThreadPool->start(); } } // namespace detail diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 360aa04bbf880..bc38141250426 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -347,15 +347,11 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - void set_event_cb_and_host_task_thread_pool_size(size_t Threads) { - MHostTaskAndEventCallbackThreadPoolThreadsCount = Threads; - } - ThreadPool &getHostTaskAndEventCallbackThreadPool() { - if (!MHostTaskAndEventCallbackThreadPool) + if (!MHostTaskThreadPool) initHostTaskAndEventCallbackThreadPool(); - return *MHostTaskAndEventCallbackThreadPool; + return *MHostTaskThreadPool; } private: @@ -413,12 +409,10 @@ class queue_impl { // Assume OOO support by default. bool MSupportOOO = true; - size_t MHostTaskAndEventCallbackThreadPoolThreadsCount = 0; - // Thread pool for host task and event callbacks execution. 
// The thread pool is instntiated upon the very first call to // getHostTaskAndEventCallbackThreadPool - std::unique_ptr MHostTaskAndEventCallbackThreadPool; + std::unique_ptr MHostTaskThreadPool; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d7e81478a5687..e691b29efb905 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -415,7 +415,7 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { std::move(HT), DepEvent->getQueueWPtr().lock(), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, - /* DepEvents = */{DepEvent}, CG::HOST_TASK, /* Payload */ {})); + /* DepEvents = */{DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCGHostTask( std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); @@ -1297,7 +1297,7 @@ static std::string cgTypeToString(detail::CG::CGTYPE Type) { case detail::CG::PREFETCH_USM: return "prefetch usm"; break; - case detail::CG::HOST_TASK: + case detail::CG::CODEPLAY_HOST_TASK: return "host task"; break; default: @@ -1845,24 +1845,9 @@ void HostTaskCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EXEC HOST TASK ON " << deviceToString(MQueue->get_device()) << "\\n"; - - switch (MCommandGroup->getType()) { - case detail::CG::KERNEL: { - auto KernelCG = - reinterpret_cast(MCommandGroup.get()); - Stream << "Kernel name: "; - if (KernelCG->MSyclKernel && KernelCG->MSyclKernel->isCreatedFromSource()) - Stream << "created from source"; - else - Stream << demangleKernelName(KernelCG->getKernelName()); - Stream << "\\n"; - break; - } - default: - Stream << "CG type: " << cgTypeToString(MCommandGroup->getType()) << "\\n"; - break; - } 
+ Stream << "EXEC HOST TASK ON " << deviceToString(MQueue->get_device()) + << "\\n"; + Stream << "CG type: " << cgTypeToString(MCommandGroup->getType()) << "\\n"; Stream << "\"];" << std::endl; @@ -1907,7 +1892,6 @@ cl_int HostTaskCommand::enqueueImp() { Ctx->RequiredEventsPerPlugin[&Plugin].push_back(Event); } - size_t ArgIdx = 0, ReqIdx = 0; while (ArgIdx < HostTask->MArgs.size()) { ArgDesc &Arg = HostTask->MArgs[ArgIdx]; @@ -1929,7 +1913,7 @@ cl_int HostTaskCommand::enqueueImp() { ++ArgIdx; } - Queue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { + MQueue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { DispatchHostTask(Ctx); }); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 3f63e369a4ae5..825e6e71fc9c9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -649,7 +649,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, const std::vector &Reqs = CommandGroup->MRequirements; const std::vector &Events = CommandGroup->MEvents; - if (CommandGroup->getType() == CG::CGTYPE::HOST_TASK) + if (CommandGroup->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) Queue = Scheduler::getInstance().getDefaultHostQueue(); std::unique_ptr NewCmd( diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d1ebd96a3ca04..847d4afe7184b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -79,7 +79,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), DefaultHostQueue); break; - case CG::HOST_TASK: + case CG::CODEPLAY_HOST_TASK: NewCmd = MGraphBuilder.addCGHostTask(std::move(CommandGroup), DefaultHostQueue); break; diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index edbdf00f411de..12e1de309cd3e 100644 --- 
a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -44,9 +44,7 @@ class ThreadPool { } public: - ThreadPool(unsigned int ThreadCount = 2 /*std::max( - 1L, - static_cast(std::thread::hardware_concurrency()) - 1)*/) + ThreadPool(unsigned int ThreadCount = 1) : MThreadCount(ThreadCount) {} ~ThreadPool() { finishAndWait(); } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 5f40f05d098c4..bf0b0780edca9 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -60,10 +60,6 @@ vector_class event::get_wait_list() { return Result; } -void event::when_complete(std::function Func) { - impl->when_complete(impl, std::move(Func)); -} - event::event(shared_ptr_class event_impl) : impl(event_impl) {} diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 949f4c35b17ce..b2247f4de013e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -76,7 +76,7 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), Payload)); break; - case detail::CG::HOST_TASK: + case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( std::move(MHostTask), MQueue, std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 211ba8a0b2460..305fad6c094cd 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -150,8 +150,5 @@ bool queue::is_in_order() const { return impl->has_property(); } -void queue::set_event_cb_and_host_task_thread_pool_size(size_t Threads) { - impl->set_event_cb_and_host_task_thread_pool_size(Threads); -} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/host-interop-task/event-callback.cpp b/sycl/test/host-interop-task/event-callback.cpp deleted file mode 100644 index 6e522bf3d2296..0000000000000 --- a/sycl/test/host-interop-task/event-callback.cpp +++ 
/dev/null @@ -1,91 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out %threads_lib -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include -#include -#include -#include -#include - -#include - -namespace S = cl::sycl; - -struct Context { - std::atomic_bool Flag; - S::queue &Queue; - std::string Message; - S::buffer Buf; - std::mutex Mutex; - std::condition_variable CV; -}; - -void Thread1Fn(Context &Ctx) { - // T1.1. submit device-side kernel K1 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor GeneratorAcc(Ctx.Buf, CGH); - - auto GeneratorKernel = [GeneratorAcc] () { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - CGH.single_task(GeneratorKernel); - }) - // T1.2. submit host task using event of K1 as a lock with callback to set - // flag F = true - .when_complete([&Ctx] () { - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }); -} - -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); - - // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); - - assert(Ctx.Flag.load()); - - // T2.2. print some "hello, world" message - Ctx.Message = "Hello, world"; -} - -void test() { - auto EH = [] (S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - // optional - Queue.set_event_cb_and_host_task_thread_pool_size(4); - - Context Ctx{{false}, Queue, "", {10}}; - - // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); -} - -int main(void) { - test(); - - return 0; -} diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 173d9b0779ab1..38fe471e618d1 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -118,9 +118,6 @@ void test() { S::queue Queue(EH); - // optional - Queue.set_event_cb_and_host_task_thread_pool_size(4); - Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index a5cba2d304839..6c83c0566e6e8 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -90,9 +90,6 @@ void test() { S::queue Queue(EH); - // optional - Queue.set_event_cb_and_host_task_thread_pool_size(4); - Context Ctx{{false}, Queue, "", {10}, {10}}; // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false From 072a8502e7ddfba2e8d5f175e1712f689cd506a8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 3 Apr 2020 15:36:12 +0300 Subject: [PATCH 034/188] [SYCL] Eliminate HostTaskCommand Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 130 ++++++------------ sycl/source/detail/scheduler/commands.hpp | 15 -- .../source/detail/scheduler/graph_builder.cpp | 54 -------- sycl/source/detail/scheduler/scheduler.cpp | 3 +- sycl/source/detail/scheduler/scheduler.hpp | 2 - 5 files changed, 43 insertions(+), 161 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e691b29efb905..e7f4b9f93e2e3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -417,7 +417,7 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */{DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); - Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCGHostTask( + Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); EnqueueResultT Res; @@ -1827,105 +1827,59 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(reinterpret_cast(interop_queue)); return CL_SUCCESS; } - case CG::CGTYPE::NONE: - default: - throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); - } -} - -HostTaskCommand::HostTaskCommand(std::unique_ptr CommandGroup, - QueueImplPtr Queue) - : Command(CommandType::HOST_TASK, std::move(Queue)), - MCommandGroup(std::move(CommandGroup)) { - - emitInstrumentationDataProxy(); -} - -void HostTaskCommand::printDot(std::ostream &Stream) const { - Stream << "\"" << this << "\" [style=filled, fillcolor=\"#AFFF82\", label=\""; + case CG::CGTYPE::CODEPLAY_HOST_TASK: { + CGHostTask *HostTask = 
static_cast(MCommandGroup.get()); + // MQueue is host queue here thus we'll employ the one host task is + // submitted to + const QueueImplPtr &Queue = HostTask->MQueue; + std::shared_ptr Ctx{new HostTaskContext{HostTask}}; - Stream << "ID = " << this << "\\n"; - Stream << "EXEC HOST TASK ON " << deviceToString(MQueue->get_device()) - << "\\n"; - Stream << "CG type: " << cgTypeToString(MCommandGroup->getType()) << "\\n"; + // Init self-event + Ctx->SelfEvent = MEvent; + RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); - Stream << "\"];" << std::endl; + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call(ContextRef, &Event); - for (const auto &Dep : MDeps) { - Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\"" - << " [ label = \"Access mode: " - << accessModeToString(Dep.MDepRequirement->MAccessMode) << "\\n" - << "MemObj: " << Dep.MDepRequirement->MSYCLMemObj << " \" ]" - << std::endl; - } -} - -void HostTaskCommand::emitInstrumentationData() { - // TODO -} - -cl_int HostTaskCommand::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); - - auto RawEvents = getPiEvents(EventImpls); - - RT::PiEvent &Event = MEvent->getHandleRef(); + Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); - CGHostTask *HostTask = static_cast(MCommandGroup.get()); - // MQueue is host queue here thus we'll employ the one host task is - // submitted to - const QueueImplPtr &Queue = HostTask->MQueue; - std::shared_ptr Ctx{new HostTaskContext{HostTask}}; - - // Init self-event - Ctx->SelfEvent = MEvent; - RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); - - const detail::plugin &Plugin = Queue->getPlugin(); - Plugin.call(ContextRef, &Event); - - Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); + // init dependency events in Ctx + for (EventImplPtr &Event : EventImpls) { + const detail::plugin &Plugin = Event->getPlugin(); + Ctx->RequiredEventsPerPlugin[&Plugin].push_back(Event); 
+ } - // init dependency events in Ctx - for (EventImplPtr &Event : EventImpls) { - const detail::plugin &Plugin = Event->getPlugin(); - Ctx->RequiredEventsPerPlugin[&Plugin].push_back(Event); - } + size_t ArgIdx = 0, ReqIdx = 0; + while (ArgIdx < HostTask->MArgs.size()) { + ArgDesc &Arg = HostTask->MArgs[ArgIdx]; - size_t ArgIdx = 0, ReqIdx = 0; - while (ArgIdx < HostTask->MArgs.size()) { - ArgDesc &Arg = HostTask->MArgs[ArgIdx]; + switch (Arg.MType) { + case kernel_param_kind_t::kind_accessor: { + Requirement *Req = static_cast(Arg.MPtr); + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - switch (Arg.MType) { - case kernel_param_kind_t::kind_accessor: { - Requirement *Req = static_cast(Arg.MPtr); - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; + TaskReq->MData = AllocaCmd->getMemAllocation(); + ++ReqIdx; + break; + } + default: + throw std::runtime_error("Yet unsupported arg type"); + } - detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; - TaskReq->MData = AllocaCmd->getMemAllocation(); - ++ReqIdx; - break; - } - default: - throw std::runtime_error("Yet unsupported arg type"); + ++ArgIdx; } - ++ArgIdx; - } - - MQueue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { - DispatchHostTask(Ctx); - }); - - return CL_SUCCESS; -} + MQueue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { + DispatchHostTask(Ctx); + }); -AllocaCommandBase *HostTaskCommand::getAllocaForReq(Requirement *Req) { - for (const DepDesc &Dep : MDeps) { - if (Dep.MDepRequirement == Req) - return Dep.MAllocaCmd; + return CL_SUCCESS; + } + case CG::CGTYPE::NONE: + default: + throw runtime_error("CG type not implemented.", PI_INVALID_OPERATION); } - throw runtime_error("Alloca for command not found", PI_INVALID_OPERATION); } } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 0c8d36bccd0fc..c92c3b9662a15 100644 --- 
a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -435,21 +435,6 @@ class UpdateHostRequirementCommand : public Command { void **MDstPtr = nullptr; }; -class HostTaskCommand : public Command { -public: - HostTaskCommand(std::unique_ptr CommandGroup, QueueImplPtr Queue); - - void printDot(std::ostream &Stream) const final; - void emitInstrumentationData() final; - -private: - cl_int enqueueImp() final; - - AllocaCommandBase *getAllocaForReq(Requirement *Req); - - std::unique_ptr MCommandGroup; -}; - } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 825e6e71fc9c9..9b4b5db58a8a8 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -386,60 +386,6 @@ Command *Scheduler::GraphBuilder::addCGUpdateHost( return insertMemoryMove(Record, Req, HostQueue); } -Command *Scheduler::GraphBuilder::addCGHostTask( - std::unique_ptr CommandGroup, QueueImplPtr HostQueue) { - const std::vector &Reqs = CommandGroup->MRequirements; - const std::vector &Events = CommandGroup->MEvents; - - std::unique_ptr NewCmd( - new HostTaskCommand(std::move(CommandGroup), HostQueue)); - if (!NewCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - - if (MPrintOptionsArray[BeforeAddCG]) - printGraphAsDot("before_addCGHostTask"); - - for (Requirement *Req : Reqs) { - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); - markModifiedIfWrite(Record, Req); - - AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, HostQueue); - // If there is alloca command we need to check if the latest memory is in - // required context. - if (!sameCtx(HostQueue->getContextImplPtr(), Record->MCurContext)) { - // Cannot directly copy memory from OpenCL device to OpenCL device - - // create two copies: device->host and host->device. 
- if (!HostQueue->is_host() && !Record->MCurContext->is_host()) - insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue()); - insertMemoryMove(Record, Req, HostQueue); - } - std::set Deps = - findDepsForReq(Record, Req, HostQueue->getContextImplPtr()); - - for (Command *Dep : Deps) - NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - } - - // Set new command as user for dependencies and update leaves. - for (DepDesc &Dep : NewCmd->MDeps) { - Dep.MDepCommand->addUser(NewCmd.get()); - const Requirement *Req = Dep.MDepRequirement; - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); - addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); - } - - // Register all the events as dependencies - for (detail::EventImplPtr e : Events) { - NewCmd->addDep(e); - } - - if (MPrintOptionsArray[AfterAddCG]) - printGraphAsDot("after_addCGHostTask"); - return NewCmd.release(); -} - // The functions finds dependencies for the requirement. 
It starts searching // from list of "leaf" commands for the record and check if the examining // command can be executed in parallel with new one with regard to the memory diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 847d4afe7184b..8e38b153afbeb 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -80,8 +80,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, DefaultHostQueue); break; case CG::CODEPLAY_HOST_TASK: - NewCmd = MGraphBuilder.addCGHostTask(std::move(CommandGroup), - DefaultHostQueue); + NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue); break; default: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 4845a065d1dea..b165230641a5b 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -115,8 +115,6 @@ class Scheduler { Command *addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue); - Command *addCGHostTask(std::unique_ptr CommandGroup, - QueueImplPtr Queue); Command *addCGUpdateHost(std::unique_ptr CommandGroup, QueueImplPtr HostQueue); From 40f4a32171797b809986f7dbd905e524e4ad05f1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 6 Apr 2020 10:18:42 +0300 Subject: [PATCH 035/188] [SYCL] Fix codestyle issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/accessor.hpp | 6 ++-- sycl/include/CL/sycl/detail/cg.hpp | 14 ++++---- sycl/include/CL/sycl/event.hpp | 2 +- sycl/include/CL/sycl/handler.hpp | 18 ++++------ sycl/source/detail/event_impl.hpp | 4 +-- sycl/source/detail/scheduler/commands.cpp | 16 ++++----- sycl/source/detail/thread_pool.hpp | 3 +- .../host-task-dependency.cpp | 36 +++++++++++-------- sycl/test/host-interop-task/host-task.cpp | 25 +++++++------ 9 files changed, 64 insertions(+), 60 deletions(-) 
diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index c3ef9b571537a..10377fdc02073 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -816,9 +816,9 @@ class accessor : #endif template 0) && (Dims == Dimensions) && - (!IsPlaceH && - (IsGlobalBuf || IsConstantBuf || IsHostBuf))>> + typename = detail::enable_if_t< + (Dims > 0) && (Dims == Dimensions) && + (!IsPlaceH && (IsGlobalBuf || IsConstantBuf || IsHostBuf))>> accessor(buffer &BufferRef, handler &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 6c9cdd477b3d1..200900cc07d88 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -190,8 +190,7 @@ class HostTask { std::function MHostTask; public: - HostTask(std::function &&Func) - : MHostTask(Func) {} + HostTask(std::function &&Func) : MHostTask(Func) {} void call() { MHostTask(); } }; @@ -630,12 +629,11 @@ class CGHostTask : public CG { std::vector Requirements, std::vector Events, CGTYPE Type, detail::code_location loc = {}) - : CG(Type, std::move(ArgsStorage), std::move(AccStorage), - std::move(SharedPtrStorage), std::move(Requirements), - std::move(Events), std::move(loc)), - MHostTask(std::move(HostTask)), MQueue(std::move(Queue)), - MArgs(std::move(Args)) - {} + : CG(Type, std::move(ArgsStorage), std::move(AccStorage), + std::move(SharedPtrStorage), std::move(Requirements), + std::move(Events), std::move(loc)), + MHostTask(std::move(HostTask)), MQueue(std::move(Queue)), + MArgs(std::move(Args)) {} }; } // namespace detail diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 58f1e7317c6c7..ed4518a7514bf 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -12,8 +12,8 @@ #include #include -#include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git 
a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index f0d32f5748ff9..d280f32c85d03 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -106,8 +106,7 @@ template struct get_kernel_name_t { device getDeviceFromHandler(handler &); -template -struct check_fn_signature { +template struct check_fn_signature { static_assert(std::integral_constant::value, "Second template parameter is required to be of function type"); }; @@ -116,13 +115,11 @@ template struct check_fn_signature { private: template - static constexpr auto check(T*) - -> typename std::is_same< - decltype(std::declval().operator()(std::declval()...)), - RetT>::type; + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval().operator()(std::declval()...)), + RetT>::type; - template - static constexpr std::false_type check(...); + template static constexpr std::false_type check(...); typedef decltype(check(0)) type; @@ -596,9 +593,8 @@ class handler { } template - typename std::enable_if< - detail::check_fn_signature::type, - void()>::value>::type + typename std::enable_if::type, void()>::value>::type codeplay_host_task(FuncT &&Func) { throwIfActionIsCreated(); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 52ccb0a554b0b..3bd8efda4ad0d 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -145,9 +145,7 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. 
HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } - QueueImplWPtr getQueueWPtr() const { - return MQueue; - } + QueueImplWPtr getQueueWPtr() const { return MQueue; } private: // When instrumentation is enabled emits trace event for event wait begin and diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index cefb2537ef7e9..fe088f059c4d1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -404,7 +404,7 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { &GlueEventHandle); // enqueue GlueCmd - std::function Func = [GlueEvent] () { + std::function Func = [GlueEvent]() { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); const detail::plugin &Plugin = GlueEvent->getPlugin(); Plugin.call(GlueEventHandle, CL_COMPLETE); @@ -416,10 +416,11 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { std::move(HT), DepEvent->getQueueWPtr().lock(), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, - /* DepEvents = */{DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, + /* Payload */ {})); Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( - std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); @@ -1525,8 +1526,8 @@ struct HostTaskContext { CGHostTask *HostTask; // events dependencies - std::map> RequiredEventsPerPlugin; + std::map> + RequiredEventsPerPlugin; ContextImplPtr Context; @@ -1881,9 +1882,8 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } - MQueue->getHostTaskAndEventCallbackThreadPool().submit([Ctx] () { - DispatchHostTask(Ctx); - }); + 
MQueue->getHostTaskAndEventCallbackThreadPool().submit( + [Ctx]() { DispatchHostTask(Ctx); }); return CL_SUCCESS; } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 12e1de309cd3e..4e765f847e3d4 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -44,8 +44,7 @@ class ThreadPool { } public: - ThreadPool(unsigned int ThreadCount = 1) - : MThreadCount(ThreadCount) {} + ThreadPool(unsigned int ThreadCount = 1) : MThreadCount(ThreadCount) {} ~ThreadPool() { finishAndWait(); } diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 38fe471e618d1..a6ab395e2f5da 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include @@ -26,7 +26,8 @@ void Thread1Fn(Context &Ctx) { // 0. initialize resulting buffer with apriori wrong result { S::accessor Acc(Ctx.Buf2); + S::access::target::host_buffer> + Acc(Ctx.Buf2); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -1; @@ -35,9 +36,10 @@ void Thread1Fn(Context &Ctx) { // 1. submit task writing to buffer 1 Ctx.Queue.submit([&](S::handler &CGH) { S::accessor GeneratorAcc(Ctx.Buf1, CGH); + S::access::target::global_buffer> + GeneratorAcc(Ctx.Buf1, CGH); - auto GeneratorKernel = [GeneratorAcc] () { + auto GeneratorKernel = [GeneratorAcc]() { for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) GeneratorAcc[Idx] = Idx; }; @@ -48,11 +50,13 @@ void Thread1Fn(Context &Ctx) { // 2. 
submit host task writing from buf 1 to buf 2 auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { S::accessor CopierSrcAcc(Ctx.Buf1, CGH); + S::access::target::host_buffer> + CopierSrcAcc(Ctx.Buf1, CGH); S::accessor CopierDstAcc(Ctx.Buf2, CGH); + S::access::target::host_buffer> + CopierDstAcc(Ctx.Buf2, CGH); - auto CopierKernel = [CopierSrcAcc, CopierDstAcc, &Ctx] () { + auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx]() { for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) CopierDstAcc[Idx] = CopierSrcAcc[Idx]; @@ -67,19 +71,21 @@ void Thread1Fn(Context &Ctx) { } }; - CGH.codeplay_host_task(CopierKernel); + CGH.codeplay_host_task(CopierHostTask); }); // 3. submit simple task to move data between two buffers Ctx.Queue.submit([&](S::handler &CGH) { S::accessor SrcAcc(Ctx.Buf2, CGH); + S::access::target::global_buffer> + SrcAcc(Ctx.Buf2, CGH); S::accessor DstAcc(Ctx.Buf3, CGH); + S::access::target::global_buffer> + DstAcc(Ctx.Buf3, CGH); CGH.depends_on(HostTaskEvent); - auto CopierKernel = [SrcAcc, DstAcc] () { + auto CopierKernel = [SrcAcc, DstAcc]() { for (size_t Idx = 0; Idx < DstAcc.get_count(); ++Idx) DstAcc[Idx] = SrcAcc[Idx]; }; @@ -90,7 +96,8 @@ void Thread1Fn(Context &Ctx) { // 4. check data in buffer #3 { S::accessor Acc(Ctx.Buf3); + S::access::target::host_buffer> + Acc(Ctx.Buf3); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) assert(Acc[Idx] == Idx && "Invalid data in third buffer"); @@ -110,7 +117,7 @@ void Thread2Fn(Context &Ctx) { } void test() { - auto EH = [] (S::exception_list EL) { + auto EH = [](S::exception_list EL) { for (const std::exception_ptr &E : EL) { throw E; } @@ -133,7 +140,8 @@ void test() { // 3. 
check via host accessor that buf 2 contains valid data { S::accessor ResultAcc(Ctx.Buf2); + S::access::target::host_buffer> + ResultAcc(Ctx.Buf2); bool failure = false; for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index 6c83c0566e6e8..fd973fbd285a1 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -3,8 +3,8 @@ #include #include -#include #include +#include #include @@ -24,7 +24,8 @@ void Thread1Fn(Context &Ctx) { // 0. initialize resulting buffer with apriori wrong result { S::accessor Acc(Ctx.Buf2); + S::access::target::host_buffer> + Acc(Ctx.Buf2); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -1; @@ -33,9 +34,10 @@ void Thread1Fn(Context &Ctx) { // 1. submit task writing to buffer 1 Ctx.Queue.submit([&](S::handler &CGH) { S::accessor GeneratorAcc(Ctx.Buf1, CGH); + S::access::target::global_buffer> + GeneratorAcc(Ctx.Buf1, CGH); - auto GeneratorKernel = [GeneratorAcc] () { + auto GeneratorKernel = [GeneratorAcc]() { for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) GeneratorAcc[Idx] = Idx; }; @@ -46,11 +48,13 @@ void Thread1Fn(Context &Ctx) { // 2. 
submit host task writing from buf 1 to buf 2 Ctx.Queue.submit([&](S::handler &CGH) { S::accessor CopierSrcAcc(Ctx.Buf1, CGH); + S::access::target::host_buffer> + CopierSrcAcc(Ctx.Buf1, CGH); S::accessor CopierDstAcc(Ctx.Buf2, CGH); + S::access::target::host_buffer> + CopierDstAcc(Ctx.Buf2, CGH); - auto CopierKernel = [CopierSrcAcc, CopierDstAcc, &Ctx] () { + auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx]() { for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) CopierDstAcc[Idx] = CopierSrcAcc[Idx]; @@ -65,7 +69,7 @@ void Thread1Fn(Context &Ctx) { } }; - CGH.codeplay_host_task(CopierKernel); + CGH.codeplay_host_task(CopierHostTask); }); } @@ -82,7 +86,7 @@ void Thread2Fn(Context &Ctx) { } void test() { - auto EH = [] (S::exception_list EL) { + auto EH = [](S::exception_list EL) { for (const std::exception_ptr &E : EL) { throw E; } @@ -105,7 +109,8 @@ void test() { // 3. check via host accessor that buf 2 contains valid data { S::accessor ResultAcc(Ctx.Buf2); + S::access::target::host_buffer> + ResultAcc(Ctx.Buf2); for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); From 465c4e7f65b4aea21bc267dcfb9ccc21039e5b81 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 6 Apr 2020 14:48:56 +0300 Subject: [PATCH 036/188] [SYCL] Add interop_handle class Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/interop_handle.hpp | 100 ++++++++++++++++++++++++ sycl/source/interop_handle.cpp | 27 +++++++ 2 files changed, 127 insertions(+) create mode 100644 sycl/include/CL/sycl/interop_handle.hpp create mode 100644 sycl/source/interop_handle.cpp diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp new file mode 100644 index 0000000000000..8efdf9699d6ac --- /dev/null +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -0,0 +1,100 @@ +//==------------ interop_handle.hpp --- SYCL interop handle ----------------==// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template +class accessor; + +class interop_handle { +public: + /// Receives a SYCL accessor that has been defined is a requirement for the + /// command group, and returns the underlying OpenCL memory object that is + /// used by the SYCL runtime. If the accessor passed as parameter is not part + /// of the command group requirements (e.g. it is an unregistered placeholder + /// accessor), the exception `cl::sycl::invalid_object` is thrown + /// asynchronously. + template + typename std::enable_if::type + get_native_mem(const accessor &Acc) const { + auto *AccBase = static_cast(&Acc); + return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); + } + + template + typename std::enable_if::type + get_native_mem(const accessor &Acc) const { + throw invalid_object_error("Getting memory object out of host accessor is " + "not allowed", PI_INVALID_MEM_OBJECT); + } + + /// Returns an underlying OpenCL queue for the SYCL queue used to submit the + /// command group, or the fallback queue if this command-group is re-trying + /// execution on an OpenCL queue. The OpenCL command queue returned is + /// implementation-defined in cases where the SYCL queue maps to multiple + /// underlying OpenCL objects. It is responsibility of the SYCL runtime to + /// ensure the OpenCL queue returned is in a state that can be used to + /// dispatch work, and that other potential OpenCL command queues associated + /// with the same SYCL command queue are not executing commands while the host + /// task is executing. 
+ pi_queue get_native_queue() const noexcept { + return MQueue; + } + + /// Returns an underlying OpenCL device associated with the SYCL queue used + /// to submit the command group, or the fallback queue if this command-group + /// is re-trying execution on an OpenCL queue. + cl_device_id get_native_device() const noexcept { + return MDeviceId; + } + + /// Returns an underlying OpenCL context associated with the SYCL queue used + /// to submit the command group, or the fallback queue if this command-group + /// is re-trying execution on an OpenCL queue. + pi_context get_native_context() const noexcept { + return MContext; + } + +private: + using ReqToMem = std::pair; + + template + friend class accessor; + + + interop_handle(std::vector MemObjs, pi_queue Queue, + cl_device_id DeviceId, pi_context Context) + : MQueue(Queue), MDeviceId(DeviceId), + MContext(Context), MMemObjs(std::move(MemObjs)) {} + + pi_mem getMemImpl(detail::Requirement* Req) const; + + pi_queue MQueue; + cl_device_id MDeviceId; + pi_context MContext; + std::vector MMemObjs; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp new file mode 100644 index 0000000000000..3001fa6b481ab --- /dev/null +++ b/sycl/source/interop_handle.cpp @@ -0,0 +1,27 @@ +//==------------ interop_handle.cpp --- SYCL interop handle ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +pi_mem interop_handle::getMemImpl(detail::Requirement *Req) const { + auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), + [=](ReqToMem Elem) { + return (Elem.first == Req); + }); + + if (Iter == std::end(MMemObjs)) + throw("Invalid memory object used inside interop"); + + return detail::pi::cast(Iter->second); +} + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) From 0114276e76f03ed8efd64209292fc503f15b10e0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 6 Apr 2020 14:49:50 +0300 Subject: [PATCH 037/188] [SYCL] Sample implementation of host-interop-task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 6 +++++ sycl/include/CL/sycl/handler.hpp | 16 +++++++++++++ sycl/source/detail/scheduler/commands.cpp | 28 ++++++++++++++++++++++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 200900cc07d88..79485e5ba731c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -188,11 +189,16 @@ class InteropTask { class HostTask { std::function MHostTask; + std::function MInteropTask; public: HostTask(std::function &&Func) : MHostTask(Func) {} + HostTask(std::function &&Func) : MInteropTask(Func) {} + + bool isInteropTask() const { return MInteropTask; } void call() { MHostTask(); } + void call(interop_handle handle) { MInteropTask(handle); } }; // Class which stores specific lambda object. 
diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index d280f32c85d03..efe01118ac7d6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -606,6 +607,21 @@ class handler { MCGType = detail::CG::CODEPLAY_HOST_TASK; } + template + typename std::enable_if::type, + void(interop_handle)>::value>::type + codeplay_host_task(FuncT &&Func) { + throwIfActionIsCreated(); + + MNDRDesc.set(range<1>(1)); + MArgs = std::move(MAssociatedAccesors); + + MHostTask.reset(new detail::HostTask(std::move(Func))); + + MCGType = detail::CG::CODEPLAY_HOST_TASK; + } + /// Defines and invokes a SYCL kernel function for the specified range and /// offset. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index fe088f059c4d1..57a3f9636276d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1528,6 +1528,7 @@ struct HostTaskContext { // events dependencies std::map> RequiredEventsPerPlugin; + std::vector ReqToMem; ContextImplPtr Context; @@ -1543,8 +1544,19 @@ void DispatchHostTask(const std::shared_ptr &Ctx) { RawEvents.data()); } + std::unique_ptr &HT = Ctx->HostTask->MHostTask; + // we're ready to call the user-defined lambda now - Ctx->HostTask->MHostTask->call(); + if (HT->isInterop()) { + auto Queue = Ctx->CGHostTask->MQueue->get(); + auto DeviceId = Ctx->CGHostTask->MQueue->get_device()->get(); + auto Context = Ctx->CGHostTask->MQueue->get_context()->get(); + + interop_handle IH{Ctx->ReqToMem, Queue, DeviceId, Context}; + + HT->call(IH); + } else + HT->call(); const detail::plugin &Plugin = Ctx->SelfEvent->getPlugin(); Plugin.call(Ctx->SelfEvent->getHandleRef(), @@ -1882,6 +1894,20 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } + std::vector &ReqToMem = Ctx->ReqToMem; + // Extract the Mem Objects for all Requirements, to ensure they 
are + // available if a user ask for them inside the interop task scope + const auto& HandlerReq = ExecInterop->MRequirements; + std::for_each(std::begin(HandlerReq), std::end(HandlerReq), + [&](Requirement* Req) { + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); + interop_handle::ReqToMem ReqToMem = std::make_pair(Req, MemArg); + ReqToMem.emplace_back(ReqToMem); + }); + + std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); + MQueue->getHostTaskAndEventCallbackThreadPool().submit( [Ctx]() { DispatchHostTask(Ctx); }); From 2766936774ac10c29a62076abc8d4523351e5fd4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 6 Apr 2020 18:05:20 +0300 Subject: [PATCH 038/188] [SYCL] Fix build issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 2 +- sycl/include/CL/sycl/interop_handle.hpp | 35 ++++++++++++++++------- sycl/source/detail/scheduler/commands.cpp | 16 +++++------ sycl/source/detail/scheduler/commands.hpp | 3 ++ sycl/source/interop_handle.cpp | 2 +- 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 79485e5ba731c..44a47226fa0d1 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -195,7 +195,7 @@ class HostTask { HostTask(std::function &&Func) : MHostTask(Func) {} HostTask(std::function &&Func) : MInteropTask(Func) {} - bool isInteropTask() const { return MInteropTask; } + bool isInteropTask() const { return !!MInteropTask; } void call() { MHostTask(); } void call(interop_handle handle) { MInteropTask(handle); } diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index 8efdf9699d6ac..a587ffe5d0f71 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -13,9 +13,18 @@ #include #include +#include + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { 
+namespace detail { + class ExecCGCommand; + struct HostTaskContext; + + void DispatchHostTask(const std::shared_ptr &); +} + template class accessor; @@ -31,7 +40,7 @@ class interop_handle { template typename std::enable_if::type + cl_mem>::type get_native_mem(const accessor &Acc) const { auto *AccBase = static_cast(&Acc); @@ -41,7 +50,7 @@ class interop_handle { template typename std::enable_if::type + cl_mem>::type get_native_mem(const accessor &Acc) const { throw invalid_object_error("Getting memory object out of host accessor is " @@ -57,7 +66,7 @@ class interop_handle { /// dispatch work, and that other potential OpenCL command queues associated /// with the same SYCL command queue are not executing commands while the host /// task is executing. - pi_queue get_native_queue() const noexcept { + cl_command_queue get_native_queue() const noexcept { return MQueue; } @@ -71,7 +80,7 @@ class interop_handle { /// Returns an underlying OpenCL context associated with the SYCL queue used /// to submit the command group, or the fallback queue if this command-group /// is re-trying execution on an OpenCL queue. 
- pi_context get_native_context() const noexcept { + cl_context get_native_context() const noexcept { return MContext; } @@ -81,18 +90,24 @@ class interop_handle { template friend class accessor; + friend class detail::ExecCGCommand; + friend struct detail::HostTaskContext; + friend void DispatchHostTask( + const std::shared_ptr &); - - interop_handle(std::vector MemObjs, pi_queue Queue, - cl_device_id DeviceId, pi_context Context) +public: + // TODO set c-tor private + interop_handle(std::vector MemObjs, cl_command_queue Queue, + cl_device_id DeviceId, cl_context Context) : MQueue(Queue), MDeviceId(DeviceId), MContext(Context), MMemObjs(std::move(MemObjs)) {} +private: - pi_mem getMemImpl(detail::Requirement* Req) const; + cl_mem getMemImpl(detail::Requirement* Req) const; - pi_queue MQueue; + cl_command_queue MQueue; cl_device_id MDeviceId; - pi_context MContext; + cl_context MContext; std::vector MMemObjs; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 57a3f9636276d..a5aeb90245a81 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1547,10 +1547,10 @@ void DispatchHostTask(const std::shared_ptr &Ctx) { std::unique_ptr &HT = Ctx->HostTask->MHostTask; // we're ready to call the user-defined lambda now - if (HT->isInterop()) { - auto Queue = Ctx->CGHostTask->MQueue->get(); - auto DeviceId = Ctx->CGHostTask->MQueue->get_device()->get(); - auto Context = Ctx->CGHostTask->MQueue->get_context()->get(); + if (HT->isInteropTask()) { + auto Queue = Ctx->HostTask->MQueue->get(); + auto DeviceId = Ctx->HostTask->MQueue->get_device().get(); + auto Context = Ctx->HostTask->MQueue->get_context().get(); interop_handle IH{Ctx->ReqToMem, Queue, DeviceId, Context}; @@ -1897,16 +1897,16 @@ cl_int ExecCGCommand::enqueueImp() { std::vector &ReqToMem = Ctx->ReqToMem; // Extract the Mem Objects for all Requirements, to ensure they are // available if a user ask for them 
inside the interop task scope - const auto& HandlerReq = ExecInterop->MRequirements; + const auto& HandlerReq = HostTask->MRequirements; std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement* Req) { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); - interop_handle::ReqToMem ReqToMem = std::make_pair(Req, MemArg); - ReqToMem.emplace_back(ReqToMem); + interop_handle::ReqToMem ReqToMemEl = std::make_pair(Req, MemArg); + ReqToMem.emplace_back(ReqToMemEl); }); - std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); + std::sort(std::begin(ReqToMem), std::end(ReqToMem)); MQueue->getHostTaskAndEventCallbackThreadPool().submit( [Ctx]() { DispatchHostTask(Ctx); }); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 43210a79fcbf4..355ef457699c7 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -77,6 +77,9 @@ struct DepDesc { AllocaCommandBase *MAllocaCmd = nullptr; }; +struct HostTaskContext; +void DispatchHostTask(const std::shared_ptr &Ctx); + // The Command represents some action that needs to be performed on one or // more memory objects. The command has vector of Depdesc objects that // represent dependencies of the command. 
It has vector of pointer to commands diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 3001fa6b481ab..759b1cfc15d61 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -11,7 +11,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -pi_mem interop_handle::getMemImpl(detail::Requirement *Req) const { +cl_mem interop_handle::getMemImpl(detail::Requirement *Req) const { auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), [=](ReqToMem Elem) { return (Elem.first == Req); From d7fe4366f6f554e012824026d9fb5947bdf5e4f9 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 7 Apr 2020 11:06:29 +0300 Subject: [PATCH 039/188] [SYCL] Fix runtime issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 7 ++++--- sycl/source/detail/scheduler/commands.cpp | 21 ++++++++++----------- sycl/source/handler.cpp | 8 +++++--- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 200900cc07d88..a995f0bd77b5c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -617,11 +617,12 @@ class CGInteropTask : public CG { class CGHostTask : public CG { public: std::unique_ptr MHostTask; - shared_ptr_class MQueue; + // context to create self event with + shared_ptr_class MContext; vector_class MArgs; CGHostTask(std::unique_ptr HostTask, - std::shared_ptr Queue, + std::shared_ptr Context, vector_class Args, std::vector> ArgsStorage, std::vector AccStorage, @@ -632,7 +633,7 @@ class CGHostTask : public CG { : CG(Type, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements), std::move(Events), std::move(loc)), - MHostTask(std::move(HostTask)), MQueue(std::move(Queue)), + MHostTask(std::move(HostTask)), MContext(Context), MArgs(std::move(Args)) {} }; diff --git a/sycl/source/detail/scheduler/commands.cpp 
b/sycl/source/detail/scheduler/commands.cpp index fe088f059c4d1..5a29ee477969c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -413,11 +413,10 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { std::unique_ptr HT(new detail::HostTask(std::move(Func))); std::unique_ptr GlueCG(new detail::CGHostTask( - std::move(HT), DepEvent->getQueueWPtr().lock(), - /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, - /* SharedPtrStorage = */ {}, /* Requirements = */ {}, - /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, - /* Payload */ {})); + std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, + /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, + /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, + CG::CODEPLAY_HOST_TASK, /* Payload */ {})); Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); @@ -1529,6 +1528,7 @@ struct HostTaskContext { std::map> RequiredEventsPerPlugin; + // Context with which SelfEvent has to be created ContextImplPtr Context; EventImplPtr SelfEvent; @@ -1841,19 +1841,18 @@ cl_int ExecCGCommand::enqueueImp() { } case CG::CGTYPE::CODEPLAY_HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); - // MQueue is host queue here thus we'll employ the one host task is - // submitted to - const QueueImplPtr &Queue = HostTask->MQueue; std::shared_ptr Ctx{new HostTaskContext{HostTask}}; + Ctx->Context = HostTask->MContext; + // Init self-event Ctx->SelfEvent = MEvent; - RT::PiContext ContextRef = Queue->getContextImplPtr()->getHandleRef(); + RT::PiContext ContextRef = Ctx->Context->getHandleRef(); - const detail::plugin &Plugin = Queue->getPlugin(); + const detail::plugin &Plugin = Ctx->Context->getPlugin(); Plugin.call(ContextRef, &Event); - Ctx->SelfEvent->setContextImpl(Queue->getContextImplPtr()); + Ctx->SelfEvent->setContextImpl(Ctx->Context); 
// init dependency events in Ctx for (EventImplPtr &Event : EventImpls) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b2247f4de013e..e640180bd218c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -14,6 +14,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -78,9 +79,10 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { break; case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue, std::move(MArgs), std::move(MArgsStorage), - std::move(MAccStorage), std::move(MSharedPtrStorage), - std::move(MRequirements), std::move(MEvents), MCGType, Payload)); + std::move(MHostTask), MQueue->getContextImplPtr(), + std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), + std::move(MSharedPtrStorage), std::move(MRequirements), + std::move(MEvents), MCGType, Payload)); break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " From fa6b054a451ae65795acf10ef33b123d41eef8b6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 7 Apr 2020 11:26:18 +0300 Subject: [PATCH 040/188] [SYCL] Fix build issue after merge Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 5 ++++- sycl/source/detail/scheduler/commands.cpp | 9 +++++---- sycl/source/handler.cpp | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 7cc144cdf55eb..9ef0903c594e4 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -623,11 +623,14 @@ class CGInteropTask : public CG { class CGHostTask : public CG { public: std::unique_ptr MHostTask; + // queue for host-interop task + shared_ptr_class MQueue; // context to create self event with shared_ptr_class MContext; vector_class MArgs; CGHostTask(std::unique_ptr HostTask, + std::shared_ptr Queue, std::shared_ptr 
Context, vector_class Args, std::vector> ArgsStorage, @@ -639,7 +642,7 @@ class CGHostTask : public CG { : CG(Type, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements), std::move(Events), std::move(loc)), - MHostTask(std::move(HostTask)), MContext(Context), + MHostTask(std::move(HostTask)), MQueue(Queue), MContext(Context), MArgs(std::move(Args)) {} }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c48a7af3e071c..3d1d037ddc149 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -413,10 +413,11 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { std::unique_ptr HT(new detail::HostTask(std::move(Func))); std::unique_ptr GlueCG(new detail::CGHostTask( - std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, - /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, - /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + std::move(HT), DepEvent->getQueueWPtr().lock(), DepEventContext, + /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, + /* SharedPtrStorage = */ {}, /* Requirements = */ {}, + /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, + /* Payload */ {})); Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index e640180bd218c..754cd703ed73b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -79,7 +79,7 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { break; case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue->getContextImplPtr(), + std::move(MHostTask), MQueue, MQueue->getContextImplPtr(), std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), 
std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), MCGType, Payload)); From 89d6b44b2ff765b63afa5d3c983fa400d692cd76 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 7 Apr 2020 11:49:31 +0300 Subject: [PATCH 041/188] [SYCL] Fix codestyle issue Signed-off-by: Sergey Kanaev --- sycl/source/handler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index e640180bd218c..f8fb31c36fb27 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -79,8 +79,8 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { break; case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue->getContextImplPtr(), - std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), + std::move(MHostTask), MQueue->getContextImplPtr(), std::move(MArgs), + std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), MCGType, Payload)); break; From 726827280c6c2315638d04c7b2e9c5a8166081b5 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 7 Apr 2020 20:29:47 +0300 Subject: [PATCH 042/188] [SYCL] Fix compilation issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/interop_handle.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index a587ffe5d0f71..5d9e48d0d8535 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -43,8 +43,13 @@ class interop_handle { cl_mem>::type get_native_mem(const accessor &Acc) const { +#ifndef __SYCL_DEVICE_ONLY__ auto *AccBase = static_cast(&Acc); return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); +#else + // we believe this won't be ever 
called on device side + return static_cast(0x0); +#endif } template Date: Tue, 7 Apr 2020 20:30:14 +0300 Subject: [PATCH 043/188] [SYCL] Add tests stub Signed-off-by: Sergey Kanaev --- .../interop-task-dependency.cpp | 184 ++++++++++++++++++ sycl/test/host-interop-task/interop-task.cpp | 129 ++++++++++++ 2 files changed, 313 insertions(+) create mode 100644 sycl/test/host-interop-task/interop-task-dependency.cpp create mode 100644 sycl/test/host-interop-task/interop-task.cpp diff --git a/sycl/test/host-interop-task/interop-task-dependency.cpp b/sycl/test/host-interop-task/interop-task-dependency.cpp new file mode 100644 index 0000000000000..72331dc08e36b --- /dev/null +++ b/sycl/test/host-interop-task/interop-task-dependency.cpp @@ -0,0 +1,184 @@ +// RUN: %clangxx -fsycl %s -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_PI_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER + +#include +#include +#include +#include + +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf1; + S::buffer Buf2; + S::buffer Buf3; + std::mutex Mutex; + std::condition_variable CV; +}; + +void Thread1Fn(Context &Ctx) { + // 0. initialize resulting buffer with apriori wrong result + { + S::accessor + Acc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor + GeneratorAcc(Ctx.Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc]() { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. 
submit host task writing from buf 1 to buf 2 + auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor + CopierSrcAcc(Ctx.Buf1, CGH); + S::accessor + CopierDstAcc(Ctx.Buf2, CGH); + + auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx](S::interop_handle IH) { + // TODO write through interop handle objects + //(void)IH.get_native_mem(CopierSrcAcc); + //(void)IH.get_native_mem(CopierDstAcc); + (void)IH.get_native_queue(); + (void)IH.get_native_device(); + (void)IH.get_native_context(); + for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) + CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + CGH.codeplay_host_task(CopierHostTask); + }); + + // 3. submit simple task to move data between two buffers + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor + SrcAcc(Ctx.Buf2, CGH); + S::accessor + DstAcc(Ctx.Buf3, CGH); + + CGH.depends_on(HostTaskEvent); + + auto CopierKernel = [SrcAcc, DstAcc]() { + for (size_t Idx = 0; Idx < DstAcc.get_count(); ++Idx) + DstAcc[Idx] = SrcAcc[Idx]; + }; + + CGH.single_task(CopierKernel); + }); + + // 4. check data in buffer #3 + { + S::accessor + Acc(Ctx.Buf3); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + assert(Acc[Idx] == Idx && "Invalid data in third buffer"); + } +} + +void Thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void test() { + auto EH = [](S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; + + // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); + + // 3. check via host accessor that buf 2 contains valid data + { + S::accessor + ResultAcc(Ctx.Buf2); + + bool failure = false; + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); + + failure |= (ResultAcc[Idx] != Idx); + } + + assert(!failure && "Invalid data in result buffer"); + } +} + +int main() { + test(); + + return 0; +} + +// launch of GeneratorTask kernel +// CHECK:---> piKernelCreate( +// CHECK: GeneratorTask +// CHECK:---> piEnqueueKernelLaunch( +// prepare for host task +// CHECK:---> piEnqueueMemBufferMap( +// creation of host task self-event +// CHECK:---> piEventCreate( +// wait on dependencies of host task +// CHECK:---> piEventsWait( +// host task is done, set status of self-event +// CHECK:---> piEventSetStatus( +// launch of CopierTask kernel +// CHECK:---> piKernelCreate( +// CHECK: CopierTask +// CHECK:---> piEnqueueKernelLaunch( diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp new file mode 100644 index 0000000000000..96528438acda2 --- /dev/null +++ b/sycl/test/host-interop-task/interop-task.cpp @@ -0,0 +1,129 @@ +// RUN: %clangxx -fsycl %s -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include + +#include + +namespace S = cl::sycl; + +struct Context { + std::atomic_bool Flag; + S::queue &Queue; + std::string Message; + S::buffer Buf1; + S::buffer Buf2; + std::mutex Mutex; + std::condition_variable CV; +}; + +void Thread1Fn(Context &Ctx) { + // 0. 
initialize resulting buffer with apriori wrong result + { + S::accessor + Acc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -1; + } + + // 1. submit task writing to buffer 1 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor + GeneratorAcc(Ctx.Buf1, CGH); + + auto GeneratorKernel = [GeneratorAcc]() { + for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) + GeneratorAcc[Idx] = Idx; + }; + + CGH.single_task(GeneratorKernel); + }); + + // 2. submit host task writing from buf 1 to buf 2 + Ctx.Queue.submit([&](S::handler &CGH) { + S::accessor + CopierSrcAcc(Ctx.Buf1, CGH); + S::accessor CopierDstAcc(Ctx.Buf2, CGH); + + auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx](S::interop_handle IH) { + // TODO write through interop handle objects + //(void)IH.get_native_mem(CopierSrcAcc); + (void)IH.get_native_mem(CopierDstAcc); + (void)IH.get_native_queue(); + (void)IH.get_native_device(); + (void)IH.get_native_context(); +// for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) +// CopierDstAcc[Idx] = CopierSrcAcc[Idx]; + + bool Expected = false; + bool Desired = true; + assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + + // let's employ some locking here + { + std::lock_guard Lock(Ctx.Mutex); + Ctx.CV.notify_all(); + } + }; + + CGH.codeplay_host_task(CopierHostTask); + }); +} + +void Thread2Fn(Context &Ctx) { + std::unique_lock Lock(Ctx.Mutex); + + // T2.1. Wait until flag F is set eq true. + Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + + assert(Ctx.Flag.load()); + + // T2.2. print some "hello, world" message + Ctx.Message = "Hello, world"; +} + +void test() { + auto EH = [](S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Queue(EH); + + Context Ctx{{false}, Queue, "", {10}, {10}}; + + // 0. 
setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false + std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); + std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + + Thread1.join(); + Thread2.join(); + + assert(Ctx.Flag.load()); + assert(Ctx.Message == "Hello, world"); + + // 3. check via host accessor that buf 2 contains valid data + { + S::accessor + ResultAcc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { + // assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); + } + } +} + +int main() { + test(); + + return 0; +} From fcd3d5894be01a67d7f2ad2407d14b00c2e72d82 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 8 Apr 2020 14:00:42 +0300 Subject: [PATCH 044/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/interop_handle.hpp | 7 ++++++- sycl/source/CMakeLists.txt | 1 + sycl/source/interop_handle.cpp | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index 5d9e48d0d8535..239bf6b121e12 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { + class AccessorBaseHost; class ExecCGCommand; struct HostTaskContext; @@ -44,7 +46,10 @@ class interop_handle { get_native_mem(const accessor &Acc) const { #ifndef __SYCL_DEVICE_ONLY__ - auto *AccBase = static_cast(&Acc); + // employ reinterpret_cast instead of static_cast due to cycle in includes + // involving CL/sycl/accessor.hpp + auto *AccBase = const_cast( + reinterpret_cast(&Acc)); return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); #else // we believe this won't be ever called on device side diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 0143240b1ad74..4acfd045d21f3 100644 --- 
a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -141,6 +141,7 @@ set(SYCL_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" + "interop_handle.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 759b1cfc15d61..5e6b68fe29634 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -7,6 +7,9 @@ //===----------------------------------------------------------------------===// #include +#include + +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { From 6537bb4d695113fde062188c5e472166c5b2b126 Mon Sep 17 00:00:00 2001 From: s-kanaev <57672082+s-kanaev@users.noreply.github.com> Date: Thu, 9 Apr 2020 10:17:11 +0300 Subject: [PATCH 045/188] [SYCL] Fix typo in sycl/source/detail/queue_impl.hpp Signed-off-by: Sergey Kanaev sergey.kanaev@intel.com Co-Authored-By: Ruyman --- sycl/source/detail/queue_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index bc38141250426..e6f0886104b2c 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -410,7 +410,7 @@ class queue_impl { bool MSupportOOO = true; // Thread pool for host task and event callbacks execution. 
- // The thread pool is instntiated upon the very first call to + // The thread pool is instantiated upon the very first call to // getHostTaskAndEventCallbackThreadPool std::unique_ptr MHostTaskThreadPool; }; From 7ce22fe68df3013e4ba62bd0cf8bdcc97f3e1ee2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 13 Apr 2020 14:31:45 +0300 Subject: [PATCH 046/188] [SYCL] Fix some review comments Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/event.hpp | 1 - sycl/include/CL/sycl/handler.hpp | 4 +- sycl/source/detail/queue_impl.cpp | 13 ++- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 105 ++++++++++------------ sycl/source/detail/scheduler/commands.hpp | 9 +- 6 files changed, 71 insertions(+), 63 deletions(-) diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 463f226364d18..001f5ee209c15 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -13,7 +13,6 @@ #include #include -#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 58e9192609ed7..f75a722711c04 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -121,7 +121,7 @@ struct check_fn_signature { template static constexpr std::false_type check(...); - typedef decltype(check(0)) type; + using type = decltype(check(0)); public: static constexpr bool value = type::value; @@ -595,7 +595,7 @@ class __SYCL_EXPORT handler { template typename std::enable_if::type, void()>::value>::type - codeplay_host_task(FuncT &&Func) { + codeplay_host_task(FuncT Func) { throwIfActionIsCreated(); MNDRDesc.set(range<1>(1)); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 20668c662f40a..1f1ed26b86600 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -192,7 +192,18 @@ void queue_impl::initHostTaskAndEventCallbackThreadPool() { int Size = 1; 
if (const char *val = std::getenv("SYCL_QUEUE_THREAD_POOL_SIZE")) - Size = std::stoi(val); + try { + Size = std::stoi(val); + } catch (const std::exception &e) { + throw invalid_parameter_error( + "Invalid value for SYCL_QUEUE_THREAD_POOL_SIZE environment variable", + PI_INVALID_VALUE); + } + + if (Size < 1) + throw invalid_parameter_error( + "Invalid value for SYCL_QUEUE_THREAD_POOL_SIZE environment variable", + PI_INVALID_VALUE); MHostTaskThreadPool.reset(new ThreadPool(Size)); MHostTaskThreadPool->start(); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index e6f0886104b2c..08f1e932f0e12 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -347,7 +347,7 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } - ThreadPool &getHostTaskAndEventCallbackThreadPool() { + ThreadPool &getThreadPool() { if (!MHostTaskThreadPool) initHostTaskAndEventCallbackThreadPool(); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e90a801b08854..3e5db4ae9e675 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -173,7 +173,7 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { ContextImplPtr DepEventContext = DepEvent->getContextImpl(); - // If contexts don't match the events are already connected in addDep + // If contexts don't match the events are already connected if (DepEventContext != Context && !Context->is_host()) { continue; } @@ -377,7 +377,9 @@ void Command::makeTraceEventEpilog() { #endif } -void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { +void Command::glueEvents(EventImplPtr DepEvent) { + const ContextImplPtr &Context = getContext(); + // Async work is not supported for host device. 
if (DepEvent->is_host()) { // call to waitInternal() is in prepareEvents() as it's called from @@ -424,7 +426,7 @@ void Command::addDepSub(EventImplPtr DepEvent, ContextImplPtr Context) { EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed for glue command.", + throw runtime_error("Failed to enqueue a sync event between two contexts", PI_INVALID_OPERATION); MDepsEvents.push_back(std::move(GlueEvent)); @@ -438,7 +440,7 @@ ContextImplPtr Command::getContext() const { void Command::addDep(DepDesc NewDep) { if (NewDep.MDepCommand) { MDepsEvents.push_back(NewDep.MDepCommand->getEvent()); - addDepSub(NewDep.MDepCommand->getEvent(), getContext()); + glueEvents(NewDep.MDepCommand->getEvent()); } MDeps.push_back(NewDep); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -459,7 +461,7 @@ void Command::addDep(EventImplPtr Event) { #endif MDepsEvents.push_back(Event); - addDepSub(std::move(Event), getContext()); + glueEvents(std::move(Event)); } void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) { @@ -1533,38 +1535,6 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } -struct HostTaskContext { - CGHostTask *HostTask; - - // events dependencies - std::map> - RequiredEventsPerPlugin; - - // Context with which SelfEvent has to be created - ContextImplPtr Context; - - EventImplPtr SelfEvent; -}; - -void DispatchHostTask(const std::shared_ptr &Ctx) { - // wait for dependency events - // FIXME introduce a more sophisticated wait mechanism - for (auto &PluginWithEvents : Ctx->RequiredEventsPerPlugin) { - auto RawEvents = getPiEvents(PluginWithEvents.second); - PluginWithEvents.first->call(RawEvents.size(), - RawEvents.data()); - } - - // we're ready to call the user-defined lambda now - Ctx->HostTask->MHostTask->call(); - - const detail::plugin &Plugin = 
Ctx->SelfEvent->getPlugin(); - Plugin.call(Ctx->SelfEvent->getHandleRef(), - CL_COMPLETE); - - // Ctx will be deleted automatically by shared_ptr -} - cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = Command::prepareEvents(getContext()); @@ -1853,24 +1823,6 @@ cl_int ExecCGCommand::enqueueImp() { } case CG::CGTYPE::CODEPLAY_HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); - std::shared_ptr Ctx{new HostTaskContext{HostTask}}; - - Ctx->Context = HostTask->MContext; - - // Init self-event - Ctx->SelfEvent = MEvent; - RT::PiContext ContextRef = Ctx->Context->getHandleRef(); - - const detail::plugin &Plugin = Ctx->Context->getPlugin(); - Plugin.call(ContextRef, &Event); - - Ctx->SelfEvent->setContextImpl(Ctx->Context); - - // init dependency events in Ctx - for (EventImplPtr &Event : EventImpls) { - const detail::plugin &Plugin = Event->getPlugin(); - Ctx->RequiredEventsPerPlugin[&Plugin].push_back(Event); - } size_t ArgIdx = 0, ReqIdx = 0; while (ArgIdx < HostTask->MArgs.size()) { @@ -1893,8 +1845,47 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } - MQueue->getHostTaskAndEventCallbackThreadPool().submit( - [Ctx]() { DispatchHostTask(Ctx); }); + { + ContextImplPtr HTContext = HostTask->MContext; + + // Init self-event + EventImplPtr SelfEvent = MEvent; + RT::PiContext ContextRef = HTContext->getHandleRef(); + + const detail::plugin &Plugin = HTContext->getPlugin(); + Plugin.call(ContextRef, &Event); + + SelfEvent->setContextImpl(HTContext); + + // init dependency events in Ctx + auto DispatchHostTask = [EventImpls, HostTask, SelfEvent] () { + std::map> + RequiredEventsPerPlugin; + + for (const EventImplPtr &Event : EventImpls) { + const detail::plugin &Plugin = Event->getPlugin(); + RequiredEventsPerPlugin[&Plugin].push_back(Event); + } + + // wait for dependency events + // FIXME introduce a more sophisticated wait mechanism + for (auto &PluginWithEvents : RequiredEventsPerPlugin) { + std::vector RawEvents = getPiEvents( + 
PluginWithEvents.second); + PluginWithEvents.first->call(RawEvents.size(), + RawEvents.data()); + } + + // we're ready to call the user-defined lambda now + HostTask->MHostTask->call(); + + const detail::plugin &Plugin = SelfEvent->getPlugin(); + Plugin.call(SelfEvent->getHandleRef(), + PI_EVENT_COMPLETE); + }; + + MQueue->getThreadPool().submit(DispatchHostTask); + } return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index d4f9ce66ab627..70480d0b493c3 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -172,7 +172,14 @@ class Command { void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, RT::PiEvent &Event); std::vector prepareEvents(ContextImplPtr Context); - void addDepSub(EventImplPtr DepEvent, ContextImplPtr Context); + + /// Perform glueing of events from different contexts + /// \param DepEvent event this commands should depend on + /// + /// Glueing (i.e. connecting) will be performed if and only if DepEvent is + /// not from host context and its context doesn't match to context of this + /// command. Context of this command is fetched via getContext(). 
+ void glueEvents(EventImplPtr DepEvent); virtual ContextImplPtr getContext() const; From a23c16756de1a527c7c97c3bb08fa59f9b3e1764 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 13 Apr 2020 16:13:00 +0300 Subject: [PATCH 047/188] [SYCL] Fix indentation Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 60 +++++++++++------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3e5db4ae9e675..ecf010131603a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1846,45 +1846,45 @@ cl_int ExecCGCommand::enqueueImp() { } { - ContextImplPtr HTContext = HostTask->MContext; + ContextImplPtr HTContext = HostTask->MContext; - // Init self-event - EventImplPtr SelfEvent = MEvent; - RT::PiContext ContextRef = HTContext->getHandleRef(); + // Init self-event + EventImplPtr SelfEvent = MEvent; + RT::PiContext ContextRef = HTContext->getHandleRef(); - const detail::plugin &Plugin = HTContext->getPlugin(); - Plugin.call(ContextRef, &Event); + const detail::plugin &Plugin = HTContext->getPlugin(); + Plugin.call(ContextRef, &Event); - SelfEvent->setContextImpl(HTContext); + SelfEvent->setContextImpl(HTContext); - // init dependency events in Ctx - auto DispatchHostTask = [EventImpls, HostTask, SelfEvent] () { - std::map> - RequiredEventsPerPlugin; + // init dependency events in Ctx + auto DispatchHostTask = [EventImpls, HostTask, SelfEvent] () { + std::map> + RequiredEventsPerPlugin; - for (const EventImplPtr &Event : EventImpls) { - const detail::plugin &Plugin = Event->getPlugin(); - RequiredEventsPerPlugin[&Plugin].push_back(Event); - } + for (const EventImplPtr &Event : EventImpls) { + const detail::plugin &Plugin = Event->getPlugin(); + RequiredEventsPerPlugin[&Plugin].push_back(Event); + } - // wait for dependency events - // FIXME introduce a more sophisticated wait mechanism - for 
(auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = getPiEvents( - PluginWithEvents.second); - PluginWithEvents.first->call(RawEvents.size(), - RawEvents.data()); - } + // wait for dependency events + // FIXME introduce a more sophisticated wait mechanism + for (auto &PluginWithEvents : RequiredEventsPerPlugin) { + std::vector RawEvents = getPiEvents( + PluginWithEvents.second); + PluginWithEvents.first->call( + RawEvents.size(), RawEvents.data()); + } - // we're ready to call the user-defined lambda now - HostTask->MHostTask->call(); + // we're ready to call the user-defined lambda now + HostTask->MHostTask->call(); - const detail::plugin &Plugin = SelfEvent->getPlugin(); - Plugin.call(SelfEvent->getHandleRef(), - PI_EVENT_COMPLETE); - }; + const detail::plugin &Plugin = SelfEvent->getPlugin(); + Plugin.call(SelfEvent->getHandleRef(), + PI_EVENT_COMPLETE); + }; - MQueue->getThreadPool().submit(DispatchHostTask); + MQueue->getThreadPool().submit(DispatchHostTask); } return CL_SUCCESS; From fc10c6a7799302800bace1076831b88b24acb3b6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 14 Apr 2020 17:06:51 +0300 Subject: [PATCH 048/188] [SYCL] Remove unrelated change Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index aaab5cfacabb6..1c5bead00a9f9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -619,15 +619,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); - - std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); - for (Command *Dep : Deps) { - AllocaCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - Dep->addUser(AllocaCmd); - } - updateLeaves(Deps, Record, 
Req->MAccessMode); - addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } From e326ed0212afbfd7fb674eed6ee9a398e1588690 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 14 Apr 2020 17:49:41 +0300 Subject: [PATCH 049/188] [SYCL] Eliminate Command::prepareEvents() Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 158 +++++++++++----------- sycl/source/detail/scheduler/commands.hpp | 15 +- 2 files changed, 88 insertions(+), 85 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index ecf010131603a..fd480a8d7a86b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -156,32 +156,9 @@ getPiEvents(const std::vector &EventImpls) { return RetPiEvents; } -// Method prepares PI event's from list sycl::event's -std::vector Command::prepareEvents(ContextImplPtr Context) { - std::vector Result; - for (EventImplPtr &DepEvent : MDepsEvents) { - // Async work is not supported for host device. - if (DepEvent->is_host()) { - DepEvent->waitInternal(); - continue; - } - // The event handle can be null in case of, for example, alloca command, - // which is currently synchrounious, so don't generate OpenCL event. 
- if (DepEvent->getHandleRef() == nullptr) { - continue; - } - - ContextImplPtr DepEventContext = DepEvent->getContextImpl(); - - // If contexts don't match the events are already connected - if (DepEventContext != Context && !Context->is_host()) { - continue; - } - - Result.push_back(DepEvent); - } - - return Result; +void Command::waitForPreparedHostEvents() const { + for (const EventImplPtr &HostEvent : MPreparedHostDepsEvents) + HostEvent->waitInternal(); } void Command::waitForEvents(QueueImplPtr Queue, @@ -377,13 +354,56 @@ void Command::makeTraceEventEpilog() { #endif } -void Command::glueEvents(EventImplPtr DepEvent) { +// static +EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, + const ContextImplPtr &DepEventContext, + const ContextImplPtr &Context) { + EventImplPtr GlueEvent(new detail::event_impl()); + GlueEvent->setContextImpl(Context); + + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + auto Plugin = Context->getPlugin(); + // Add an event on the current context that + // is triggered when the DepEvent is complete + // TODO eliminate creation of user-event + Plugin.call(Context->getHandleRef(), + &GlueEventHandle); + + // enqueue GlueCmd + std::function Func = [GlueEvent]() { + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + const detail::plugin &Plugin = GlueEvent->getPlugin(); + Plugin.call(GlueEventHandle, CL_COMPLETE); + }; + + std::unique_ptr HT(new detail::HostTask(std::move(Func))); + + std::unique_ptr GlueCG(new detail::CGHostTask( + std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, + /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, + /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, + CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + + Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( + std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); + + EnqueueResultT Res; + bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); + if (!Enqueued 
&& EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Failed to enqueue a sync event between two contexts", + PI_INVALID_OPERATION); + + return GlueEvent; +} + +void Command::processDepEvent(EventImplPtr DepEvent) { const ContextImplPtr &Context = getContext(); // Async work is not supported for host device. if (DepEvent->is_host()) { - // call to waitInternal() is in prepareEvents() as it's called from - // enqueue process functions + // call to waitInternal() is in waitForPreparedHostEvents() as it's called + // from enqueue process functions + MPreparedHostDepsEvents.push_back(DepEvent); return; } @@ -394,43 +414,12 @@ void Command::glueEvents(EventImplPtr DepEvent) { ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match - connect them using user event if (DepEventContext != Context && !Context->is_host()) { - EventImplPtr GlueEvent(new detail::event_impl()); - GlueEvent->setContextImpl(Context); + EventImplPtr GlueEvent = connectDepEvent(DepEvent, DepEventContext, + Context); - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - auto Plugin = Context->getPlugin(); - // Add an event on the current context that - // is triggered when the DepEvent is complete - // TODO eliminate creation of user-event - Plugin.call(Context->getHandleRef(), - &GlueEventHandle); - - // enqueue GlueCmd - std::function Func = [GlueEvent]() { - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - const detail::plugin &Plugin = GlueEvent->getPlugin(); - Plugin.call(GlueEventHandle, CL_COMPLETE); - }; - - std::unique_ptr HT(new detail::HostTask(std::move(Func))); - - std::unique_ptr GlueCG(new detail::CGHostTask( - std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, - /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, - /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::CODEPLAY_HOST_TASK, /* Payload */ {})); - - Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( - 
std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); - - EnqueueResultT Res; - bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Failed to enqueue a sync event between two contexts", - PI_INVALID_OPERATION); - - MDepsEvents.push_back(std::move(GlueEvent)); - } + MPreparedDepsEvents.push_back(std::move(GlueEvent)); + } else + MPreparedDepsEvents.push_back(std::move(DepEvent)); } ContextImplPtr Command::getContext() const { @@ -439,8 +428,7 @@ ContextImplPtr Command::getContext() const { void Command::addDep(DepDesc NewDep) { if (NewDep.MDepCommand) { - MDepsEvents.push_back(NewDep.MDepCommand->getEvent()); - glueEvents(NewDep.MDepCommand->getEvent()); + processDepEvent(NewDep.MDepCommand->getEvent()); } MDeps.push_back(NewDep); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -460,8 +448,7 @@ void Command::addDep(EventImplPtr Event) { emitEdgeEventForEventDependence(Cmd, PiEventAddr); #endif - MDepsEvents.push_back(Event); - glueEvents(std::move(Event)); + processDepEvent(std::move(Event)); } void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) { @@ -654,7 +641,8 @@ void AllocaCommand::emitInstrumentationData() { } cl_int AllocaCommand::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -743,7 +731,8 @@ void *AllocaSubBufCommand::getMemAllocation() const { } cl_int AllocaSubBufCommand::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); RT::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( @@ -802,7 +791,8 @@ void ReleaseCommand::emitInstrumentationData() { } cl_int ReleaseCommand::enqueueImp() { - std::vector EventImpls = 
Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); std::vector RawEvents = getPiEvents(EventImpls); bool SkipRelease = false; @@ -912,7 +902,8 @@ void MapMemObject::emitInstrumentationData() { } cl_int MapMemObject::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -967,7 +958,8 @@ void UnMapMemObject::emitInstrumentationData() { } cl_int UnMapMemObject::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1033,14 +1025,14 @@ void MemCpyCommand::emitInstrumentationData() { } ContextImplPtr MemCpyCommand::getContext() const { - QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; + const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue; return detail::getSyclObjImpl(Queue->get_context()); } cl_int MemCpyCommand::enqueueImp() { - std::vector EventImpls; QueueImplPtr Queue = MQueue->is_host() ? 
MSrcQueue : MQueue; - EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1103,8 +1095,8 @@ void ExecCGCommand::flushStreams() { } cl_int UpdateHostRequirementCommand::enqueueImp() { - std::vector EventImpls; - EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); RT::PiEvent &Event = MEvent->getHandleRef(); Command::waitForEvents(MQueue, EventImpls, Event); @@ -1176,13 +1168,14 @@ void MemCpyCommandHost::emitInstrumentationData() { } ContextImplPtr MemCpyCommandHost::getContext() const { - QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; + const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue; return detail::getSyclObjImpl(Queue->get_context()); } cl_int MemCpyCommandHost::enqueueImp() { QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1536,7 +1529,8 @@ void DispatchNativeKernel(void *Blob) { } cl_int ExecCGCommand::enqueueImp() { - std::vector EventImpls = Command::prepareEvents(getContext()); + std::vector EventImpls = MPreparedDepsEvents; + waitForPreparedHostEvents(); auto RawEvents = getPiEvents(EventImpls); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 70480d0b493c3..02f68339b990b 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -167,11 +167,16 @@ class Command { protected: EventImplPtr MEvent; QueueImplPtr MQueue; - std::vector MDepsEvents; + + /// Dependency events prepared for waiting by backend. + /// See processDepEvent for details. 
+ std::vector MPreparedDepsEvents; + std::vector MPreparedHostDepsEvents; void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, RT::PiEvent &Event); - std::vector prepareEvents(ContextImplPtr Context); + + void waitForPreparedHostEvents() const; /// Perform glueing of events from different contexts /// \param DepEvent event this commands should depend on @@ -179,7 +184,11 @@ class Command { /// Glueing (i.e. connecting) will be performed if and only if DepEvent is /// not from host context and its context doesn't match to context of this /// command. Context of this command is fetched via getContext(). - void glueEvents(EventImplPtr DepEvent); + void processDepEvent(EventImplPtr DepEvent); + + static EventImplPtr connectDepEvent(EventImplPtr DepEvent, + const ContextImplPtr &DepEventContext, + const ContextImplPtr &Context); virtual ContextImplPtr getContext() const; From d04677573f7972934fb3eb3d4c8f711af0b1bc52 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 15 Apr 2020 13:45:53 +0300 Subject: [PATCH 050/188] [SYCL] Add empty command/node right after host-task Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 28 ++++++++++++++++-- sycl/source/detail/scheduler/commands.hpp | 5 ++-- .../source/detail/scheduler/graph_builder.cpp | 29 ++++++++++++++++++- sycl/source/detail/scheduler/scheduler.cpp | 25 ++++++++++++++++ sycl/source/detail/scheduler/scheduler.hpp | 5 ++++ 5 files changed, 86 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index fd480a8d7a86b..59af6cb593450 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -369,6 +369,9 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, Plugin.call(Context->getHandleRef(), &GlueEventHandle); + // TODO Use internal API here + // TODO return event, createb by host-task enqueue process + // enqueue GlueCmd std::function Func = 
[GlueEvent]() { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); @@ -1199,8 +1202,13 @@ cl_int MemCpyCommandHost::enqueueImp() { EmptyCommand::EmptyCommand(QueueImplPtr Queue, Requirement Req) : Command(CommandType::EMPTY_TASK, std::move(Queue)), - MRequirement(std::move(Req)) { + MRequirement(new Requirement(std::move(Req))) { + + emitInstrumentationDataProxy(); +} +EmptyCommand::EmptyCommand(QueueImplPtr Queue) + :Command(CommandType::EMPTY_TASK, std::move(Queue)) { emitInstrumentationDataProxy(); } @@ -1846,13 +1854,16 @@ cl_int ExecCGCommand::enqueueImp() { EventImplPtr SelfEvent = MEvent; RT::PiContext ContextRef = HTContext->getHandleRef(); + // You can't create event for host-queue/host-context const detail::plugin &Plugin = HTContext->getPlugin(); Plugin.call(ContextRef, &Event); SelfEvent->setContextImpl(HTContext); + std::vector Deps = MDeps; + // init dependency events in Ctx - auto DispatchHostTask = [EventImpls, HostTask, SelfEvent] () { + auto DispatchHostTask = [EventImpls, HostTask, Deps, SelfEvent] () mutable { std::map> RequiredEventsPerPlugin; @@ -1876,9 +1887,20 @@ cl_int ExecCGCommand::enqueueImp() { const detail::plugin &Plugin = SelfEvent->getPlugin(); Plugin.call(SelfEvent->getHandleRef(), PI_EVENT_COMPLETE); + + // perform release (unblock) of empty command + std::vector Reqs; + Reqs.resize(Deps.size()); + + std::transform(Deps.begin(), Deps.end(), Reqs.begin(), + [](const DepDesc &Dep) { + return const_cast(Dep.MDepRequirement); + }); + + Scheduler::getInstance().unblockRequirements(Reqs); }; - MQueue->getThreadPool().submit(DispatchHostTask); + MQueue->getThreadPool().submit(std::move(DispatchHostTask)); } return CL_SUCCESS; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 02f68339b990b..356192cb0e52d 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -247,16 +247,17 @@ class Command { class EmptyCommand : public Command 
{ public: EmptyCommand(QueueImplPtr Queue, Requirement Req); + EmptyCommand(QueueImplPtr Queue); void printDot(std::ostream &Stream) const final; - const Requirement *getRequirement() const final { return &MRequirement; } + const Requirement *getRequirement() const final { return MRequirement.get(); } void emitInstrumentationData(); private: cl_int enqueueImp() final { return CL_SUCCESS; } - Requirement MRequirement; + std::unique_ptr MRequirement; }; // The command enqueues release instance of memory allocated on Host or diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1c5bead00a9f9..858f25d93fdbf 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -650,6 +650,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { const std::vector &Reqs = CommandGroup->MRequirements; const std::vector &Events = CommandGroup->MEvents; + const CG::CGTYPE CGType = CommandGroup->getType(); if (CommandGroup->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) Queue = Scheduler::getInstance().getDefaultHostQueue(); @@ -662,6 +663,17 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (MPrintOptionsArray[BeforeAddCG]) printGraphAsDot("before_addCG"); + EmptyCommand *EmptyCmd = nullptr; + + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + EmptyCmd = new EmptyCommand( + Scheduler::getInstance().getDefaultHostQueue()); + + EmptyCmd->MIsBlockable = true; + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; + EmptyCmd->MBlockReason = "Blocked by host task"; + } + for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); markModifiedIfWrite(Record, Req); @@ -688,18 +700,32 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, for (Command *Dep : Deps) NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); + + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + 
EmptyCmd->addDep(DepDesc{NewCmd.get(), Req, AllocaCmd}); + + Req->MBlockedCmd = EmptyCmd; + } + } + + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + NewCmd->addUser(EmptyCmd); } // Set new command as user for dependencies and update leaves. // Node dependencies can be modified further when adding the node to leaves, // iterate over their copy. + // FIXME employ a reference here to eliminate copying of a vector std::vector Deps = NewCmd->MDeps; for (DepDesc &Dep : Deps) { Dep.MDepCommand->addUser(NewCmd.get()); const Requirement *Req = Dep.MDepRequirement; MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); - addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + else + addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); } // Register all the events as dependencies @@ -709,6 +735,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); + return NewCmd.release(); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 766f626072c41..cccb34ca666bc 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -181,6 +181,10 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, void Scheduler::releaseHostAccessor(Requirement *Req) { Req->MBlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + unblockSingleReq(Req); +} + +void Scheduler::unblockSingleReq(Requirement * Req) { MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { @@ -194,6 +198,27 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { EnqueueLeaves(Record->MWriteLeaves); } +void Scheduler::unblockRequirements(const std::vector &Reqs) { + // fetch unique 
blocked cmds + std::unordered_map> BlockedCmds; + + for (Requirement *Req : Reqs) + BlockedCmds[Req->MBlockedCmd].push_back(Req); + + for (const auto &It : BlockedCmds) { + if (!It.first) + continue; + + Command *BlockedCmd = It.first; + const std::vector &SubReqs = It.second; + + BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + + for (Requirement *Req : SubReqs) + unblockSingleReq(Req); + } +} + Scheduler::Scheduler() { sycl::device HostDevice; DefaultHostQueue = QueueImplPtr(new queue_impl( diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index aa9881f158419..587f418d08be1 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -95,6 +95,9 @@ class Scheduler { // Unblocks operations with the memory object. void releaseHostAccessor(Requirement *Req); + // Unblocks operations with memory objects + void unblockRequirements(const std::vector &Reqs); + // Returns an instance of the scheduler object. static Scheduler &getInstance(); @@ -107,6 +110,8 @@ class Scheduler { Scheduler(); static Scheduler instance; + void unblockSingleReq(Requirement *Req); + // The graph builder provides interfaces that can change already existing // graph (e.g. add/remove edges/nodes). 
class GraphBuilder { From cf3bbf3bcd40e9bb8b411ac2e39a0841d16d6fbb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 15 Apr 2020 14:22:25 +0300 Subject: [PATCH 051/188] [NFC] [SYCL] Shift DispatchHostTask lambda to functor to reduce size of enqueueImp method Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 93 +++++++++++++---------- sycl/source/detail/thread_pool.hpp | 10 +++ 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 59af6cb593450..4c84fd092467d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1536,6 +1536,56 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } +class DispatchHostTask { + std::vector MDepEvents; + CGHostTask *MHostTask; + std::vector MDeps; + EventImplPtr MSelfEvent; + +public: + DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, + std::vector Deps, EventImplPtr SelfEvent) + : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, + MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} + + void operator()() const { + std::map> + RequiredEventsPerPlugin; + + for (const EventImplPtr &Event : MDepEvents) { + const detail::plugin &Plugin = Event->getPlugin(); + RequiredEventsPerPlugin[&Plugin].push_back(Event); + } + + // wait for dependency events + // FIXME introduce a more sophisticated wait mechanism + for (auto &PluginWithEvents : RequiredEventsPerPlugin) { + std::vector RawEvents = getPiEvents( + PluginWithEvents.second); + PluginWithEvents.first->call( + RawEvents.size(), RawEvents.data()); + } + + // we're ready to call the user-defined lambda now + MHostTask->MHostTask->call(); + + const detail::plugin &Plugin = MSelfEvent->getPlugin(); + Plugin.call(MSelfEvent->getHandleRef(), + PI_EVENT_COMPLETE); + + // perform release (unblock) of empty command + std::vector Reqs; + 
Reqs.resize(MDeps.size()); + + std::transform(MDeps.begin(), MDeps.end(), Reqs.begin(), + [](const DepDesc &Dep) { + return const_cast(Dep.MDepRequirement); + }); + + Scheduler::getInstance().unblockRequirements(Reqs); + } +}; + cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); @@ -1860,47 +1910,8 @@ cl_int ExecCGCommand::enqueueImp() { SelfEvent->setContextImpl(HTContext); - std::vector Deps = MDeps; - - // init dependency events in Ctx - auto DispatchHostTask = [EventImpls, HostTask, Deps, SelfEvent] () mutable { - std::map> - RequiredEventsPerPlugin; - - for (const EventImplPtr &Event : EventImpls) { - const detail::plugin &Plugin = Event->getPlugin(); - RequiredEventsPerPlugin[&Plugin].push_back(Event); - } - - // wait for dependency events - // FIXME introduce a more sophisticated wait mechanism - for (auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = getPiEvents( - PluginWithEvents.second); - PluginWithEvents.first->call( - RawEvents.size(), RawEvents.data()); - } - - // we're ready to call the user-defined lambda now - HostTask->MHostTask->call(); - - const detail::plugin &Plugin = SelfEvent->getPlugin(); - Plugin.call(SelfEvent->getHandleRef(), - PI_EVENT_COMPLETE); - - // perform release (unblock) of empty command - std::vector Reqs; - Reqs.resize(Deps.size()); - - std::transform(Deps.begin(), Deps.end(), Reqs.begin(), - [](const DepDesc &Dep) { - return const_cast(Dep.MDepRequirement); - }); - - Scheduler::getInstance().unblockRequirements(Reqs); - }; - - MQueue->getThreadPool().submit(std::move(DispatchHostTask)); + MQueue->getThreadPool().submit(std::move( + DispatchHostTask(EventImpls, HostTask, MDeps, SelfEvent))); } return CL_SUCCESS; diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 4e765f847e3d4..393303197db2e 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -67,6 +67,16 @@ 
class ThreadPool { Thread.join(); } + template + void submit(T &&Func) { + { + std::lock_guard Lock(MJobQueueMutex); + MJobQueue.emplace(std::move([Func]() { Func(); })); + } + + MDoSmthOrStop.notify_one(); + } + void submit(std::function &&Func) { { std::lock_guard Lock(MJobQueueMutex); From 35fdcde59f4b59d4df7bafe3acb8d317ac0b7508 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 15 Apr 2020 17:41:02 +0300 Subject: [PATCH 052/188] [SYCL] Eliminate use of addCG when connecting multiple context. Currently connection HostTask won't appear in graph. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4c84fd092467d..a5bdedbfe3e45 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -369,29 +369,27 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, Plugin.call(Context->getHandleRef(), &GlueEventHandle); - // TODO Use internal API here - // TODO return event, createb by host-task enqueue process - - // enqueue GlueCmd - std::function Func = [GlueEvent]() { + // construct Host Task type command manually and make it depend on DepEvent + std::function CFunc = [GlueEvent]() { RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); const detail::plugin &Plugin = GlueEvent->getPlugin(); Plugin.call(GlueEventHandle, CL_COMPLETE); }; - std::unique_ptr HT(new detail::HostTask(std::move(Func))); + std::unique_ptr HT(new detail::HostTask(std::move(CFunc))); - std::unique_ptr GlueCG(new detail::CGHostTask( + std::unique_ptr ConnectCG(new detail::CGHostTask( std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + ExecCGCommand *ConnectCmd = new 
ExecCGCommand( + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); - Command *GlueCmd = Scheduler::getInstance().MGraphBuilder.addCG( - std::move(GlueCG), Scheduler::getInstance().getDefaultHostQueue()); + ConnectCmd->addDep(DepEvent); EnqueueResultT Res; - bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(GlueCmd, Res); + bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Failed to enqueue a sync event between two contexts", PI_INVALID_OPERATION); @@ -1904,7 +1902,7 @@ cl_int ExecCGCommand::enqueueImp() { EventImplPtr SelfEvent = MEvent; RT::PiContext ContextRef = HTContext->getHandleRef(); - // You can't create event for host-queue/host-context + // FIXME You can't create event for host-queue/host-context const detail::plugin &Plugin = HTContext->getPlugin(); Plugin.call(ContextRef, &Event); From 427c81d4e14cb752fbb7699cc3a3e8e5b0d8af0b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 15 Apr 2020 17:42:21 +0300 Subject: [PATCH 053/188] [SYCL] Fix indentation Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 858f25d93fdbf..51cf0fcf91ea0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -666,8 +666,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCommand *EmptyCmd = nullptr; if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { - EmptyCmd = new EmptyCommand( - Scheduler::getInstance().getDefaultHostQueue()); + EmptyCmd = new EmptyCommand( + Scheduler::getInstance().getDefaultHostQueue()); EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; From 9a936ee57413c207129c6bb0be662710c31f3076 Mon Sep 17 
00:00:00 2001 From: Sergey Kanaev Date: Thu, 16 Apr 2020 11:37:27 +0300 Subject: [PATCH 054/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index fdef7f708a6a5..142c25f674566 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1216,6 +1216,9 @@ void EmptyCommand::emitInstrumentationData() { return; // Create a payload with the command name and an event using this payload to // emit a node_create + if (!MRequirement.get()) + return; + MAddress = MRequirement.MSYCLMemObj; makeTraceEventProlog(MAddress); From a1c23d5ef453f9ba20b13d50f61e0563c4989d3a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 16 Apr 2020 12:35:43 +0300 Subject: [PATCH 055/188] [SYCL] Add empty command for connecting command Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 142c25f674566..f081713fb91fd 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -388,6 +388,20 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, ConnectCmd->addDep(DepEvent); + if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { + (void)DepCmd; + EmptyCommand *EmptyCmd = new EmptyCommand( + Scheduler::getInstance().getDefaultHostQueue()); + + EmptyCmd->MIsBlockable = true; + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; + EmptyCmd->MBlockReason = "Blocked by host task"; + + DepCmd->addUser(ConnectCmd); + EmptyCmd->addDep(ConnectCmd->MEvent); + ConnectCmd->addUser(EmptyCmd); + } + EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); if (!Enqueued && 
EnqueueResultT::SyclEnqueueFailed == Res.MResult) From 8427b4aff1253e92235fbfe7c5a1dc16d00c7b5c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 16 Apr 2020 12:36:17 +0300 Subject: [PATCH 056/188] [NFC] [SYCL] Split method into smaller ones Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 38 ++++++++++++++--------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f081713fb91fd..879e586cc81d2 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1557,13 +1557,7 @@ class DispatchHostTask { std::vector MDeps; EventImplPtr MSelfEvent; -public: - DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, - std::vector Deps, EventImplPtr SelfEvent) - : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, - MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} - - void operator()() const { + void waitForEvents() const { std::map> RequiredEventsPerPlugin; @@ -1580,15 +1574,9 @@ class DispatchHostTask { PluginWithEvents.first->call( RawEvents.size(), RawEvents.data()); } + } - // we're ready to call the user-defined lambda now - MHostTask->MHostTask->call(); - - const detail::plugin &Plugin = MSelfEvent->getPlugin(); - Plugin.call(MSelfEvent->getHandleRef(), - PI_EVENT_COMPLETE); - - // perform release (unblock) of empty command + void unblockBlockedDeps() const { std::vector Reqs; Reqs.resize(MDeps.size()); @@ -1599,6 +1587,26 @@ class DispatchHostTask { Scheduler::getInstance().unblockRequirements(Reqs); } + +public: + DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, + std::vector Deps, EventImplPtr SelfEvent) + : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, + MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} + + void operator()() const { + waitForEvents(); + + // we're ready to call the user-defined lambda now + 
MHostTask->MHostTask->call(); + + // update self-event status + const detail::plugin &Plugin = MSelfEvent->getPlugin(); + Plugin.call(MSelfEvent->getHandleRef(), + PI_EVENT_COMPLETE); + + unblockBlockedDeps(); + } }; cl_int ExecCGCommand::enqueueImp() { From ff033074448fb12e14ed5283b03fb030b55061a4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 16 Apr 2020 12:37:10 +0300 Subject: [PATCH 057/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 879e586cc81d2..be0cfa81370bf 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1233,7 +1233,7 @@ void EmptyCommand::emitInstrumentationData() { if (!MRequirement.get()) return; - MAddress = MRequirement.MSYCLMemObj; + MAddress = MRequirement->MSYCLMemObj; makeTraceEventProlog(MAddress); if (MFirstInstance) { From 49ed81ee5ec2222513f24b6f491d30f1394087da Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 16 Apr 2020 13:18:03 +0300 Subject: [PATCH 058/188] [SYCL] Remove unneeded line Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index be0cfa81370bf..1bc940178811f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -389,7 +389,6 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, ConnectCmd->addDep(DepEvent); if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { - (void)DepCmd; EmptyCommand *EmptyCmd = new EmptyCommand( Scheduler::getInstance().getDefaultHostQueue()); From d3a5cf9a53968f7c187cd4b727a7ceee71be2462 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 17 Apr 2020 19:16:12 +0300 Subject: [PATCH 059/188] 
[SYCL] Worked on fixing of race condition. Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 17 ++ sycl/source/detail/accessor_impl.cpp | 49 +++- sycl/source/detail/scheduler/commands.cpp | 271 +++++++++++------- sycl/source/detail/scheduler/commands.hpp | 30 +- .../source/detail/scheduler/graph_builder.cpp | 32 ++- sycl/source/detail/scheduler/scheduler.cpp | 79 ++++- sycl/source/detail/scheduler/scheduler.hpp | 6 +- 7 files changed, 363 insertions(+), 121 deletions(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index f9cffa5344b9f..3938f7510ca60 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -15,11 +15,15 @@ #include #include +#include +#include + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { class Command; +class Scheduler; // The class describes a requirement to access a SYCL memory object such as // sycl::buffer and sycl::image. 
For example, each accessor used in a kernel, @@ -96,6 +100,19 @@ class __SYCL_EXPORT AccessorImplHost { void *MData = nullptr; Command *MBlockedCmd = nullptr; + +protected: + using CheckCmdFn = std::function; + void addBlockedCommand(Command *BlockedCmd); + Command *findBlockedCommand(const CheckCmdFn &Check); + bool removeBlockedCommand(Command *BlockedCmd); + + friend class Command; + friend class Scheduler; + +private: + std::mutex MBlockedCmdsMutex; + std::unordered_set MBlockedCmds; }; using AccessorImplPtr = shared_ptr_class; diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 96a7657bb27a5..09ad5a27cf5d5 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -10,24 +10,65 @@ #include #include +#include + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { AccessorImplHost::~AccessorImplHost() { try { - if (MBlockedCmd) + bool BlockedCmdNotEmpty = false; + + { + std::lock_guard Lock(MBlockedCmdsMutex); + BlockedCmdNotEmpty = !!MBlockedCmds.size(); + + fprintf(stderr, "Gonna release host accessor %p, %i, %zu\n", + (void *)this, + (int)BlockedCmdNotEmpty, MBlockedCmds.size()); + + for (Command *Cmd : MBlockedCmds) + fprintf(stderr, " Blocked: %p, type: %i, reason %s\n", + (void *)Cmd, + (int)Cmd->getType(), Cmd->getBlockReason()); + } + + if (BlockedCmdNotEmpty) detail::Scheduler::getInstance().releaseHostAccessor(this); } catch (...) { } } +void AccessorImplHost::addBlockedCommand(Command *BlockedCmd) { + std::lock_guard Lock(MBlockedCmdsMutex); + + MBlockedCmds.insert(BlockedCmd); +} + +Command * +AccessorImplHost::findBlockedCommand(const CheckCmdFn &Check) { + std::lock_guard Lock(MBlockedCmdsMutex); + + auto FoundIt = std::find_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); + + return FoundIt == MBlockedCmds.end() ? 
nullptr : *FoundIt; +} + +bool AccessorImplHost::removeBlockedCommand(Command *BlockedCmd) { + std::lock_guard Lock(MBlockedCmdsMutex); + + MBlockedCmds.erase(BlockedCmd); + + return MBlockedCmds.empty(); +} + void addHostAccessorAndWait(Requirement *Req) { detail::EventImplPtr Event = detail::Scheduler::getInstance().addHostAccessor(Req); Event->wait(Event); } -} -} -} +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1bc940178811f..a06c6d6602025 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -41,6 +41,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { + #ifdef XPTI_ENABLE_INSTRUMENTATION // Global graph for the application extern xpti::trace_event_data_t *GSYCLGraphEvent; @@ -156,6 +157,70 @@ getPiEvents(const std::vector &EventImpls) { return RetPiEvents; } +class DispatchHostTask { + std::vector MDepEvents; + CGHostTask *MHostTask; + std::vector MDeps; + EventImplPtr MSelfEvent; + + void waitForEvents() const { + std::map> + RequiredEventsPerPlugin; + + for (const EventImplPtr &Event : MDepEvents) { + const detail::plugin &Plugin = Event->getPlugin(); + RequiredEventsPerPlugin[&Plugin].push_back(Event); + } + + // wait for dependency events + // FIXME introduce a more sophisticated wait mechanism + for (auto &PluginWithEvents : RequiredEventsPerPlugin) { + std::vector RawEvents = getPiEvents( + PluginWithEvents.second); + PluginWithEvents.first->call( + RawEvents.size(), RawEvents.data()); + } + } + +public: + DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, + std::vector Deps, EventImplPtr SelfEvent) + : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, + MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} + + void operator()() const { + waitForEvents(); + + // we're ready to call the user-defined lambda now + 
MHostTask->MHostTask->call(); + + // update self-event status + if (MSelfEvent->is_host()) { + // TODO + fprintf(stderr, "Gonna enqueue smth here\n"); + } else { + const detail::plugin &Plugin = MSelfEvent->getPlugin(); + Plugin.call(MSelfEvent->getHandleRef(), + PI_EVENT_COMPLETE); + } + + unblockBlockedDeps(MDeps); + } + + static void unblockBlockedDeps(const std::vector &Deps) { + std::vector Reqs; + Reqs.resize(Deps.size()); + + std::transform(Deps.begin(), Deps.end(), Reqs.begin(), + [](const DepDesc &Dep) { + return const_cast(Dep.MDepRequirement); + }); + + Scheduler::getInstance().unblockRequirements( + Reqs, Command::BlockReason::HostTask); + } +}; + void Command::waitForPreparedHostEvents() const { for (const EventImplPtr &HostEvent : MPreparedHostDepsEvents) HostEvent->waitInternal(); @@ -355,38 +420,45 @@ void Command::makeTraceEventEpilog() { } // static -EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, +/*EventImplPtr*/ void Command::connectDepEvent(EventImplPtr DepEvent, const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context) { - EventImplPtr GlueEvent(new detail::event_impl()); - GlueEvent->setContextImpl(Context); + const ContextImplPtr &Context, + const DepDesc &Dep) { +// EventImplPtr GlueEvent(new detail::event_impl()); +// GlueEvent->setContextImpl(Context); - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - auto Plugin = Context->getPlugin(); +// RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); +// auto Plugin = Context->getPlugin(); // Add an event on the current context that // is triggered when the DepEvent is complete // TODO eliminate creation of user-event - Plugin.call(Context->getHandleRef(), - &GlueEventHandle); +// Plugin.call(Context->getHandleRef(), +// &GlueEventHandle); // construct Host Task type command manually and make it depend on DepEvent - std::function CFunc = [GlueEvent]() { - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - const detail::plugin &Plugin = 
GlueEvent->getPlugin(); - Plugin.call(GlueEventHandle, CL_COMPLETE); - }; - - std::unique_ptr HT(new detail::HostTask(std::move(CFunc))); - - std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, - /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, - /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::CODEPLAY_HOST_TASK, /* Payload */ {})); - ExecCGCommand *ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + ExecCGCommand *ConnectCmd = nullptr; + + { + // Temporary function. Will be replaced depending on circumstances. +#if 0 + std::function Func = [GlueEvent]() { + RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); + const detail::plugin &Plugin = GlueEvent->getPlugin(); + Plugin.call(GlueEventHandle, CL_COMPLETE); + }; +#else + std::function Func = []() {}; +#endif - ConnectCmd->addDep(DepEvent); + std::unique_ptr HT(new detail::HostTask(std::move(Func))); + std::unique_ptr ConnectCG(new detail::CGHostTask( + std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, + /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, + /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, + CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + ConnectCmd = new ExecCGCommand(std::move(ConnectCG), + Scheduler::getInstance().getDefaultHostQueue()); + } if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { EmptyCommand *EmptyCmd = new EmptyCommand( @@ -394,12 +466,60 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = "Blocked by host task"; + EmptyCmd->MBlockReason = BlockReason::HostTask; + //EmptyCmd->MBlockReason = "Blocked by host task for dependency"; DepCmd->addUser(ConnectCmd); - EmptyCmd->addDep(ConnectCmd->MEvent); + + if (Dep.MDepRequirement) { + // We can't set Dep as dependency 
for connect cmd 'cause Dep's command is + // from different context. Thus we'll employ a hack here. + +#if 1 + { + DepDesc ConnectCmdDep = Dep; + ConnectCmdDep.MDepCommand = this; + //ConnectCmd->addDep(ConnectCmdDep); + std::function Func = [ConnectCmdDep]() { + std::vector Deps; + Deps.push_back(ConnectCmdDep); + DispatchHostTask::unblockBlockedDeps(Deps); + }; + + auto *CG = static_cast( + ConnectCmd->MCommandGroup.get()); + + CG->MHostTask.reset(new detail::HostTask(std::move(Func))); + } +#endif + + { + DepDesc EmptyCmdDep = Dep; + EmptyCmdDep.MDepCommand = ConnectCmd; + EmptyCmd->addDep(EmptyCmdDep); + } + + { + const Requirement *Req = Dep.MDepRequirement; + //assert(!Req->MBlockedCmd && "Already blocked 3!"); + //const_cast(Req)->MBlockedCmd = EmptyCmd; + fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", + (const void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); + const_cast(Req)->addBlockedCommand(EmptyCmd); + Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; + MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); + GB.updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); + GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + } + } else { + ConnectCmd->addDep(DepEvent); + EmptyCmd->addDep(ConnectCmd->MEvent); + } + ConnectCmd->addUser(EmptyCmd); - } + } else + ConnectCmd->addDep(DepEvent); EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); @@ -407,10 +527,12 @@ EventImplPtr Command::connectDepEvent(EventImplPtr DepEvent, throw runtime_error("Failed to enqueue a sync event between two contexts", PI_INVALID_OPERATION); - return GlueEvent; + MPreparedHostDepsEvents.push_back(ConnectCmd->getEvent()); + +// return GlueEvent; } -void Command::processDepEvent(EventImplPtr DepEvent) { +void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = getContext(); // Async work is not 
supported for host device. @@ -428,10 +550,10 @@ void Command::processDepEvent(EventImplPtr DepEvent) { ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match - connect them using user event if (DepEventContext != Context && !Context->is_host()) { - EventImplPtr GlueEvent = connectDepEvent(DepEvent, DepEventContext, - Context); + /*EventImplPtr GlueEvent = */connectDepEvent(DepEvent, DepEventContext, + Context, Dep); - MPreparedDepsEvents.push_back(std::move(GlueEvent)); +// MPreparedDepsEvents.push_back(std::move(GlueEvent)); } else MPreparedDepsEvents.push_back(std::move(DepEvent)); } @@ -442,7 +564,7 @@ ContextImplPtr Command::getContext() const { void Command::addDep(DepDesc NewDep) { if (NewDep.MDepCommand) { - processDepEvent(NewDep.MDepCommand->getEvent()); + processDepEvent(NewDep.MDepCommand->getEvent(), NewDep); } MDeps.push_back(NewDep); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -462,7 +584,7 @@ void Command::addDep(EventImplPtr Event) { emitEdgeEventForEventDependence(Cmd, PiEventAddr); #endif - processDepEvent(std::move(Event)); + processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr}); } void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) { @@ -504,7 +626,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) { if (ThrowOnBlock) throw sycl::runtime_error( std::string("Waiting for blocked command. 
Block reason: ") + - std::string(MBlockReason), + std::string(getBlockReason()), PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -512,7 +634,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) { // event, which models the barrier while enqueuing along with the blocked // reason, as determined by the scheduler std::string Info = "enqueue.barrier["; - Info += std::string(MBlockReason) + "]"; + Info += std::string(getBlockReason()) + "]"; emitInstrumentation(xpti::trace_barrier_begin, Info.c_str()); #endif @@ -602,6 +724,17 @@ void Command::resolveReleaseDependencies(std::set &DepList) { #endif } +const char *Command::getBlockReason() const { + switch (MBlockReason) { + case BlockReason::HostAccessor: + return "A Buffer is locked by the host accessor"; + case BlockReason::HostTask: + return "Blocked by host task"; + } + + return "Unknown block reason"; +} + AllocaCommandBase::AllocaCommandBase(CommandType Type, QueueImplPtr Queue, Requirement Req, AllocaCommandBase *LinkedAllocaCmd) @@ -1550,64 +1683,6 @@ void DispatchNativeKernel(void *Blob) { HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } -class DispatchHostTask { - std::vector MDepEvents; - CGHostTask *MHostTask; - std::vector MDeps; - EventImplPtr MSelfEvent; - - void waitForEvents() const { - std::map> - RequiredEventsPerPlugin; - - for (const EventImplPtr &Event : MDepEvents) { - const detail::plugin &Plugin = Event->getPlugin(); - RequiredEventsPerPlugin[&Plugin].push_back(Event); - } - - // wait for dependency events - // FIXME introduce a more sophisticated wait mechanism - for (auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = getPiEvents( - PluginWithEvents.second); - PluginWithEvents.first->call( - RawEvents.size(), RawEvents.data()); - } - } - - void unblockBlockedDeps() const { - std::vector Reqs; - Reqs.resize(MDeps.size()); - - std::transform(MDeps.begin(), MDeps.end(), Reqs.begin(), - [](const DepDesc &Dep) { - return 
const_cast(Dep.MDepRequirement); - }); - - Scheduler::getInstance().unblockRequirements(Reqs); - } - -public: - DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, - std::vector Deps, EventImplPtr SelfEvent) - : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, - MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} - - void operator()() const { - waitForEvents(); - - // we're ready to call the user-defined lambda now - MHostTask->MHostTask->call(); - - // update self-event status - const detail::plugin &Plugin = MSelfEvent->getPlugin(); - Plugin.call(MSelfEvent->getHandleRef(), - PI_EVENT_COMPLETE); - - unblockBlockedDeps(); - } -}; - cl_int ExecCGCommand::enqueueImp() { std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); @@ -1925,13 +2000,13 @@ cl_int ExecCGCommand::enqueueImp() { // Init self-event EventImplPtr SelfEvent = MEvent; - RT::PiContext ContextRef = HTContext->getHandleRef(); +// RT::PiContext ContextRef = HTContext->getHandleRef(); // FIXME You can't create event for host-queue/host-context - const detail::plugin &Plugin = HTContext->getPlugin(); - Plugin.call(ContextRef, &Event); +// const detail::plugin &Plugin = HTContext->getPlugin(); +// Plugin.call(ContextRef, &Event); - SelfEvent->setContextImpl(HTContext); +// SelfEvent->setContextImpl(HTContext); MQueue->getThreadPool().submit(std::move( DispatchHostTask(EventImpls, HostTask, MDeps, SelfEvent))); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 5d6fcc6eab28c..51aef5ae281d4 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -167,6 +167,8 @@ class Command { virtual ~Command() = default; + const char *getBlockReason() const; + protected: EventImplPtr MEvent; QueueImplPtr MQueue; @@ -183,15 +185,26 @@ class Command { /// Perform glueing of events from different contexts /// \param DepEvent event this commands should depend on + /// \param Dep 
optional DepDesc to perform connection of events properly /// /// Glueing (i.e. connecting) will be performed if and only if DepEvent is /// not from host context and its context doesn't match to context of this /// command. Context of this command is fetched via getContext(). - void processDepEvent(EventImplPtr DepEvent); - - static EventImplPtr connectDepEvent(EventImplPtr DepEvent, + /// + /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. + void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); + + /// Perform connection of events in multiple contexts + /// \param DepEvent event to depend on + /// \param DepEventContext context of DepEvent + /// \param Context context of command which wants to depend on DepEvent + /// \param Dep optional DepDesc to perform connection properly + /// + /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. + void connectDepEvent(EventImplPtr DepEvent, const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context); + const ContextImplPtr &Context, + const DepDesc &Dep); virtual ContextImplPtr getContext() const; @@ -213,7 +226,12 @@ class Command { /// Counts the number of memory objects this command is a leaf for. unsigned MLeafCounter = 0; - const char *MBlockReason = "Unknown"; + enum class BlockReason : int { + HostAccessor = 0, + HostTask + }; + + BlockReason MBlockReason; /// Describes the status of the command. 
std::atomic MEnqueueStatus; @@ -453,6 +471,8 @@ class ExecCGCommand : public Command { AllocaCommandBase *getAllocaForReq(Requirement *Req); std::unique_ptr MCommandGroup; + + friend class Command; }; class UpdateHostRequirementCommand : public Command { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index b32da1e90552c..81ea97fe89099 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -396,6 +396,7 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, const bool destructor) { + fprintf(stderr, "Gonna add host accessor for req %p\n", (void *)Req); const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); @@ -423,12 +424,18 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; + EmptyCmd->MBlockReason = Command::BlockReason::HostAccessor; +// EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - Req->MBlockedCmd = EmptyCmd; +// assert(!Req->MBlockedCmd && "Already blocked!"); +// Req->MBlockedCmd = EmptyCmd; + + fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", + (void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); + Req->addBlockedCommand(EmptyCmd); if (MPrintOptionsArray[AfterAddHostAcc]) printGraphAsDot("after_addHostAccessor"); @@ -614,6 +621,17 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); + +#if 0 + std::set Deps = + findDepsForReq(Record, Req, 
Queue->getContextImplPtr()); + for (Command *Dep : Deps) { + AllocaCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); + Dep->addUser(AllocaCmd); + } + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); +#endif } } } @@ -666,7 +684,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = "Blocked by host task"; + EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + //EmptyCmd->MBlockReason = "Blocked by host task"; } for (Requirement *Req : Reqs) { @@ -699,7 +718,11 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd->addDep(DepDesc{NewCmd.get(), Req, AllocaCmd}); - Req->MBlockedCmd = EmptyCmd; +// assert(!Req->MBlockedCmd && "Already blocked 2!"); +// Req->MBlockedCmd = EmptyCmd; + fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", + (void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); + Req->addBlockedCommand(EmptyCmd); } } @@ -717,6 +740,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, const Requirement *Req = Dep.MDepRequirement; MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); else diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index cccb34ca666bc..8decfad0356f2 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -166,6 +166,8 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { EventImplPtr Scheduler::addHostAccessor(Requirement *Req, const bool destructor) { + fprintf(stderr, "Gonna add host accessor for req %p\n", + (void *)Req); std::lock_guard lock(MGraphLock); Command *NewCmd = 
MGraphBuilder.addHostAccessor(Req, destructor); @@ -180,8 +182,23 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, } void Scheduler::releaseHostAccessor(Requirement *Req) { - Req->MBlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - unblockSingleReq(Req); + Command *const BlockedCmd = Req->findBlockedCommand( + [](const Command * const Cmd) { + return Cmd->MBlockReason == Command::BlockReason::HostAccessor; + }); + +// assert(BlockedCmd && "Can't find appropriate command to unblock"); + + if (!BlockedCmd) + return; + + BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + + fprintf(stderr, "release host accessor. Req %p, Blocked cmd %p, reason: %s\n", + (void *)Req, (void *)BlockedCmd, BlockedCmd->getBlockReason()); + + if (Req->removeBlockedCommand(BlockedCmd)) + unblockSingleReq(Req); } void Scheduler::unblockSingleReq(Requirement * Req) { @@ -198,24 +215,68 @@ void Scheduler::unblockSingleReq(Requirement * Req) { EnqueueLeaves(Record->MWriteLeaves); } -void Scheduler::unblockRequirements(const std::vector &Reqs) { +void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, + const std::unordered_set &Reqs) { + bool BlockedCmdEnqueued = false; + + auto EnqueueLeaves = [BlockedCmd, &BlockedCmdEnqueued](CircularBuffer &Leaves) { + for (Command *Cmd : Leaves) { + if (BlockedCmd == Cmd && BlockedCmdEnqueued) + continue; + + BlockedCmdEnqueued |= BlockedCmd == Cmd; + + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } + }; + + for (Requirement *Req : Reqs) { + fprintf(stderr, " Req %p in bulk. 
Cmd = %p, reason = %s\n", + (void *)Req, (void *)BlockedCmd, BlockedCmd->getBlockReason()); + + if (Req->removeBlockedCommand(BlockedCmd)) { + MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); + EnqueueLeaves(Record->MReadLeaves); + EnqueueLeaves(Record->MWriteLeaves); + } + } +} + +void Scheduler::unblockRequirements(const std::vector &Reqs, + Command::BlockReason Reason) { // fetch unique blocked cmds - std::unordered_map> BlockedCmds; + std::unordered_map> BlockedCmds; + + std::function CheckCmd = + [Reason](const Command * const Cmd) { + return Cmd->MBlockReason == Reason; + }; + + for (Requirement *Req : Reqs) { + Command *BlockedCmd = Req->findBlockedCommand(CheckCmd); - for (Requirement *Req : Reqs) - BlockedCmds[Req->MBlockedCmd].push_back(Req); +// assert(BlockedCmd && +// "Can't find appropriate command to unblock multiple requirements"); + + BlockedCmds[BlockedCmd].insert(Req); + } for (const auto &It : BlockedCmds) { if (!It.first) continue; Command *BlockedCmd = It.first; - const std::vector &SubReqs = It.second; + const std::unordered_set &SubReqs = It.second; BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - for (Requirement *Req : SubReqs) - unblockSingleReq(Req); + fprintf(stderr, "Bulk unblock reqs. 
Blocked cmd reason: %s\n", + BlockedCmd->getBlockReason()); + + bulkUnblockReqs(BlockedCmd, SubReqs); } } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 784e99e39f706..61312e40ff410 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include /// \defgroup sycl_graph DPC++ Execution Graph @@ -417,7 +418,8 @@ class Scheduler { void releaseHostAccessor(Requirement *Req); // Unblocks operations with memory objects - void unblockRequirements(const std::vector &Reqs); + void unblockRequirements(const std::vector &Reqs, + Command::BlockReason Reason); /// \return an instance of the scheduler object. static Scheduler &getInstance(); @@ -432,6 +434,8 @@ class Scheduler { static Scheduler instance; void unblockSingleReq(Requirement *Req); + void bulkUnblockReqs(Command * const BlockedCmd, + const std::unordered_set &Reqs); /// Graph builder class. /// From 32f2f1bd899ea7f5870e0f14b5fecab24db64dab Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 20:35:48 +0300 Subject: [PATCH 060/188] [SYCL] Fix runtime issue. Remove debug outputs. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 3 +- sycl/source/detail/accessor_impl.cpp | 34 +++++++++---------- sycl/source/detail/scheduler/commands.cpp | 7 ++-- .../source/detail/scheduler/graph_builder.cpp | 12 ------- sycl/source/detail/scheduler/scheduler.cpp | 21 +++--------- 5 files changed, 25 insertions(+), 52 deletions(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 3938f7510ca60..9535affc6a9ae 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -99,13 +99,12 @@ class __SYCL_EXPORT AccessorImplHost { void *MData = nullptr; - Command *MBlockedCmd = nullptr; - protected: using CheckCmdFn = std::function; void addBlockedCommand(Command *BlockedCmd); Command *findBlockedCommand(const CheckCmdFn &Check); bool removeBlockedCommand(Command *BlockedCmd); + size_t countBlockedCommand(const CheckCmdFn &Check); friend class Command; friend class Scheduler; diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 09ad5a27cf5d5..4a549c6da7cb6 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -18,23 +18,12 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { - bool BlockedCmdNotEmpty = false; + size_t Count = countBlockedCommand( + [](const Command * const Cmd) { + return Cmd->MBlockReason == Command::BlockReason::HostAccessor; + }); - { - std::lock_guard Lock(MBlockedCmdsMutex); - BlockedCmdNotEmpty = !!MBlockedCmds.size(); - - fprintf(stderr, "Gonna release host accessor %p, %i, %zu\n", - (void *)this, - (int)BlockedCmdNotEmpty, MBlockedCmds.size()); - - for (Command *Cmd : MBlockedCmds) - fprintf(stderr, " Blocked: %p, type: %i, reason %s\n", - (void *)Cmd, - (int)Cmd->getType(), Cmd->getBlockReason()); - } - - if (BlockedCmdNotEmpty) + if (Count) detail::Scheduler::getInstance().releaseHostAccessor(this); 
} catch (...) { } @@ -42,10 +31,21 @@ AccessorImplHost::~AccessorImplHost() { void AccessorImplHost::addBlockedCommand(Command *BlockedCmd) { std::lock_guard Lock(MBlockedCmdsMutex); - MBlockedCmds.insert(BlockedCmd); } +size_t AccessorImplHost::countBlockedCommand(const CheckCmdFn &Check) { + std::lock_guard Lock(MBlockedCmdsMutex); + + size_t Count = 0; + + for (const Command *Cmd : MBlockedCmds) + if (Check(Cmd)) + ++Count; + + return Count; +} + Command * AccessorImplHost::findBlockedCommand(const CheckCmdFn &Check) { std::lock_guard Lock(MBlockedCmdsMutex); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a06c6d6602025..9709fc8aa169f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -467,7 +467,6 @@ void Command::makeTraceEventEpilog() { EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = BlockReason::HostTask; - //EmptyCmd->MBlockReason = "Blocked by host task for dependency"; DepCmd->addUser(ConnectCmd); @@ -501,11 +500,9 @@ void Command::makeTraceEventEpilog() { { const Requirement *Req = Dep.MDepRequirement; - //assert(!Req->MBlockedCmd && "Already blocked 3!"); - //const_cast(Req)->MBlockedCmd = EmptyCmd; - fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", - (const void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); + const_cast(Req)->addBlockedCommand(EmptyCmd); + Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); Dep.MDepCommand->addUser(ConnectCmd); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 81ea97fe89099..27d205f9d2061 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -396,7 +396,6 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { 
Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, const bool destructor) { - fprintf(stderr, "Gonna add host accessor for req %p\n", (void *)Req); const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); @@ -425,16 +424,10 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Command::BlockReason::HostAccessor; -// EmptyCmd->MBlockReason = "A Buffer is locked by the host accessor"; updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); -// assert(!Req->MBlockedCmd && "Already blocked!"); -// Req->MBlockedCmd = EmptyCmd; - - fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", - (void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); Req->addBlockedCommand(EmptyCmd); if (MPrintOptionsArray[AfterAddHostAcc]) @@ -685,7 +678,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Command::BlockReason::HostTask; - //EmptyCmd->MBlockReason = "Blocked by host task"; } for (Requirement *Req : Reqs) { @@ -718,10 +710,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd->addDep(DepDesc{NewCmd.get(), Req, AllocaCmd}); -// assert(!Req->MBlockedCmd && "Already blocked 2!"); -// Req->MBlockedCmd = EmptyCmd; - fprintf(stderr, "Blocking Req %p by cmd %p for %s\n", - (void *)Req, (void *)EmptyCmd, EmptyCmd->getBlockReason()); Req->addBlockedCommand(EmptyCmd); } } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 8decfad0356f2..4e72a6bed0f13 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp 
@@ -166,8 +166,6 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { EventImplPtr Scheduler::addHostAccessor(Requirement *Req, const bool destructor) { - fprintf(stderr, "Gonna add host accessor for req %p\n", - (void *)Req); std::lock_guard lock(MGraphLock); Command *NewCmd = MGraphBuilder.addHostAccessor(Req, destructor); @@ -187,16 +185,13 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { return Cmd->MBlockReason == Command::BlockReason::HostAccessor; }); -// assert(BlockedCmd && "Can't find appropriate command to unblock"); + assert(BlockedCmd && "Can't find appropriate command to unblock"); if (!BlockedCmd) return; BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - fprintf(stderr, "release host accessor. Req %p, Blocked cmd %p, reason: %s\n", - (void *)Req, (void *)BlockedCmd, BlockedCmd->getBlockReason()); - if (Req->removeBlockedCommand(BlockedCmd)) unblockSingleReq(Req); } @@ -215,7 +210,7 @@ void Scheduler::unblockSingleReq(Requirement * Req) { EnqueueLeaves(Record->MWriteLeaves); } -void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, +void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, const std::unordered_set &Reqs) { bool BlockedCmdEnqueued = false; @@ -234,9 +229,6 @@ void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, }; for (Requirement *Req : Reqs) { - fprintf(stderr, " Req %p in bulk. 
Cmd = %p, reason = %s\n", - (void *)Req, (void *)BlockedCmd, BlockedCmd->getBlockReason()); - if (Req->removeBlockedCommand(BlockedCmd)) { MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); EnqueueLeaves(Record->MReadLeaves); @@ -250,7 +242,7 @@ void Scheduler::unblockRequirements(const std::vector &Reqs, // fetch unique blocked cmds std::unordered_map> BlockedCmds; - std::function CheckCmd = + std::function CheckCmd = [Reason](const Command * const Cmd) { return Cmd->MBlockReason == Reason; }; @@ -258,8 +250,8 @@ void Scheduler::unblockRequirements(const std::vector &Reqs, for (Requirement *Req : Reqs) { Command *BlockedCmd = Req->findBlockedCommand(CheckCmd); -// assert(BlockedCmd && -// "Can't find appropriate command to unblock multiple requirements"); + assert(BlockedCmd && + "Can't find appropriate command to unblock multiple requirements"); BlockedCmds[BlockedCmd].insert(Req); } @@ -273,9 +265,6 @@ void Scheduler::unblockRequirements(const std::vector &Reqs, BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - fprintf(stderr, "Bulk unblock reqs. Blocked cmd reason: %s\n", - BlockedCmd->getBlockReason()); - bulkUnblockReqs(BlockedCmd, SubReqs); } } From cb10eca4f489b647938d59c12c0bb4ee6ee4bced Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 20:42:01 +0300 Subject: [PATCH 061/188] [SYCL] Remove '#if 1' Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9709fc8aa169f..5fc988d09c7ab 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -474,7 +474,6 @@ void Command::makeTraceEventEpilog() { // We can't set Dep as dependency for connect cmd 'cause Dep's command is // from different context. Thus we'll employ a hack here. 
-#if 1 { DepDesc ConnectCmdDep = Dep; ConnectCmdDep.MDepCommand = this; @@ -490,7 +489,6 @@ void Command::makeTraceEventEpilog() { CG->MHostTask.reset(new detail::HostTask(std::move(Func))); } -#endif { DepDesc EmptyCmdDep = Dep; From 58246a7c052f9eb1be512b98cf8a2a9150718596 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 22:13:30 +0300 Subject: [PATCH 062/188] [SYCL] Don't store context in CGHostTask. Remove commented code Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 5 +-- sycl/source/detail/scheduler/commands.cpp | 50 ++++------------------- sycl/source/detail/scheduler/commands.hpp | 6 +-- sycl/source/handler.cpp | 2 +- 4 files changed, 12 insertions(+), 51 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 1778c179a5be0..c6db1f2dd30ef 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -644,12 +644,9 @@ class CGInteropTask : public CG { class CGHostTask : public CG { public: std::unique_ptr MHostTask; - // context to create self event with - shared_ptr_class MContext; vector_class MArgs; CGHostTask(std::unique_ptr HostTask, - std::shared_ptr Context, vector_class Args, std::vector> ArgsStorage, std::vector AccStorage, @@ -660,7 +657,7 @@ class CGHostTask : public CG { : CG(Type, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements), std::move(Events), std::move(loc)), - MHostTask(std::move(HostTask)), MContext(Context), + MHostTask(std::move(HostTask)), MArgs(std::move(Args)) {} }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5fc988d09c7ab..9807a1de58617 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -419,40 +419,20 @@ void Command::makeTraceEventEpilog() { #endif } -// static -/*EventImplPtr*/ void Command::connectDepEvent(EventImplPtr DepEvent, - const 
ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, - const DepDesc &Dep) { -// EventImplPtr GlueEvent(new detail::event_impl()); -// GlueEvent->setContextImpl(Context); - -// RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); -// auto Plugin = Context->getPlugin(); - // Add an event on the current context that - // is triggered when the DepEvent is complete - // TODO eliminate creation of user-event -// Plugin.call(Context->getHandleRef(), -// &GlueEventHandle); - +void Command::connectDepEvent(EventImplPtr DepEvent, + const ContextImplPtr &DepEventContext, + const ContextImplPtr &Context, + const DepDesc &Dep) { // construct Host Task type command manually and make it depend on DepEvent ExecCGCommand *ConnectCmd = nullptr; { // Temporary function. Will be replaced depending on circumstances. -#if 0 - std::function Func = [GlueEvent]() { - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - const detail::plugin &Plugin = GlueEvent->getPlugin(); - Plugin.call(GlueEventHandle, CL_COMPLETE); - }; -#else std::function Func = []() {}; -#endif std::unique_ptr HT(new detail::HostTask(std::move(Func))); std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), DepEventContext, /* Args = */ {}, /* ArgsStorage = */ {}, + std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); @@ -523,8 +503,6 @@ void Command::makeTraceEventEpilog() { PI_INVALID_OPERATION); MPreparedHostDepsEvents.push_back(ConnectCmd->getEvent()); - -// return GlueEvent; } void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { @@ -1990,22 +1968,8 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } - { - ContextImplPtr HTContext = HostTask->MContext; - - // Init self-event - EventImplPtr SelfEvent = MEvent; -// RT::PiContext ContextRef = HTContext->getHandleRef(); - - // FIXME You can't create event for 
host-queue/host-context -// const detail::plugin &Plugin = HTContext->getPlugin(); -// Plugin.call(ContextRef, &Event); - -// SelfEvent->setContextImpl(HTContext); - - MQueue->getThreadPool().submit(std::move( - DispatchHostTask(EventImpls, HostTask, MDeps, SelfEvent))); - } + MQueue->getThreadPool().submit(std::move( + DispatchHostTask(EventImpls, HostTask, MDeps, MEvent))); return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 51aef5ae281d4..50cbef3159ee1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -202,9 +202,9 @@ class Command { /// /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. void connectDepEvent(EventImplPtr DepEvent, - const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, - const DepDesc &Dep); + const ContextImplPtr &DepEventContext, + const ContextImplPtr &Context, + const DepDesc &Dep); virtual ContextImplPtr getContext() const; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f8fb31c36fb27..f7b372c2f3535 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -79,7 +79,7 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { break; case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), MQueue->getContextImplPtr(), std::move(MArgs), + std::move(MHostTask), /*MQueue,*/ std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), MCGType, Payload)); From 96e4d4beae20fd6fff5b1c1c57b1538841bea096 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 22:46:03 +0300 Subject: [PATCH 063/188] [SYCL] Fix some review comments. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 8 +------- sycl/source/detail/event_impl.hpp | 2 -- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 4a549c6da7cb6..961c1c583d027 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -37,13 +37,7 @@ void AccessorImplHost::addBlockedCommand(Command *BlockedCmd) { size_t AccessorImplHost::countBlockedCommand(const CheckCmdFn &Check) { std::lock_guard Lock(MBlockedCmdsMutex); - size_t Count = 0; - - for (const Command *Cmd : MBlockedCmds) - if (Check(Cmd)) - ++Count; - - return Count; + return std::count_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); } Command * diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 11e6cebb61ca4..94600f5eb6f9f 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -145,8 +145,6 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } - QueueImplWPtr getQueueWPtr() const { return MQueue; } - /// Gets the native handle of the SYCL event. /// /// \return a native handle. From 30156f2443b71a953670fb16b6ed52176e019d13 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 23:01:32 +0300 Subject: [PATCH 064/188] [SYCL] Remove unneeded code. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 27d205f9d2061..7c708ce66d00c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -658,9 +658,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, const std::vector &Events = CommandGroup->MEvents; const CG::CGTYPE CGType = CommandGroup->getType(); - if (CommandGroup->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) - Queue = Scheduler::getInstance().getDefaultHostQueue(); - std::unique_ptr NewCmd( new ExecCGCommand(std::move(CommandGroup), Queue)); if (!NewCmd) From 269319dd8de27d53ad97ebbe53b457188046ed2b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 23:30:16 +0300 Subject: [PATCH 065/188] [NFC] [SYCL] Fix codestyle issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 2 +- sycl/include/CL/sycl/detail/cg.hpp | 6 +-- sycl/source/detail/accessor_impl.cpp | 7 +-- sycl/source/detail/scheduler/commands.cpp | 43 ++++++++----------- sycl/source/detail/scheduler/commands.hpp | 8 +--- .../source/detail/scheduler/graph_builder.cpp | 3 +- sycl/source/detail/scheduler/scheduler.cpp | 17 ++++---- sycl/source/detail/scheduler/scheduler.hpp | 2 +- sycl/source/detail/thread_pool.hpp | 3 +- 9 files changed, 38 insertions(+), 53 deletions(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 9535affc6a9ae..8c26f4b7ae227 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -100,7 +100,7 @@ class __SYCL_EXPORT AccessorImplHost { void *MData = nullptr; protected: - using CheckCmdFn = std::function; + using CheckCmdFn = std::function; void addBlockedCommand(Command *BlockedCmd); Command 
*findBlockedCommand(const CheckCmdFn &Check); bool removeBlockedCommand(Command *BlockedCmd); diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index c6db1f2dd30ef..af1f63df2b53e 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -646,8 +646,7 @@ class CGHostTask : public CG { std::unique_ptr MHostTask; vector_class MArgs; - CGHostTask(std::unique_ptr HostTask, - vector_class Args, + CGHostTask(std::unique_ptr HostTask, vector_class Args, std::vector> ArgsStorage, std::vector AccStorage, std::vector> SharedPtrStorage, @@ -657,8 +656,7 @@ class CGHostTask : public CG { : CG(Type, std::move(ArgsStorage), std::move(AccStorage), std::move(SharedPtrStorage), std::move(Requirements), std::move(Events), std::move(loc)), - MHostTask(std::move(HostTask)), - MArgs(std::move(Args)) {} + MHostTask(std::move(HostTask)), MArgs(std::move(Args)) {} }; } // namespace detail diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 961c1c583d027..4efab683532f4 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -18,8 +18,7 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { - size_t Count = countBlockedCommand( - [](const Command * const Cmd) { + size_t Count = countBlockedCommand([](const Command *const Cmd) { return Cmd->MBlockReason == Command::BlockReason::HostAccessor; }); @@ -40,8 +39,7 @@ size_t AccessorImplHost::countBlockedCommand(const CheckCmdFn &Check) { return std::count_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); } -Command * -AccessorImplHost::findBlockedCommand(const CheckCmdFn &Check) { +Command *AccessorImplHost::findBlockedCommand(const CheckCmdFn &Check) { std::lock_guard Lock(MBlockedCmdsMutex); auto FoundIt = std::find_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); @@ -65,4 +63,3 @@ void addHostAccessorAndWait(Requirement *Req) { } // namespace detail } // namespace sycl } // 
__SYCL_INLINE_NAMESPACE(cl) - diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9807a1de58617..5117d11fc7b5c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -175,10 +175,9 @@ class DispatchHostTask { // wait for dependency events // FIXME introduce a more sophisticated wait mechanism for (auto &PluginWithEvents : RequiredEventsPerPlugin) { - std::vector RawEvents = getPiEvents( - PluginWithEvents.second); - PluginWithEvents.first->call( - RawEvents.size(), RawEvents.data()); + std::vector RawEvents = getPiEvents(PluginWithEvents.second); + PluginWithEvents.first->call(RawEvents.size(), + RawEvents.data()); } } @@ -436,13 +435,13 @@ void Command::connectDepEvent(EventImplPtr DepEvent, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, CG::CODEPLAY_HOST_TASK, /* Payload */ {})); - ConnectCmd = new ExecCGCommand(std::move(ConnectCG), - Scheduler::getInstance().getDefaultHostQueue()); + ConnectCmd = new ExecCGCommand( + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); } if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { - EmptyCommand *EmptyCmd = new EmptyCommand( - Scheduler::getInstance().getDefaultHostQueue()); + EmptyCommand *EmptyCmd = + new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; @@ -457,15 +456,14 @@ void Command::connectDepEvent(EventImplPtr DepEvent, { DepDesc ConnectCmdDep = Dep; ConnectCmdDep.MDepCommand = this; - //ConnectCmd->addDep(ConnectCmdDep); std::function Func = [ConnectCmdDep]() { std::vector Deps; Deps.push_back(ConnectCmdDep); DispatchHostTask::unblockBlockedDeps(Deps); }; - auto *CG = static_cast( - ConnectCmd->MCommandGroup.get()); + auto *CG = + static_cast(ConnectCmd->MCommandGroup.get()); CG->MHostTask.reset(new 
detail::HostTask(std::move(Func))); } @@ -522,12 +520,9 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match - connect them using user event - if (DepEventContext != Context && !Context->is_host()) { - /*EventImplPtr GlueEvent = */connectDepEvent(DepEvent, DepEventContext, - Context, Dep); - -// MPreparedDepsEvents.push_back(std::move(GlueEvent)); - } else + if (DepEventContext != Context && !Context->is_host()) + connectDepEvent(DepEvent, DepEventContext, Context, Dep); + else MPreparedDepsEvents.push_back(std::move(DepEvent)); } @@ -699,10 +694,10 @@ void Command::resolveReleaseDependencies(std::set &DepList) { const char *Command::getBlockReason() const { switch (MBlockReason) { - case BlockReason::HostAccessor: - return "A Buffer is locked by the host accessor"; - case BlockReason::HostTask: - return "Blocked by host task"; + case BlockReason::HostAccessor: + return "A Buffer is locked by the host accessor"; + case BlockReason::HostTask: + return "Blocked by host task"; } return "Unknown block reason"; @@ -1325,7 +1320,7 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue, Requirement Req) } EmptyCommand::EmptyCommand(QueueImplPtr Queue) - :Command(CommandType::EMPTY_TASK, std::move(Queue)) { + : Command(CommandType::EMPTY_TASK, std::move(Queue)) { emitInstrumentationDataProxy(); } @@ -1968,8 +1963,8 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } - MQueue->getThreadPool().submit(std::move( - DispatchHostTask(EventImpls, HostTask, MDeps, MEvent))); + MQueue->getThreadPool().submit( + std::move(DispatchHostTask(EventImpls, HostTask, MDeps, MEvent))); return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 50cbef3159ee1..9d727dfd78ef6 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -203,8 +203,7 @@ class Command { /// 
Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. void connectDepEvent(EventImplPtr DepEvent, const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, - const DepDesc &Dep); + const ContextImplPtr &Context, const DepDesc &Dep); virtual ContextImplPtr getContext() const; @@ -226,10 +225,7 @@ class Command { /// Counts the number of memory objects this command is a leaf for. unsigned MLeafCounter = 0; - enum class BlockReason : int { - HostAccessor = 0, - HostTask - }; + enum class BlockReason : int { HostAccessor = 0, HostTask }; BlockReason MBlockReason; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 7c708ce66d00c..593be57f3d649 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -669,8 +669,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCommand *EmptyCmd = nullptr; if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { - EmptyCmd = new EmptyCommand( - Scheduler::getInstance().getDefaultHostQueue()); + EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4e72a6bed0f13..43fa32f7bd07c 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -180,8 +180,8 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, } void Scheduler::releaseHostAccessor(Requirement *Req) { - Command *const BlockedCmd = Req->findBlockedCommand( - [](const Command * const Cmd) { + Command *const BlockedCmd = + Req->findBlockedCommand([](const Command * const Cmd) { return Cmd->MBlockReason == Command::BlockReason::HostAccessor; }); @@ -196,7 +196,7 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { unblockSingleReq(Req); } -void 
Scheduler::unblockSingleReq(Requirement * Req) { +void Scheduler::unblockSingleReq(Requirement *Req) { MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { @@ -210,11 +210,12 @@ void Scheduler::unblockSingleReq(Requirement * Req) { EnqueueLeaves(Record->MWriteLeaves); } -void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, +void Scheduler::bulkUnblockReqs(Command *const BlockedCmd, const std::unordered_set &Reqs) { bool BlockedCmdEnqueued = false; - auto EnqueueLeaves = [BlockedCmd, &BlockedCmdEnqueued](CircularBuffer &Leaves) { + auto EnqueueLeaves = [BlockedCmd, &BlockedCmdEnqueued]( + CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { if (BlockedCmd == Cmd && BlockedCmdEnqueued) continue; @@ -230,7 +231,7 @@ void Scheduler::bulkUnblockReqs(Command * const BlockedCmd, for (Requirement *Req : Reqs) { if (Req->removeBlockedCommand(BlockedCmd)) { - MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); + MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); EnqueueLeaves(Record->MReadLeaves); EnqueueLeaves(Record->MWriteLeaves); } @@ -242,8 +243,8 @@ void Scheduler::unblockRequirements(const std::vector &Reqs, // fetch unique blocked cmds std::unordered_map> BlockedCmds; - std::function CheckCmd = - [Reason](const Command * const Cmd) { + std::function CheckCmd = + [Reason](const Command *const Cmd) { return Cmd->MBlockReason == Reason; }; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 61312e40ff410..dd19b047b1741 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -434,7 +434,7 @@ class Scheduler { static Scheduler instance; void unblockSingleReq(Requirement *Req); - void bulkUnblockReqs(Command * const BlockedCmd, + void bulkUnblockReqs(Command *const BlockedCmd, const std::unordered_set &Reqs); /// Graph builder class. 
diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 393303197db2e..35ef0a07a4a84 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -67,8 +67,7 @@ class ThreadPool { Thread.join(); } - template - void submit(T &&Func) { + template void submit(T &&Func) { { std::lock_guard Lock(MJobQueueMutex); MJobQueue.emplace(std::move([Func]() { Func(); })); From 212a484a48a3bcb2d796995aed88250380ba19cb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 19 Apr 2020 23:30:48 +0300 Subject: [PATCH 066/188] [NFC] [SYCL] Remove unused code Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 593be57f3d649..1f30ecfc51244 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -614,17 +614,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); - -#if 0 - std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); - for (Command *Dep : Deps) { - AllocaCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - Dep->addUser(AllocaCmd); - } - updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); -#endif } } } From 07133a71faed347caa6aec83fabf4c7d993a6ac3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 20 Apr 2020 00:05:46 +0300 Subject: [PATCH 067/188] [NFC] [SYCL] Fix codestyle issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 4 ++-- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/thread_pool.hpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp 
index 4efab683532f4..ed7b13985e6e8 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -19,8 +19,8 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { size_t Count = countBlockedCommand([](const Command *const Cmd) { - return Cmd->MBlockReason == Command::BlockReason::HostAccessor; - }); + return Cmd->MBlockReason == Command::BlockReason::HostAccessor; + }); if (Count) detail::Scheduler::getInstance().releaseHostAccessor(this); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 43fa32f7bd07c..dc18f8027fe70 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -181,7 +181,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, void Scheduler::releaseHostAccessor(Requirement *Req) { Command *const BlockedCmd = - Req->findBlockedCommand([](const Command * const Cmd) { + Req->findBlockedCommand([](const Command *const Cmd) { return Cmd->MBlockReason == Command::BlockReason::HostAccessor; }); diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 35ef0a07a4a84..1b22349ccf6fb 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -67,7 +67,7 @@ class ThreadPool { Thread.join(); } - template void submit(T &&Func) { + template void submit(T &&Func) { { std::lock_guard Lock(MJobQueueMutex); MJobQueue.emplace(std::move([Func]() { Func(); })); From 8a6ee309976796c9aa2a6bc20cdcc178a72a076a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 20 Apr 2020 10:23:49 +0300 Subject: [PATCH 068/188] [NFC] [SYCL] Fix codestyle issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index dc18f8027fe70..1c74a8a373e04 100644 --- 
a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -180,7 +180,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, } void Scheduler::releaseHostAccessor(Requirement *Req) { - Command *const BlockedCmd = + Command *const BlockedCmd = Req->findBlockedCommand([](const Command *const Cmd) { return Cmd->MBlockReason == Command::BlockReason::HostAccessor; }); From ff5023c1f8aee4b45ad62f453227be73a7dfef5e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 20 Apr 2020 13:14:23 +0300 Subject: [PATCH 069/188] [SYCL] Employ a hack to prevent invalid read in some cases Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1f30ecfc51244..1d2d702788962 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -811,6 +811,8 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { std::queue CmdsToVisit({FinishedCmd}); std::set Visited; + // FIXME a more sophisticated solution instead of this hack + std::set Deleted; // Traverse the graph using BFS while (!CmdsToVisit.empty()) { @@ -834,6 +836,10 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { continue; for (Command *UserCmd : Cmd->MUsers) { + // Prevent invalid read. + // FIXME remove this hack. 
+ if (Deleted.count(UserCmd)) + continue; for (DepDesc &Dep : UserCmd->MDeps) { // Link the users of the command to the alloca command(s) instead if (Dep.MDepCommand == Cmd) { @@ -849,6 +855,8 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { } Cmd->getEvent()->setCommand(nullptr); delete Cmd; + + Deleted.insert(Cmd); } } From 8a567ba5296c84080280fa342dde40f361712b4e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 20 Apr 2020 14:31:12 +0300 Subject: [PATCH 070/188] [SYCL] Enqueue dependant commands. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 15 ++++++++++++++- sycl/source/detail/scheduler/scheduler.hpp | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5117d11fc7b5c..3e2c6a7b92111 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -195,8 +195,21 @@ class DispatchHostTask { // update self-event status if (MSelfEvent->is_host()) { + fprintf(stderr, "Gonna enqueue smth here for cmd %p\n", + (void *)MSelfEvent->getCommand()); // TODO - fprintf(stderr, "Gonna enqueue smth here\n"); + + Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); + + assert(ThisCmd && "No command found for host-task self event"); + + for (Command *UserCmd : ThisCmd->MUsers) { + EnqueueResultT Res; + bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(UserCmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Failed to enqueue a dependant command", + PI_INVALID_OPERATION); + } } else { const detail::plugin &Plugin = MSelfEvent->getPlugin(); Plugin.call(MSelfEvent->getHandleRef(), diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index dd19b047b1741..32eaf70e52002 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ 
b/sycl/source/detail/scheduler/scheduler.hpp @@ -173,6 +173,7 @@ namespace detail { class queue_impl; class event_impl; class context_impl; +class DispatchHostTask; using QueueImplPtr = std::shared_ptr; using EventImplPtr = std::shared_ptr; @@ -676,6 +677,7 @@ class Scheduler { QueueImplPtr DefaultHostQueue; friend class Command; + friend class DispatchHostTask; }; } // namespace detail From dd4ac89f05abdd07d7258ef3d5b7b2bda02a3880 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 21 Apr 2020 17:56:27 +0300 Subject: [PATCH 071/188] [SYCL] Worked on fixing runtime issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 1 + sycl/source/detail/scheduler/commands.cpp | 114 +++++++++++------- sycl/source/detail/scheduler/commands.hpp | 9 ++ .../source/detail/scheduler/graph_builder.cpp | 22 +++- sycl/source/detail/scheduler/scheduler.hpp | 5 + 5 files changed, 101 insertions(+), 50 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index ed7b13985e6e8..b52f24f101828 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -30,6 +30,7 @@ AccessorImplHost::~AccessorImplHost() { void AccessorImplHost::addBlockedCommand(Command *BlockedCmd) { std::lock_guard Lock(MBlockedCmdsMutex); + MBlockedCmds.insert(BlockedCmd); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3e2c6a7b92111..d3bc35aed9df5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -193,12 +193,10 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now MHostTask->MHostTask->call(); + unblockBlockedDeps(MDeps); + // update self-event status if (MSelfEvent->is_host()) { - fprintf(stderr, "Gonna enqueue smth here for cmd %p\n", - (void *)MSelfEvent->getCommand()); - // TODO - Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); assert(ThisCmd && "No 
command found for host-task self event"); @@ -215,8 +213,6 @@ class DispatchHostTask { Plugin.call(MSelfEvent->getHandleRef(), PI_EVENT_COMPLETE); } - - unblockBlockedDeps(MDeps); } static void unblockBlockedDeps(const std::vector &Deps) { @@ -431,6 +427,66 @@ void Command::makeTraceEventEpilog() { #endif } +void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, + ExecCGCommand *const ConnectCmd, + EmptyCommand *const EmptyCmd, + const DepDesc &Dep) { + Requirement *Req = const_cast(Dep.MDepRequirement); + + Req->addBlockedCommand(EmptyCmd); + + // We can't set Dep as dependency for connect cmd 'cause Dep's command is + // from different context. Thus we'll employ a hack here. + if (false) { + DepDesc ConnectCmdDep = Dep; + ConnectCmdDep.MDepCommand = this; + std::function Func = [ConnectCmdDep]() { + std::vector Deps; + Deps.push_back(ConnectCmdDep); + DispatchHostTask::unblockBlockedDeps(Deps); + }; + + auto *CG = + static_cast(ConnectCmd->MCommandGroup.get()); + + CG->MHostTask.reset(new detail::HostTask(std::move(Func))); + } + + { + Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; + + MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); + + AllocaCommandBase *AllocaCmd = + GB.findAllocaForReq(Record, Req, DepEventContext); + assert(AllocaCmd && "There must be alloca for requirement!"); + + std::set Deps = + GB.findDepsForReq(Record, Req, DepEventContext); + assert(Deps.size() && "There must be some deps"); + + for (Command *ReqDepCmd : Deps) { + ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); + ReqDepCmd->addUser(ConnectCmd); + } + + GB.updateLeaves(Deps, Record, Req->MAccessMode); + GB.addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); + + { + DepDesc EmptyCmdDep = Dep; + EmptyCmdDep.MDepCommand = ConnectCmd; + + EmptyCmd->addDep(EmptyCmdDep); + ConnectCmd->addUser(EmptyCmd); + } + + GB.updateLeaves({ConnectCmd}, Record, Req->MAccessMode); + 
GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + } +} + void Command::connectDepEvent(EventImplPtr DepEvent, const ContextImplPtr &DepEventContext, const ContextImplPtr &Context, @@ -452,6 +508,9 @@ void Command::connectDepEvent(EventImplPtr DepEvent, std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); } + if (!ConnectCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -463,48 +522,13 @@ void Command::connectDepEvent(EventImplPtr DepEvent, DepCmd->addUser(ConnectCmd); if (Dep.MDepRequirement) { - // We can't set Dep as dependency for connect cmd 'cause Dep's command is - // from different context. Thus we'll employ a hack here. - - { - DepDesc ConnectCmdDep = Dep; - ConnectCmdDep.MDepCommand = this; - std::function Func = [ConnectCmdDep]() { - std::vector Deps; - Deps.push_back(ConnectCmdDep); - DispatchHostTask::unblockBlockedDeps(Deps); - }; - - auto *CG = - static_cast(ConnectCmd->MCommandGroup.get()); - - CG->MHostTask.reset(new detail::HostTask(std::move(Func))); - } - - { - DepDesc EmptyCmdDep = Dep; - EmptyCmdDep.MDepCommand = ConnectCmd; - EmptyCmd->addDep(EmptyCmdDep); - } - - { - const Requirement *Req = Dep.MDepRequirement; - - const_cast(Req)->addBlockedCommand(EmptyCmd); - - Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); - Dep.MDepCommand->addUser(ConnectCmd); - GB.updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); - GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - } - } else { + addConnectCmdWithReq(DepEventContext, ConnectCmd, EmptyCmd, Dep); + } else /* if (!Dep.MDepRequirement) */ { ConnectCmd->addDep(DepEvent); EmptyCmd->addDep(ConnectCmd->MEvent); + ConnectCmd->addUser(EmptyCmd); } - - ConnectCmd->addUser(EmptyCmd); - } else + } else // 
if (!DepEvent->getCommand()) ConnectCmd->addDep(DepEvent); EnqueueResultT Res; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 9d727dfd78ef6..c9865369253b6 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -35,6 +35,8 @@ class Command; class AllocaCommand; class AllocaCommandBase; class ReleaseCommand; +class ExecCGCommand; +class EmptyCommand; enum BlockingT { NON_BLOCKING = 0, BLOCKING }; @@ -204,6 +206,13 @@ class Command { void connectDepEvent(EventImplPtr DepEvent, const ContextImplPtr &DepEventContext, const ContextImplPtr &Context, const DepDesc &Dep); + /// Helper for connectDepEvent + /// \param ConnectCmd connection cmd to properly add + /// \param Dep DepDesc with non-null MDepRequirmeent + void addConnectCmdWithReq(const ContextImplPtr &DepEventContext, + ExecCGCommand *const ConnectCmd, + EmptyCommand *const EmptyCmd, + const DepDesc &Dep); virtual ContextImplPtr getContext() const; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1d2d702788962..da55f153f580a 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -417,6 +417,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, // Need empty command to be blocked until host accessor is destructed EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); + EmptyCmd->addDep( DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); UpdateHostAccCmd->addUser(EmptyCmd); @@ -614,6 +615,15 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); + + std::set Deps = + findDepsForReq(Record, Req, Queue->getContextImplPtr()); + for (Command *Dep : Deps) { + AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); + 
Dep->addUser(AllocaCmd); + } + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } @@ -659,7 +669,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); - EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Command::BlockReason::HostTask; @@ -713,11 +722,12 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, const Requirement *Req = Dep.MDepRequirement; MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); + addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + updateLeaves({NewCmd.get()}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - else - addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); + } } // Register all the events as dependencies @@ -840,6 +850,7 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { // FIXME remove this hack. 
if (Deleted.count(UserCmd)) continue; + for (DepDesc &Dep : UserCmd->MDeps) { // Link the users of the command to the alloca command(s) instead if (Dep.MDepCommand == Cmd) { @@ -854,9 +865,10 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { DepCmd->MUsers.erase(Cmd); } Cmd->getEvent()->setCommand(nullptr); + delete Cmd; - Deleted.insert(Cmd); + //Deleted.insert(Cmd); } } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 32eaf70e52002..a16de7862d7af 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -546,6 +546,7 @@ class Scheduler { std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); + protected: /// Finds a command dependency corresponding to the record. DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record); @@ -553,6 +554,10 @@ class Scheduler { AllocaCommandBase *findAllocaForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); + friend class Command; + + private: + /// Searches for suitable alloca in memory record. /// /// If none found, creates new one. 
From 0724914cbac0daa96f24864c4efc6387eb7cd7f1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 21 Apr 2020 18:57:34 +0300 Subject: [PATCH 072/188] [SYCL] Modify test Signed-off-by: Sergey Kanaev --- .../host-task-dependency.cpp | 40 +++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index a6ab395e2f5da..d92bc75010b7a 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -27,12 +27,30 @@ void Thread1Fn(Context &Ctx) { { S::accessor - Acc(Ctx.Buf2); + Acc(Ctx.Buf1); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -1; } + { + S::accessor + Acc(Ctx.Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -2; + } + + { + S::accessor + Acc(Ctx.Buf3); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -2; + } + // 1. submit task writing to buffer 1 Ctx.Queue.submit([&](S::handler &CGH) { S::accessor Acc(Ctx.Buf3); - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - assert(Acc[Idx] == Idx && "Invalid data in third buffer"); + bool Failure = false; + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) { + fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, Acc[Idx]); + + Failure |= (Acc[Idx] != Idx); + //assert(Acc[Idx] == Idx && "Invalid data in third buffer"); + } + + assert(!Failure && "Invalid data in third buffer"); } } @@ -143,14 +169,14 @@ void test() { S::access::target::host_buffer> ResultAcc(Ctx.Buf2); - bool failure = false; + bool Failure = false; for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); + fprintf(stderr, "Second buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); - failure |= (ResultAcc[Idx] != Idx); + Failure |= (ResultAcc[Idx] != Idx); } - assert(!failure && "Invalid data in result buffer"); + assert(!Failure && 
"Invalid data in result buffer"); } } From 2e3fbe17df82b8042a3c2a8cab35d321908433e6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 21 Apr 2020 22:15:31 +0300 Subject: [PATCH 073/188] [SYCL] Worked on fixing runtime issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 1 + sycl/source/detail/accessor_impl.cpp | 15 +- sycl/source/detail/event_impl.cpp | 40 ++++- sycl/source/detail/event_impl.hpp | 9 +- sycl/source/detail/scheduler/commands.cpp | 150 +++++++++++++++--- sycl/source/detail/scheduler/commands.hpp | 1 + .../source/detail/scheduler/graph_builder.cpp | 9 -- 7 files changed, 182 insertions(+), 43 deletions(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 8c26f4b7ae227..22a67b23b3ec1 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -101,6 +101,7 @@ class __SYCL_EXPORT AccessorImplHost { protected: using CheckCmdFn = std::function; + void addBlockedCommand(Command *BlockedCmd); Command *findBlockedCommand(const CheckCmdFn &Check); bool removeBlockedCommand(Command *BlockedCmd); diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index b52f24f101828..7461ffaef4af4 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -18,10 +18,21 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { - size_t Count = countBlockedCommand([](const Command *const Cmd) { - return Cmd->MBlockReason == Command::BlockReason::HostAccessor; + std::set BlockedCmds; + size_t Count = countBlockedCommand([&BlockedCmds](const Command *const Cmd) { + if (Cmd->MBlockReason == Command::BlockReason::HostAccessor) { + BlockedCmds.insert(Cmd); + return true; + } + + return false; }); + for (const Command *Cmd : BlockedCmds) + if (EventImplPtr Event = Cmd->getEvent()) + if (Event->is_host()) + Event->setComplete(); + if (Count) 
detail::Scheduler::getInstance().releaseHostAccessor(this); } catch (...) { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f3263720a2bb4..ea62017187257 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -52,9 +52,28 @@ event_impl::~event_impl() { void event_impl::waitInternal() const { if (!MHostEvent) { getPlugin().call(1, &MEvent); + return; } // Waiting of host events is NOP so far as all operations on host device // are blocking. + + while (MState != HES_Ready) + ; +} + +void event_impl::setComplete() { + assert(MHostEvent && "setComplete is only allowed for host events"); + +#ifndef NDEBUG + int Expected = HES_NotReady; + int Desired = HES_Ready; + + bool Succeeded = MState.compare_exchange_strong(Expected, Desired); + + assert(Succeeded && "Unexpected state of event"); +#else + MState.store(static_cast(HES_Ready)); +#endif } const RT::PiEvent &event_impl::getHandleRef() const { return MEvent; } @@ -70,9 +89,12 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MContext = Context; } +event_impl::event_impl() + : MState(HES_Ready) {} + event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), - MOpenCLInterop(true), MHostEvent(false) { + MOpenCLInterop(true), MHostEvent(false), MState(HES_Ready) { if (MContext->is_host()) { throw cl::sycl::invalid_parameter_error( @@ -96,12 +118,16 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) } event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { - if (Queue->is_host() && - Queue->has_property()) { - MHostProfilingInfo.reset(new HostProfilingInfo()); - if (!MHostProfilingInfo) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - } + if (Queue->is_host()) { + MState.store(HES_NotReady); + + if (Queue->has_property()) { + MHostProfilingInfo.reset(new HostProfilingInfo()); + if (!MHostProfilingInfo) + throw 
runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + } + } else + MState.store(HES_Ready); } void *event_impl::instrumentationProlog(string_class &Name, int32_t StreamID, diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 94600f5eb6f9f..b5f110df48850 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -14,6 +14,7 @@ #include #include +#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -32,7 +33,7 @@ class event_impl { /// Constructs a ready SYCL event. /// /// If the constructed SYCL event is waited on it will complete immediately. - event_impl() = default; + event_impl(); /// Constructs an event instance from a plug-in event handle. /// /// The SyclContext must match the plug-in context associated with the @@ -166,6 +167,12 @@ class event_impl { bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; + + enum HostEventState : int { HES_NotReady = 0, HES_Ready }; + + // State of host event. Employed only for host events. + // Used values are listed in HostEventState enum. 
+ std::atomic MState; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d3bc35aed9df5..a7b2d2efcf049 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -159,6 +159,7 @@ getPiEvents(const std::vector &EventImpls) { class DispatchHostTask { std::vector MDepEvents; + std::vector MDepHostEvents; CGHostTask *MHostTask; std::vector MDeps; EventImplPtr MSelfEvent; @@ -172,19 +173,51 @@ class DispatchHostTask { RequiredEventsPerPlugin[&Plugin].push_back(Event); } - // wait for dependency events + // wait for dependency device events // FIXME introduce a more sophisticated wait mechanism for (auto &PluginWithEvents : RequiredEventsPerPlugin) { std::vector RawEvents = getPiEvents(PluginWithEvents.second); PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); } + + // wait for dependency host events + for (const EventImplPtr &Event : MDepHostEvents) { + Event->waitInternal(); + } + } + + // Lookup for empty command amongst users of this cmd + static EmptyCommand *findMyEmptyCommand(Command *ThisCmd) { +#ifndef NDEBUG + EmptyCommand *Result = nullptr; +#endif + + for (Command *Cmd : ThisCmd->MUsers) + if (Cmd->getType() == Command::CommandType::EMPTY_TASK && + Cmd->MIsBlockable && + Cmd->MBlockReason == Command::BlockReason::HostTask) { +#ifndef NDEBUG + assert(!Result && + "Multiple empty commands in users of a single host task"); + Result = static_cast(Cmd); +#else + return static_cast(Cmd); +#endif + } + +#ifndef NDEBUG + return Result; +#endif } public: - DispatchHostTask(std::vector DepEvents, CGHostTask *HostTask, - std::vector Deps, EventImplPtr SelfEvent) - : MDepEvents(std::move(DepEvents)), MHostTask{HostTask}, + DispatchHostTask(std::vector DepEvents, + std::vector DepHostEvents, + CGHostTask *HostTask, std::vector Deps, + EventImplPtr SelfEvent) + : MDepEvents(std::move(DepEvents)), MDepHostEvents(DepHostEvents), + 
MHostTask{HostTask}, MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} void operator()() const { @@ -193,8 +226,6 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now MHostTask->MHostTask->call(); - unblockBlockedDeps(MDeps); - // update self-event status if (MSelfEvent->is_host()) { Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); @@ -208,11 +239,18 @@ class DispatchHostTask { throw runtime_error("Failed to enqueue a dependant command", PI_INVALID_OPERATION); } + + MSelfEvent->setComplete(); + + EmptyCommand *MyEmptyCmd = findMyEmptyCommand(ThisCmd); + MyEmptyCmd->getEvent()->setComplete(); } else { const detail::plugin &Plugin = MSelfEvent->getPlugin(); Plugin.call(MSelfEvent->getHandleRef(), PI_EVENT_COMPLETE); } + + unblockBlockedDeps(MDeps); } static void unblockBlockedDeps(const std::vector &Deps) { @@ -435,23 +473,6 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, Req->addBlockedCommand(EmptyCmd); - // We can't set Dep as dependency for connect cmd 'cause Dep's command is - // from different context. Thus we'll employ a hack here. 
- if (false) { - DepDesc ConnectCmdDep = Dep; - ConnectCmdDep.MDepCommand = this; - std::function Func = [ConnectCmdDep]() { - std::vector Deps; - Deps.push_back(ConnectCmdDep); - DispatchHostTask::unblockBlockedDeps(Deps); - }; - - auto *CG = - static_cast(ConnectCmd->MCommandGroup.get()); - - CG->MHostTask.reset(new detail::HostTask(std::move(Func))); - } - { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; @@ -804,6 +825,9 @@ cl_int AllocaCommand::enqueueImp() { if (MQueue->is_host()) { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, Event); + + MEvent->setComplete(); + return CL_SUCCESS; } HostPtr = MLinkedAllocaCmd->getMemAllocation(); @@ -813,6 +837,10 @@ cl_int AllocaCommand::enqueueImp() { MMemAllocation = MemoryManager::allocate( detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -892,6 +920,10 @@ cl_int AllocaSubBufCommand::enqueueImp() { MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1004,6 +1036,9 @@ cl_int ReleaseCommand::enqueueImp() { MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1063,6 +1098,10 @@ cl_int MapMemObject::enqueueImp() { MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MQueue, MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1118,6 +1157,10 @@ cl_int UnMapMemObject::enqueueImp() { MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), 
MDstAllocaCmd->getMemAllocation(), MQueue, *MSrcPtr, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1206,6 +1249,9 @@ cl_int MemCpyCommand::enqueueImp() { MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event); } + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1256,6 +1302,10 @@ cl_int UpdateHostRequirementCommand::enqueueImp() { assert(MSrcAllocaCmd->getMemAllocation() && "Expected valid source pointer"); assert(MDstPtr && "Expected valid target pointer"); *MDstPtr = MSrcAllocaCmd->getMemAllocation(); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1337,6 +1387,10 @@ cl_int MemCpyCommandHost::enqueueImp() { if (MDstReq.MAccessMode == access::mode::discard_read_write || MDstReq.MAccessMode == access::mode::discard_write) { Command::waitForEvents(Queue, EventImpls, Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1346,6 +1400,10 @@ cl_int MemCpyCommandHost::enqueueImp() { MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1715,6 +1773,10 @@ cl_int ExecCGCommand::enqueueImp() { Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::COPY_PTR_TO_ACC: { @@ -1732,6 +1794,9 @@ cl_int ExecCGCommand::enqueueImp() { Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), Event); + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::COPY_ACC_TO_ACC: { @@ -1748,6 +1813,10 @@ cl_int ExecCGCommand::enqueueImp() { ReqSrc->MOffset, 
ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(), MQueue, ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange, ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::FILL: { @@ -1760,6 +1829,10 @@ cl_int ExecCGCommand::enqueueImp() { Fill->MPattern.size(), Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::RUN_ON_HOST_INTEL: { @@ -1789,6 +1862,10 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(RawEvents.size(), &RawEvents[0]); } DispatchNativeKernel((void *)ArgsBlob.data()); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1846,6 +1923,10 @@ cl_int ExecCGCommand::enqueueImp() { } ExecKernel->MHostKernel->call(NDRDesc, getEvent()->getHostProfilingInfo()); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } @@ -1926,18 +2007,30 @@ cl_int ExecCGCommand::enqueueImp() { return detail::enqueue_kernel_launch::handleError(Error, DeviceImpl, Kernel, NDRDesc); } + + if (MEvent->is_host()) + MEvent->setComplete(); + return PI_SUCCESS; } case CG::CGTYPE::COPY_USM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); MemoryManager::copy_usm(Copy->getSrc(), MQueue, Copy->getLength(), Copy->getDst(), std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::FILL_USM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), Fill->getFill(), std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::PREFETCH_USM: { @@ -1945,6 +2038,10 @@ cl_int ExecCGCommand::enqueueImp() { MemoryManager::prefetch_usm(Prefetch->getDst(), MQueue, Prefetch->getLength(), 
std::move(RawEvents), Event); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::INTEROP_TASK_CODEPLAY: { @@ -1974,6 +2071,10 @@ cl_int ExecCGCommand::enqueueImp() { nullptr, &Event); Plugin.call( reinterpret_cast(MQueue->get())); + + if (MEvent->is_host()) + MEvent->setComplete(); + return CL_SUCCESS; } case CG::CGTYPE::CODEPLAY_HOST_TASK: { @@ -2001,7 +2102,8 @@ cl_int ExecCGCommand::enqueueImp() { } MQueue->getThreadPool().submit( - std::move(DispatchHostTask(EventImpls, HostTask, MDeps, MEvent))); + std::move(DispatchHostTask(EventImpls, MPreparedHostDepsEvents, + HostTask, MDeps, MEvent))); return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index c9865369253b6..67e6c704c999e 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -236,6 +236,7 @@ class Command { enum class BlockReason : int { HostAccessor = 0, HostTask }; + // Only have reasonable value while MIsBlockable is true BlockReason MBlockReason; /// Describes the status of the command. diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index da55f153f580a..03861c8617f73 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -821,8 +821,6 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { std::queue CmdsToVisit({FinishedCmd}); std::set Visited; - // FIXME a more sophisticated solution instead of this hack - std::set Deleted; // Traverse the graph using BFS while (!CmdsToVisit.empty()) { @@ -846,11 +844,6 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { continue; for (Command *UserCmd : Cmd->MUsers) { - // Prevent invalid read. - // FIXME remove this hack. 
- if (Deleted.count(UserCmd)) - continue; - for (DepDesc &Dep : UserCmd->MDeps) { // Link the users of the command to the alloca command(s) instead if (Dep.MDepCommand == Cmd) { @@ -867,8 +860,6 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(Command *FinishedCmd) { Cmd->getEvent()->setCommand(nullptr); delete Cmd; - - //Deleted.insert(Cmd); } } From 0465f2a7d1392f36831b341ec6a291f6cb4fcfd7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 21 Apr 2020 22:27:28 +0300 Subject: [PATCH 074/188] [SYCL] Fix test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index d92bc75010b7a..786a330f75bd5 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -192,12 +192,8 @@ int main() { // CHECK:---> piEnqueueKernelLaunch( // prepare for host task // CHECK:---> piEnqueueMemBufferMap( -// creation of host task self-event -// CHECK:---> piEventCreate( // wait on dependencies of host task // CHECK:---> piEventsWait( -// host task is done, set status of self-event -// CHECK:---> piEventSetStatus( // launch of CopierTask kernel // CHECK:---> piKernelCreate( // CHECK: CopierTask From c4ab0f1f15de5ddb9da904c6604e60e19c4f2271 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 21 Apr 2020 22:29:03 +0300 Subject: [PATCH 075/188] [SYCL] Fix codestyle issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 17 +++++++++-------- sycl/source/detail/event_impl.cpp | 3 +-- sycl/source/detail/scheduler/commands.cpp | 12 +++++------- sycl/source/detail/scheduler/commands.hpp | 3 +-- sycl/source/detail/scheduler/scheduler.hpp | 1 - 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 
7461ffaef4af4..8427bd9a51cd6 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -19,14 +19,15 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { std::set BlockedCmds; - size_t Count = countBlockedCommand([&BlockedCmds](const Command *const Cmd) { - if (Cmd->MBlockReason == Command::BlockReason::HostAccessor) { - BlockedCmds.insert(Cmd); - return true; - } - - return false; - }); + size_t Count = + countBlockedCommand([&BlockedCmds](const Command *const Cmd) { + if (Cmd->MBlockReason == Command::BlockReason::HostAccessor) { + BlockedCmds.insert(Cmd); + return true; + } + + return false; + }); for (const Command *Cmd : BlockedCmds) if (EventImplPtr Event = Cmd->getEvent()) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index ea62017187257..6f4c499f49693 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -89,8 +89,7 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MContext = Context; } -event_impl::event_impl() - : MState(HES_Ready) {} +event_impl::event_impl() : MState(HES_Ready) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a7b2d2efcf049..421cb75a1e36f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -216,8 +216,8 @@ class DispatchHostTask { std::vector DepHostEvents, CGHostTask *HostTask, std::vector Deps, EventImplPtr SelfEvent) - : MDepEvents(std::move(DepEvents)), MDepHostEvents(DepHostEvents), - MHostTask{HostTask}, + : MDepEvents(std::move(DepEvents)), + MDepHostEvents(DepHostEvents), MHostTask{HostTask}, MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} void operator()() const { @@ -483,8 +483,7 @@ void Command::addConnectCmdWithReq(const 
ContextImplPtr &DepEventContext, GB.findAllocaForReq(Record, Req, DepEventContext); assert(AllocaCmd && "There must be alloca for requirement!"); - std::set Deps = - GB.findDepsForReq(Record, Req, DepEventContext); + std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); assert(Deps.size() && "There must be some deps"); for (Command *ReqDepCmd : Deps) { @@ -2101,9 +2100,8 @@ cl_int ExecCGCommand::enqueueImp() { ++ArgIdx; } - MQueue->getThreadPool().submit( - std::move(DispatchHostTask(EventImpls, MPreparedHostDepsEvents, - HostTask, MDeps, MEvent))); + MQueue->getThreadPool().submit(std::move(DispatchHostTask( + EventImpls, MPreparedHostDepsEvents, HostTask, MDeps, MEvent))); return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 67e6c704c999e..293b7a9a6c990 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -211,8 +211,7 @@ class Command { /// \param Dep DepDesc with non-null MDepRequirmeent void addConnectCmdWithReq(const ContextImplPtr &DepEventContext, ExecCGCommand *const ConnectCmd, - EmptyCommand *const EmptyCmd, - const DepDesc &Dep); + EmptyCommand *const EmptyCmd, const DepDesc &Dep); virtual ContextImplPtr getContext() const; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a16de7862d7af..ca5209e01d860 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -557,7 +557,6 @@ class Scheduler { friend class Command; private: - /// Searches for suitable alloca in memory record. /// /// If none found, creates new one. 
From 741e2577a3d267a7a51810b345fc77e3e988aee9 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 10:28:20 +0300 Subject: [PATCH 076/188] [SYCL] Fix test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 786a330f75bd5..b90f4e7528a62 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_PI_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER #include #include @@ -192,9 +192,10 @@ int main() { // CHECK:---> piEnqueueKernelLaunch( // prepare for host task // CHECK:---> piEnqueueMemBufferMap( -// wait on dependencies of host task -// CHECK:---> piEventsWait( // launch of CopierTask kernel // CHECK:---> piKernelCreate( // CHECK: CopierTask // CHECK:---> piEnqueueKernelLaunch( +// TODO need to check for piEventsWait as "wait on dependencies of host task". +// At the same time this piEventsWait may occur anywhere after +// piEnqueueMemBufferMap ("prepare for host task"). 
From d00f031e20cf94849a95e637f3df9a0d7f0b5353 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 10:52:00 +0300 Subject: [PATCH 077/188] [SYCL] Slight fix Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 421cb75a1e36f..693eae005f490 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -188,7 +188,7 @@ class DispatchHostTask { } // Lookup for empty command amongst users of this cmd - static EmptyCommand *findMyEmptyCommand(Command *ThisCmd) { + static EmptyCommand *findUserEmptyCommand(Command *ThisCmd) { #ifndef NDEBUG EmptyCommand *Result = nullptr; #endif @@ -208,6 +208,8 @@ class DispatchHostTask { #ifndef NDEBUG return Result; +#else + return nullptr; #endif } @@ -226,12 +228,11 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now MHostTask->MHostTask->call(); + Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); + assert(ThisCmd && "No command found for host-task self event"); + // update self-event status if (MSelfEvent->is_host()) { - Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); - - assert(ThisCmd && "No command found for host-task self event"); - for (Command *UserCmd : ThisCmd->MUsers) { EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(UserCmd, Res); @@ -241,15 +242,18 @@ class DispatchHostTask { } MSelfEvent->setComplete(); - - EmptyCommand *MyEmptyCmd = findMyEmptyCommand(ThisCmd); - MyEmptyCmd->getEvent()->setComplete(); } else { const detail::plugin &Plugin = MSelfEvent->getPlugin(); Plugin.call(MSelfEvent->getHandleRef(), PI_EVENT_COMPLETE); } + EmptyCommand *EmptyCmd = findUserEmptyCommand(ThisCmd); + assert(EmptyCmd && "No empty command found"); + + if (EmptyCmd->getEvent()->is_host()) + 
EmptyCmd->getEvent()->setComplete(); + unblockBlockedDeps(MDeps); } From b4a2a47a17943848e13226cec2168ebba898d5d2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 11:55:22 +0300 Subject: [PATCH 078/188] [SYCL] Revert patch of test for assert Signed-off-by: Sergey Kanaev --- sycl/test/devicelib/assert.cpp | 130 ++++++++++++--------------------- 1 file changed, 47 insertions(+), 83 deletions(-) diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index 351b3a861deab..9170d2557e283 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -1,5 +1,4 @@ // REQUIRES: cpu,linux -// RUN: %clangxx %s -DPARENT_PROCESS -o %t.parent.bin // RUN: %clangxx -fsycl -c %s -o %t.o // RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-glibc.o -o %t.out // (see the other RUN lines below; it is a bit complicated) @@ -76,12 +75,12 @@ // // Overall this sounds stable enough. What could possibly go wrong? // -// RUN: env SYCL_PI_TRACE=1 SHOULD_CRASH=1 CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGABRT SKIP_IF_NO_EXT=1 %t.parent.bin %t.out 2>%t.stderr.native >%t.stdout.native +// RUN: env SYCL_PI_TRACE=1 SHOULD_CRASH=1 CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGABRT SKIP_IF_NO_EXT=1 %t.out 2>%t.stderr.native >%t.stdout.native // RUN: FileCheck %s --input-file %t.stdout.native --check-prefixes=CHECK-NATIVE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // RUN: FileCheck %s --input-file %t.stderr.native --check-prefixes=CHECK-MESSAGE || FileCheck %s --input-file %t.stderr.native --check-prefix CHECK-NOTSUPPORTED // -// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.parent.bin %t.out >%t.stdout.pi.fallback -// RUN: env SHOULD_CRASH=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False 
SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.parent.bin %t.out >%t.stdout.msg.fallback +// RUN: env SYCL_PI_TRACE=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.out >%t.stdout.pi.fallback +// RUN: env SHOULD_CRASH=1 SYCL_DEVICELIB_INHIBIT_NATIVE=cl_intel_devicelib_assert CL_CONFIG_USE_VECTORIZER=False SYCL_DEVICE_TYPE=CPU EXPECTED_SIGNAL=SIGSEGV %t.out >%t.stdout.msg.fallback // RUN: FileCheck %s --input-file %t.stdout.pi.fallback --check-prefixes=CHECK-FALLBACK // RUN: FileCheck %s --input-file %t.stdout.msg.fallback --check-prefixes=CHECK-MESSAGE // @@ -100,95 +99,21 @@ // Note that the work-item that hits the assert first may vary, since the order // of execution is undefined. We catch only the first one (whatever id it is). -#ifndef PARENT_PROCESS #include -#endif - #include #include #include - -#ifdef PARENT_PROCESS -#include -#endif - #include #include #include -const int EXIT_SKIP_TEST = 42; - -#ifdef PARENT_PROCESS -int main(int argc, char *argv[]) { - assert(argc > 1); - - char **ChildArgv = new char *; - ChildArgv[0] = argv[1]; - - int Child = fork(); - - if (Child < 0) { - perror("Fork failed"); - return 1; - } - - if (!Child) { - int ExecFailed = execve(argv[1], ChildArgv, environ); - - if (ExecFailed) { - perror("Execve failed"); - return 1; - } - - assert(false && "Unreachanble reached"); - } - - int status = 0; - waitpid(Child, &status, 0); - if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SKIP_TEST) { - return 0; - } - if (getenv("SHOULD_CRASH")) { - if (!WIFSIGNALED(status)) { - fprintf(stderr, "error: process did not terminate by a signal\n"); - return 1; - } - } else { - if (WIFSIGNALED(status)) { - fprintf(stderr, "error: process should not terminate\n"); - return 1; - } - // We should not check anything if the child finished successful and this - // was expected. 
- return 0; - } - int sig = WTERMSIG(status); - int expected = 0; - if (const char *env = getenv("EXPECTED_SIGNAL")) { - if (0 == strcmp(env, "SIGABRT")) { - expected = SIGABRT; - } else if (0 == strcmp(env, "SIGSEGV")) { - expected = SIGSEGV; - } - if (!expected) { - fprintf(stderr, "EXPECTED_SIGNAL should be set to either \"SIGABRT\", " - "or \"SIGSEGV\"!\n"); - return 1; - } - } - if (sig != expected) { - fprintf(stderr, "error: expected signal %d, got %d\n", expected, sig); - return 1; - } - - return 0; -} -#else using namespace cl::sycl; constexpr auto sycl_read = cl::sycl::access::mode::read; constexpr auto sycl_write = cl::sycl::access::mode::write; +const int EXIT_SKIP_TEST = 42; + template void simple_vadd(const std::array &VA, const std::array &VB, std::array &VC) { @@ -239,6 +164,48 @@ void simple_vadd(const std::array &VA, const std::array &VB, } int main() { + int child = fork(); + if (child) { + int status = 0; + waitpid(child, &status, 0); + if (WIFEXITED(status) && WEXITSTATUS(status) == EXIT_SKIP_TEST) { + return 0; + } + if (getenv("SHOULD_CRASH")) { + if (!WIFSIGNALED(status)) { + fprintf(stderr, "error: process did not terminate by a signal\n"); + return 1; + } + } else { + if (WIFSIGNALED(status)) { + fprintf(stderr, "error: process should not terminate\n"); + return 1; + } + // We should not check anything if the child finished successful and this + // was expected. 
+ return 0; + } + int sig = WTERMSIG(status); + int expected = 0; + if (const char *env = getenv("EXPECTED_SIGNAL")) { + if (0 == strcmp(env, "SIGABRT")) { + expected = SIGABRT; + } else if (0 == strcmp(env, "SIGSEGV")) { + expected = SIGSEGV; + } + if (!expected) { + fprintf(stderr, "EXPECTED_SIGNAL should be set to either \"SIGABRT\", " + "or \"SIGSEGV\"!\n"); + return 1; + } + } + if (sig != expected) { + fprintf(stderr, "error: expected signal %d, got %d\n", expected, sig); + return 1; + } + return 0; + } + // Turn the bufferization off to not loose the assert message if it is written // to stdout. if (setvbuf(stdout, NULL, _IONBF, 0)) { @@ -251,7 +218,4 @@ int main() { std::array C = {0, 0, 0}; simple_vadd(A, B, C); - - return 0; } -#endif // PARENT_PROCESS From 092d887a8d3f07d073b43d69275c3e02cd9ecdd4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 12:16:56 +0300 Subject: [PATCH 079/188] [NFC] [SYCL] Add description of new environment variable. Signed-off-by: Sergey Kanaev --- sycl/doc/EnvironmentVariables.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 0a99f8899f517..65e0d8e186dd6 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -23,6 +23,7 @@ subject to change. Do not rely on these variables in production code. | SYCL_THROW_ON_BLOCK | Any(\*) | Throw an exception on attempt to wait for a blocked command. | | SYCL_DEVICELIB_INHIBIT_NATIVE | String of device library extensions (separated by a whitespace) | Do not rely on device native support for devicelib extensions listed in this option. | | SYCL_DEVICE_ALLOWLIST | A list of devices and their minimum driver version following the pattern: DeviceName:{{XXX}},DriverVersion:{{X.Y.Z.W}}. Also may contain PlatformName and PlatformVersion | Filter out devices that do not match the pattern specified. 
Regular expression can be passed and the DPC++ runtime will select only those devices which satisfy the regex. | +| SYCL_QUEUE_THREAD_POOL_SIZE | Positive integer | Number of threads in thread pool of queue. | `(*) Note: Any means this environment variable is effective when set to any non-null value.` ### SYCL_PRINT_EXECUTION_GRAPH Options From 05a655855bf046ec03c3296bc6287dcc6e5c0c72 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 15:15:03 +0300 Subject: [PATCH 080/188] [SYCL] Fix windows build. Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 83c3fc797f842..e9db70f1a3bfb 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -193,7 +193,7 @@ void queue_impl::initHostTaskAndEventCallbackThreadPool() { if (const char *val = std::getenv("SYCL_QUEUE_THREAD_POOL_SIZE")) try { Size = std::stoi(val); - } catch (const std::exception &e) { + } catch (...) { throw invalid_parameter_error( "Invalid value for SYCL_QUEUE_THREAD_POOL_SIZE environment variable", PI_INVALID_VALUE); From b193031fe4d93d45005d18d44a30874195922a77 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 22 Apr 2020 16:13:46 +0300 Subject: [PATCH 081/188] [SYCL] Init buffer with proper data Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index b90f4e7528a62..980f8a010e825 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -48,7 +48,7 @@ void Thread1Fn(Context &Ctx) { Acc(Ctx.Buf3); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - Acc[Idx] = -2; + Acc[Idx] = -3; } // 1. 
submit task writing to buffer 1 From a04c01f7ba8b39e457f553d9892ba0c62ce93b12 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 11:52:15 +0300 Subject: [PATCH 082/188] [SYCL] Convert back to single MBlockedCmd in requirement Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 18 +------- sycl/source/detail/accessor_impl.cpp | 46 +------------------ sycl/source/detail/event_impl.cpp | 6 +-- sycl/source/detail/scheduler/commands.cpp | 7 ++- .../source/detail/scheduler/graph_builder.cpp | 15 ++++-- sycl/source/detail/scheduler/scheduler.cpp | 33 +++++-------- 6 files changed, 33 insertions(+), 92 deletions(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 22a67b23b3ec1..181cd6dc151d4 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -15,9 +15,6 @@ #include #include -#include -#include - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -99,20 +96,7 @@ class __SYCL_EXPORT AccessorImplHost { void *MData = nullptr; -protected: - using CheckCmdFn = std::function; - - void addBlockedCommand(Command *BlockedCmd); - Command *findBlockedCommand(const CheckCmdFn &Check); - bool removeBlockedCommand(Command *BlockedCmd); - size_t countBlockedCommand(const CheckCmdFn &Check); - - friend class Command; - friend class Scheduler; - -private: - std::mutex MBlockedCmdsMutex; - std::unordered_set MBlockedCmds; + Command *MBlockedCmd = nullptr; }; using AccessorImplPtr = shared_ptr_class; diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 8427bd9a51cd6..90484ac4fb0ff 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -18,56 +18,12 @@ namespace detail { AccessorImplHost::~AccessorImplHost() { try { - std::set BlockedCmds; - size_t Count = - countBlockedCommand([&BlockedCmds](const Command *const Cmd) { - if 
(Cmd->MBlockReason == Command::BlockReason::HostAccessor) { - BlockedCmds.insert(Cmd); - return true; - } - - return false; - }); - - for (const Command *Cmd : BlockedCmds) - if (EventImplPtr Event = Cmd->getEvent()) - if (Event->is_host()) - Event->setComplete(); - - if (Count) + if (MBlockedCmd) detail::Scheduler::getInstance().releaseHostAccessor(this); } catch (...) { } } -void AccessorImplHost::addBlockedCommand(Command *BlockedCmd) { - std::lock_guard Lock(MBlockedCmdsMutex); - - MBlockedCmds.insert(BlockedCmd); -} - -size_t AccessorImplHost::countBlockedCommand(const CheckCmdFn &Check) { - std::lock_guard Lock(MBlockedCmdsMutex); - - return std::count_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); -} - -Command *AccessorImplHost::findBlockedCommand(const CheckCmdFn &Check) { - std::lock_guard Lock(MBlockedCmdsMutex); - - auto FoundIt = std::find_if(MBlockedCmds.begin(), MBlockedCmds.end(), Check); - - return FoundIt == MBlockedCmds.end() ? nullptr : *FoundIt; -} - -bool AccessorImplHost::removeBlockedCommand(Command *BlockedCmd) { - std::lock_guard Lock(MBlockedCmdsMutex); - - MBlockedCmds.erase(BlockedCmd); - - return MBlockedCmds.empty(); -} - void addHostAccessorAndWait(Requirement *Req) { detail::EventImplPtr Event = detail::Scheduler::getInstance().addHostAccessor(Req); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6f4c499f49693..ebbbdc2e56f57 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -68,9 +68,9 @@ void event_impl::setComplete() { int Expected = HES_NotReady; int Desired = HES_Ready; - bool Succeeded = MState.compare_exchange_strong(Expected, Desired); + /*bool Succeeded = */MState.compare_exchange_strong(Expected, Desired); - assert(Succeeded && "Unexpected state of event"); + //assert(Succeeded && "Unexpected state of event"); #else MState.store(static_cast(HES_Ready)); #endif @@ -89,7 +89,7 @@ void event_impl::setContextImpl(const ContextImplPtr 
&Context) { MContext = Context; } -event_impl::event_impl() : MState(HES_Ready) {} +event_impl::event_impl() : MState(HES_NotReady) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 13efbdb1a1e4b..3a6e3f8c2c04e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -474,7 +474,7 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, const DepDesc &Dep) { Requirement *Req = const_cast(Dep.MDepRequirement); - Req->addBlockedCommand(EmptyCmd); + Req->MBlockedCmd = EmptyCmd; { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; @@ -538,6 +538,11 @@ void Command::connectDepEvent(EventImplPtr DepEvent, EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + fprintf(stderr, "Created empty cmd %p for host task (dep) for " + "connect cmd %p for req %p\n", + (void *)EmptyCmd, (void *)ConnectCmd, + (const void *)Dep.MDepRequirement); + EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = BlockReason::HostTask; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 03861c8617f73..b59c8a14288f2 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -429,7 +429,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - Req->addBlockedCommand(EmptyCmd); + Req->MBlockedCmd = EmptyCmd; if (MPrintOptionsArray[AfterAddHostAcc]) printGraphAsDot("after_addHostAccessor"); @@ -669,6 +669,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType 
== CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Command::BlockReason::HostTask; @@ -704,7 +705,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd->addDep(DepDesc{NewCmd.get(), Req, AllocaCmd}); - Req->addBlockedCommand(EmptyCmd); + Req->MBlockedCmd = EmptyCmd; } } @@ -763,8 +764,12 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { // Dependencies of the users will be cleaned up during the traversal for (Command *AllocaCmd : AllocaCommands) { Visited.insert(AllocaCmd); + for (Command *UserCmd : AllocaCmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) { + ToVisit.push(UserCmd); + } + CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid // updating them during edge removal @@ -780,7 +785,9 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { continue; for (Command *UserCmd : Cmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) { + ToVisit.push(UserCmd); + } // Delete all dependencies on any allocations being removed // Track which commands should have their users updated diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 1c74a8a373e04..52f7305df0f6e 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -158,6 +158,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { if (!Record) // No operations were performed on the mem object return; + waitForRecordToFinish(Record); MGraphBuilder.decrementLeafCountersForRecord(Record); MGraphBuilder.cleanupCommandsForRecord(Record); @@ -180,20 +181,18 @@ 
EventImplPtr Scheduler::addHostAccessor(Requirement *Req, } void Scheduler::releaseHostAccessor(Requirement *Req) { - Command *const BlockedCmd = - Req->findBlockedCommand([](const Command *const Cmd) { - return Cmd->MBlockReason == Command::BlockReason::HostAccessor; - }); + Command *const BlockedCmd = Req->MBlockedCmd; assert(BlockedCmd && "Can't find appropriate command to unblock"); - if (!BlockedCmd) - return; + if (EventImplPtr Event = BlockedCmd->getEvent()) + if (Event->is_host()) { + Event->setComplete(); + } BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - if (Req->removeBlockedCommand(BlockedCmd)) - unblockSingleReq(Req); + unblockSingleReq(Req); } void Scheduler::unblockSingleReq(Requirement *Req) { @@ -230,11 +229,9 @@ void Scheduler::bulkUnblockReqs(Command *const BlockedCmd, }; for (Requirement *Req : Reqs) { - if (Req->removeBlockedCommand(BlockedCmd)) { - MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); - EnqueueLeaves(Record->MReadLeaves); - EnqueueLeaves(Record->MWriteLeaves); - } + MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); + EnqueueLeaves(Record->MReadLeaves); + EnqueueLeaves(Record->MWriteLeaves); } } @@ -243,16 +240,8 @@ void Scheduler::unblockRequirements(const std::vector &Reqs, // fetch unique blocked cmds std::unordered_map> BlockedCmds; - std::function CheckCmd = - [Reason](const Command *const Cmd) { - return Cmd->MBlockReason == Reason; - }; - for (Requirement *Req : Reqs) { - Command *BlockedCmd = Req->findBlockedCommand(CheckCmd); - - assert(BlockedCmd && - "Can't find appropriate command to unblock multiple requirements"); + Command *BlockedCmd = Req->MBlockedCmd; BlockedCmds[BlockedCmd].insert(Req); } From 410654bff2a65a98608aa40c126a220c0101b865 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 13:06:14 +0300 Subject: [PATCH 083/188] [SYCL] Address comments: - Employ the right way to enqueue dependants of host task. - Don't store empty command in blocked cmds of requirement. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 43 +++++++++++-------- sycl/source/detail/scheduler/commands.hpp | 4 +- .../source/detail/scheduler/graph_builder.cpp | 5 +-- sycl/source/detail/scheduler/scheduler.cpp | 5 ++- sycl/source/detail/scheduler/scheduler.hpp | 2 +- 5 files changed, 34 insertions(+), 25 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3a6e3f8c2c04e..bea0ebd5fabe9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -230,30 +230,32 @@ class DispatchHostTask { Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); assert(ThisCmd && "No command found for host-task self event"); + // unblock user empty command here + EmptyCommand *EmptyCmd = findUserEmptyCommand(ThisCmd); + assert(EmptyCmd && "No empty command found"); + + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + // update self-event status if (MSelfEvent->is_host()) { - for (Command *UserCmd : ThisCmd->MUsers) { - EnqueueResultT Res; - bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(UserCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Failed to enqueue a dependant command", - PI_INVALID_OPERATION); - } - MSelfEvent->setComplete(); + + if (EmptyCmd->getEvent()->is_host()) + EmptyCmd->getEvent()->setComplete(); + + // enqueue leaves or enqueue leaves of reqs in ThisCmd.MDeps + for (DepDesc &Dep : ThisCmd->MDeps) + Scheduler::enqueueLeavesOfReq(Dep.MDepRequirement); } else { const detail::plugin &Plugin = MSelfEvent->getPlugin(); Plugin.call(MSelfEvent->getHandleRef(), PI_EVENT_COMPLETE); - } - EmptyCommand *EmptyCmd = findUserEmptyCommand(ThisCmd); - assert(EmptyCmd && "No empty command found"); - - if (EmptyCmd->getEvent()->is_host()) - EmptyCmd->getEvent()->setComplete(); + if (EmptyCmd->getEvent()->is_host()) + EmptyCmd->getEvent()->setComplete(); - 
unblockBlockedDeps(MDeps); + // the enqueue process is driven by backend now + } } static void unblockBlockedDeps(const std::vector &Deps) { @@ -1415,9 +1417,8 @@ cl_int MemCpyCommandHost::enqueueImp() { } EmptyCommand::EmptyCommand(QueueImplPtr Queue, Requirement Req) - : Command(CommandType::EMPTY_TASK, std::move(Queue)), - MRequirement(new Requirement(std::move(Req))) { - + : Command(CommandType::EMPTY_TASK, std::move(Queue)) { + MRequirements.emplace_back(std::move(Req)); emitInstrumentationDataProxy(); } @@ -1426,6 +1427,12 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue) emitInstrumentationDataProxy(); } +const Requirement *EmptyCommand::addRequirement(Requirement Req) { + MRequirements.emplace_back(std::move(Req)); + + return &MRequirements.back(); +} + void EmptyCommand::emitInstrumentationData() { #ifdef XPTI_ENABLE_INSTRUMENTATION if (!xptiTraceEnabled()) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 293b7a9a6c990..ca47da3bd67ca 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -277,13 +277,15 @@ class EmptyCommand : public Command { EmptyCommand(QueueImplPtr Queue); void printDot(std::ostream &Stream) const final; - const Requirement *getRequirement() const final { return MRequirement.get(); } + const Requirement *getRequirement() const final { return &MRequirements[0]; } + const Requirement *addRequirement(Requirement Req); void emitInstrumentationData(); private: cl_int enqueueImp() final { return CL_SUCCESS; } + std::vector MRequirements; std::unique_ptr MRequirement; }; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index b59c8a14288f2..239e7e8ed07f9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -703,9 +703,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, NewCmd->addDep(DepDesc{Dep, Req, 
AllocaCmd}); if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { - EmptyCmd->addDep(DepDesc{NewCmd.get(), Req, AllocaCmd}); - - Req->MBlockedCmd = EmptyCmd; + const Requirement *StoredReq = EmptyCmd->addRequirement(*Req); + EmptyCmd->addDep(DepDesc{NewCmd.get(), StoredReq, AllocaCmd}); } } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 52f7305df0f6e..579eb7e758321 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -192,10 +192,11 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - unblockSingleReq(Req); + enqueueLeavesOfReq(Req); } -void Scheduler::unblockSingleReq(Requirement *Req) { +// static +void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index ca5209e01d860..5c09ee40fd5b9 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -434,7 +434,7 @@ class Scheduler { Scheduler(); static Scheduler instance; - void unblockSingleReq(Requirement *Req); + static void enqueueLeavesOfReq(const Requirement *const Req); void bulkUnblockReqs(Command *const BlockedCmd, const std::unordered_set &Reqs); From bad38452e3131d70292c7458306b29786ef40f4a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 13:09:04 +0300 Subject: [PATCH 084/188] [SYCL] Remove unused code Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 13 ------ sycl/source/detail/scheduler/scheduler.cpp | 50 ---------------------- sycl/source/detail/scheduler/scheduler.hpp | 6 --- 3 files changed, 69 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp 
b/sycl/source/detail/scheduler/commands.cpp index bea0ebd5fabe9..f5dac32fb925e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -257,19 +257,6 @@ class DispatchHostTask { // the enqueue process is driven by backend now } } - - static void unblockBlockedDeps(const std::vector &Deps) { - std::vector Reqs; - Reqs.resize(Deps.size()); - - std::transform(Deps.begin(), Deps.end(), Reqs.begin(), - [](const DepDesc &Dep) { - return const_cast(Dep.MDepRequirement); - }); - - Scheduler::getInstance().unblockRequirements( - Reqs, Command::BlockReason::HostTask); - } }; void Command::waitForPreparedHostEvents() const { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 579eb7e758321..b70bf71b0b6c8 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -210,56 +210,6 @@ void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { EnqueueLeaves(Record->MWriteLeaves); } -void Scheduler::bulkUnblockReqs(Command *const BlockedCmd, - const std::unordered_set &Reqs) { - bool BlockedCmdEnqueued = false; - - auto EnqueueLeaves = [BlockedCmd, &BlockedCmdEnqueued]( - CircularBuffer &Leaves) { - for (Command *Cmd : Leaves) { - if (BlockedCmd == Cmd && BlockedCmdEnqueued) - continue; - - BlockedCmdEnqueued |= BlockedCmd == Cmd; - - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); - } - }; - - for (Requirement *Req : Reqs) { - MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); - EnqueueLeaves(Record->MReadLeaves); - EnqueueLeaves(Record->MWriteLeaves); - } -} - -void Scheduler::unblockRequirements(const std::vector &Reqs, - Command::BlockReason Reason) { - // fetch unique blocked cmds - std::unordered_map> BlockedCmds; - - for (Requirement *Req : 
Reqs) { - Command *BlockedCmd = Req->MBlockedCmd; - - BlockedCmds[BlockedCmd].insert(Req); - } - - for (const auto &It : BlockedCmds) { - if (!It.first) - continue; - - Command *BlockedCmd = It.first; - const std::unordered_set &SubReqs = It.second; - - BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - - bulkUnblockReqs(BlockedCmd, SubReqs); - } -} - Scheduler::Scheduler() { sycl::device HostDevice; DefaultHostQueue = QueueImplPtr(new queue_impl( diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5c09ee40fd5b9..6da1e29d9a2e5 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -418,10 +418,6 @@ class Scheduler { /// unblocked. void releaseHostAccessor(Requirement *Req); - // Unblocks operations with memory objects - void unblockRequirements(const std::vector &Reqs, - Command::BlockReason Reason); - /// \return an instance of the scheduler object. static Scheduler &getInstance(); @@ -435,8 +431,6 @@ class Scheduler { static Scheduler instance; static void enqueueLeavesOfReq(const Requirement *const Req); - void bulkUnblockReqs(Command *const BlockedCmd, - const std::unordered_set &Reqs); /// Graph builder class. /// From 6ae8754dc4499850c653e2100ff016b55b893232 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 13:36:41 +0300 Subject: [PATCH 085/188] [SYCL] Throw 'out of host memory' exception upon failed allocation of empty command. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 3 +++ sycl/source/detail/scheduler/graph_builder.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f5dac32fb925e..8d1fe2099ab3e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -527,6 +527,9 @@ void Command::connectDepEvent(EventImplPtr DepEvent, EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + fprintf(stderr, "Created empty cmd %p for host task (dep) for " "connect cmd %p for req %p\n", (void *)EmptyCmd, (void *)ConnectCmd, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 239e7e8ed07f9..ad143b27f1e90 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -670,6 +670,9 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Command::BlockReason::HostTask; From 67a98e0afe91a223fc43a85f129728331ed29523 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 13:52:24 +0300 Subject: [PATCH 086/188] [SYCL] Fix typo Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index ebbbdc2e56f57..badee9c0eba4d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -89,7 
+89,7 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MContext = Context; } -event_impl::event_impl() : MState(HES_NotReady) {} +event_impl::event_impl() : MState(HES_Ready) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), From b12d9ee0ced6928b3aee44cd438eec2aa1804730 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 14:30:03 +0300 Subject: [PATCH 087/188] [SYCL] Don't store empty command in blocked cmds of requirement upon glueing. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8d1fe2099ab3e..6aa2d5dc4cf8c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -463,8 +463,6 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, const DepDesc &Dep) { Requirement *Req = const_cast(Dep.MDepRequirement); - Req->MBlockedCmd = EmptyCmd; - { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; @@ -488,6 +486,7 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, { DepDesc EmptyCmdDep = Dep; + EmptyCmdDep.MDepRequirement = EmptyCmd->addRequirement(*Req); EmptyCmdDep.MDepCommand = ConnectCmd; EmptyCmd->addDep(EmptyCmdDep); From 931128b5524456acea2720e87654d9bd9998bb64 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 17:11:49 +0300 Subject: [PATCH 088/188] [SYCL] Fix runtime issue. Make code look cleaner. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 73 ++++++++++--------- sycl/source/detail/scheduler/commands.hpp | 5 +- .../source/detail/scheduler/graph_builder.cpp | 12 ++- 3 files changed, 50 insertions(+), 40 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6aa2d5dc4cf8c..33f3e337c01ad 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -463,39 +463,36 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, const DepDesc &Dep) { Requirement *Req = const_cast(Dep.MDepRequirement); - { - Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - - MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); - Dep.MDepCommand->addUser(ConnectCmd); + Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - AllocaCommandBase *AllocaCmd = - GB.findAllocaForReq(Record, Req, DepEventContext); - assert(AllocaCmd && "There must be alloca for requirement!"); + MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); - std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); - assert(Deps.size() && "There must be some deps"); - - for (Command *ReqDepCmd : Deps) { - ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); - ReqDepCmd->addUser(ConnectCmd); - } + AllocaCommandBase *AllocaCmd = + GB.findAllocaForReq(Record, Req, DepEventContext); + assert(AllocaCmd && "There must be alloca for requirement!"); - GB.updateLeaves(Deps, Record, Req->MAccessMode); - GB.addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); + std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); + assert(Deps.size() && "There must be some deps"); - { - DepDesc EmptyCmdDep = Dep; - EmptyCmdDep.MDepRequirement = EmptyCmd->addRequirement(*Req); - EmptyCmdDep.MDepCommand = ConnectCmd; + for (Command *ReqDepCmd : Deps) { + 
ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); + ReqDepCmd->addUser(ConnectCmd); + } - EmptyCmd->addDep(EmptyCmdDep); - ConnectCmd->addUser(EmptyCmd); - } + GB.updateLeaves(Deps, Record, Req->MAccessMode); + GB.addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); - GB.updateLeaves({ConnectCmd}, Record, Req->MAccessMode); - GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + { + std::vector Allocas(1, Dep.MAllocaCmd); + std::vector Reqs( + 1, const_cast(Dep.MDepRequirement)); + EmptyCmd->addRequirementsAndDeps(ConnectCmd, Allocas, Reqs); + ConnectCmd->addUser(EmptyCmd); } + + GB.updateLeaves({ConnectCmd}, Record, Req->MAccessMode); + GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); } void Command::connectDepEvent(EventImplPtr DepEvent, @@ -529,11 +526,6 @@ void Command::connectDepEvent(EventImplPtr DepEvent, if (!EmptyCmd) throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - fprintf(stderr, "Created empty cmd %p for host task (dep) for " - "connect cmd %p for req %p\n", - (void *)EmptyCmd, (void *)ConnectCmd, - (const void *)Dep.MDepRequirement); - EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = BlockReason::HostTask; @@ -1416,10 +1408,23 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue) emitInstrumentationDataProxy(); } -const Requirement *EmptyCommand::addRequirement(Requirement Req) { - MRequirements.emplace_back(std::move(Req)); +void EmptyCommand::addRequirementsAndDeps( + Command *const DepCmd, + const std::vector &Allocas, + const std::vector &Reqs) { + assert(Allocas.size() == Reqs.size()); + + MRequirements.reserve(Reqs.size()); - return &MRequirements.back(); + for (size_t Idx = 0; Idx < Reqs.size(); ++Idx) { + const Requirement &Req = *Reqs[Idx]; + AllocaCommandBase *AllocaCmd = Allocas[Idx]; + + MRequirements.emplace_back(Req); + const Requirement *const StoredReq = &MRequirements.back(); + + addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}); + } } 
void EmptyCommand::emitInstrumentationData() { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ca47da3bd67ca..1f1c12742be24 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -278,7 +278,9 @@ class EmptyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MRequirements[0]; } - const Requirement *addRequirement(Requirement Req); + void addRequirementsAndDeps(Command *const DepCmd, + const std::vector &Allocas, + const std::vector &Reqs); void emitInstrumentationData(); @@ -286,7 +288,6 @@ class EmptyCommand : public Command { cl_int enqueueImp() final { return CL_SUCCESS; } std::vector MRequirements; - std::unique_ptr MRequirement; }; /// The release command enqueues release of a memory object instance allocated diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index ad143b27f1e90..45134ad8c691d 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,6 +678,11 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCmd->MBlockReason = Command::BlockReason::HostTask; } + std::vector AllocasForReqs; + + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) + AllocasForReqs.reserve(Reqs.size()); + for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); markModifiedIfWrite(Record, Req); @@ -705,13 +710,12 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, for (Command *Dep : Deps) NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { - const Requirement *StoredReq = EmptyCmd->addRequirement(*Req); - EmptyCmd->addDep(DepDesc{NewCmd.get(), StoredReq, AllocaCmd}); - } + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) + AllocasForReqs.push_back(AllocaCmd); } if (CGType 
== CG::CGTYPE::CODEPLAY_HOST_TASK) { + EmptyCmd->addRequirementsAndDeps(NewCmd.get(), AllocasForReqs, Reqs); NewCmd->addUser(EmptyCmd); } From 5ee3ba5ed26a658df6327a2a87cfb8c55a48377f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 24 Apr 2020 17:23:23 +0300 Subject: [PATCH 089/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 3 +-- sycl/source/detail/scheduler/graph_builder.cpp | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index badee9c0eba4d..6f4c499f49693 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -68,9 +68,9 @@ void event_impl::setComplete() { int Expected = HES_NotReady; int Desired = HES_Ready; - /*bool Succeeded = */MState.compare_exchange_strong(Expected, Desired); + bool Succeeded = MState.compare_exchange_strong(Expected, Desired); - //assert(Succeeded && "Unexpected state of event"); + assert(Succeeded && "Unexpected state of event"); #else MState.store(static_cast(HES_Ready)); #endif diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 10a9a0f87a861..adf8814ce34a7 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1410,8 +1410,7 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue) } void EmptyCommand::addRequirementsAndDeps( - Command *const DepCmd, - const std::vector &Allocas, + Command *const DepCmd, const std::vector &Allocas, const std::vector &Reqs) { assert(Allocas.size() == Reqs.size()); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 45134ad8c691d..8553b37fa1e73 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -772,9 +772,8 @@ void 
Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { Visited.insert(AllocaCmd); for (Command *UserCmd : AllocaCmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) { + if (UserCmd->getType() != Command::CommandType::ALLOCA) ToVisit.push(UserCmd); - } CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid From 14a47a2c6be486f9cb970c00e84c6947fcb3bb81 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 21:34:12 +0300 Subject: [PATCH 090/188] [SYCL] Add comment Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index adf8814ce34a7..532aa086f04a1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -563,6 +563,8 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { return; } + // The event handle can be null in case of, for example, alloca command, + // which is currently synchrounious, so don't generate OpenCL event. 
if (DepEvent->getHandleRef() == nullptr) { return; } From 038495f9f110d0455f1c7afe44b767cd84c83fd2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 21:59:28 +0300 Subject: [PATCH 091/188] [SYCL] Eliminate explicit calls to setComplete() in users of EmptyCommand Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 20 +++++++++++++------- sycl/source/detail/scheduler/commands.hpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 5 ----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 532aa086f04a1..ed68ee8864643 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -241,9 +241,6 @@ class DispatchHostTask { if (MSelfEvent->is_host()) { MSelfEvent->setComplete(); - if (EmptyCmd->getEvent()->is_host()) - EmptyCmd->getEvent()->setComplete(); - // enqueue leaves or enqueue leaves of reqs in ThisCmd.MDeps for (DepDesc &Dep : ThisCmd->MDeps) Scheduler::enqueueLeavesOfReq(Dep.MDepRequirement); @@ -251,10 +248,6 @@ class DispatchHostTask { const detail::plugin &Plugin = MSelfEvent->getPlugin(); Plugin.call(MSelfEvent->getHandleRef(), PI_EVENT_COMPLETE); - - if (EmptyCmd->getEvent()->is_host()) - EmptyCmd->getEvent()->setComplete(); - // the enqueue process is driven by backend now } } @@ -1411,6 +1404,16 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue) emitInstrumentationDataProxy(); } +cl_int EmptyCommand::enqueueImp() { + waitForPreparedHostEvents(); + waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); + + if (MEvent->is_host()) + MEvent->setComplete(); + + return CL_SUCCESS; +} + void EmptyCommand::addRequirementsAndDeps( Command *const DepCmd, const std::vector &Allocas, const std::vector &Reqs) { @@ -1903,6 +1906,9 @@ cl_int ExecCGCommand::enqueueImp() { const_cast(MemLocs.data()), RawEvents.size(), RawEvents.empty() ? 
nullptr : RawEvents.data(), &Event); + if (MEvent->is_host()) + MEvent->setComplete(); + switch (Error) { case PI_INVALID_OPERATION: throw cl::sycl::runtime_error( diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 1f1c12742be24..00b69d9348d8f 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -285,7 +285,7 @@ class EmptyCommand : public Command { void emitInstrumentationData(); private: - cl_int enqueueImp() final { return CL_SUCCESS; } + cl_int enqueueImp() final; std::vector MRequirements; }; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index b70bf71b0b6c8..b44c171f2fb4a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -185,11 +185,6 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { assert(BlockedCmd && "Can't find appropriate command to unblock"); - if (EventImplPtr Event = BlockedCmd->getEvent()) - if (Event->is_host()) { - Event->setComplete(); - } - BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; enqueueLeavesOfReq(Req); From 083abeb40d874d69b9e73d31467f99a63e8c02bf Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 22:02:29 +0300 Subject: [PATCH 092/188] [SYCL] Enforce asserts in event_impl::setComplete() Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6f4c499f49693..d18136eece811 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -63,6 +63,7 @@ void event_impl::waitInternal() const { void event_impl::setComplete() { assert(MHostEvent && "setComplete is only allowed for host events"); + assert(!MEvent && "setComplete is only allowed for host events"); #ifndef NDEBUG int Expected = HES_NotReady; From 
c929d8b5f1265e883d8d396874b79d692e11d4ed Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 22:27:07 +0300 Subject: [PATCH 093/188] [SYCL] Employ piEventSetStatus in event_impl::setComplete(). Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 19 +++++++++++-------- sycl/source/detail/scheduler/commands.cpp | 13 ++++--------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index d18136eece811..18d344a97a2eb 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -62,19 +62,22 @@ void event_impl::waitInternal() const { } void event_impl::setComplete() { - assert(MHostEvent && "setComplete is only allowed for host events"); - assert(!MEvent && "setComplete is only allowed for host events"); - + if (MHostEvent && !MEvent) { #ifndef NDEBUG - int Expected = HES_NotReady; - int Desired = HES_Ready; + int Expected = HES_NotReady; + int Desired = HES_Ready; - bool Succeeded = MState.compare_exchange_strong(Expected, Desired); + bool Succeeded = MState.compare_exchange_strong(Expected, Desired); - assert(Succeeded && "Unexpected state of event"); + assert(Succeeded && "Unexpected state of event"); #else - MState.store(static_cast(HES_Ready)); + MState.store(static_cast(HES_Ready)); #endif + } else if (MEvent) + getPlugin().call( + getHandleRef(), PI_EVENT_COMPLETE); + else + assert(false && "Event is neither host nor device one."); } const RT::PiEvent &event_impl::getHandleRef() const { return MEvent; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index ed68ee8864643..f2451792d4114 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -238,18 +238,13 @@ class DispatchHostTask { EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; // update self-event status - if (MSelfEvent->is_host()) { - 
MSelfEvent->setComplete(); + MSelfEvent->setComplete(); - // enqueue leaves or enqueue leaves of reqs in ThisCmd.MDeps + // The enqueue process is driven by backend for non-host. + // For host event we'll enqueue leaves of requirements + if (MSelfEvent->is_host()) for (DepDesc &Dep : ThisCmd->MDeps) Scheduler::enqueueLeavesOfReq(Dep.MDepRequirement); - } else { - const detail::plugin &Plugin = MSelfEvent->getPlugin(); - Plugin.call(MSelfEvent->getHandleRef(), - PI_EVENT_COMPLETE); - // the enqueue process is driven by backend now - } } }; From 167487812208db47dc43454290dcdd4ba5b0dda1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 23:16:52 +0300 Subject: [PATCH 094/188] [SYCL] Address some review comments. Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 2 -- sycl/source/detail/event_impl.cpp | 7 ++-- sycl/source/detail/scheduler/commands.cpp | 34 +++++-------------- sycl/source/detail/scheduler/commands.hpp | 8 ++--- .../source/detail/scheduler/graph_builder.cpp | 11 ++---- 5 files changed, 21 insertions(+), 41 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index 90484ac4fb0ff..c5f2281bf13c8 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -10,8 +10,6 @@ #include #include -#include - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 18d344a97a2eb..dea12f8663041 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -129,8 +129,11 @@ event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { if (!MHostProfilingInfo) throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); } - } else - MState.store(HES_Ready); + + return; + } + + MState.store(HES_Ready); } void *event_impl::instrumentationProlog(string_class &Name, int32_t StreamID, diff --git 
a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f2451792d4114..5c75bc95c51b1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -243,7 +243,7 @@ class DispatchHostTask { // The enqueue process is driven by backend for non-host. // For host event we'll enqueue leaves of requirements if (MSelfEvent->is_host()) - for (DepDesc &Dep : ThisCmd->MDeps) + for (const DepDesc &Dep : ThisCmd->MDeps) Scheduler::enqueueLeavesOfReq(Dep.MDepRequirement); } }; @@ -472,13 +472,8 @@ void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, GB.updateLeaves(Deps, Record, Req->MAccessMode); GB.addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); - { - std::vector Allocas(1, Dep.MAllocaCmd); - std::vector Reqs( - 1, const_cast(Dep.MDepRequirement)); - EmptyCmd->addRequirementsAndDeps(ConnectCmd, Allocas, Reqs); - ConnectCmd->addUser(EmptyCmd); - } + EmptyCmd->addRequirement(ConnectCmd, Dep.MAllocaCmd, Dep.MDepRequirement); + ConnectCmd->addUser(EmptyCmd); GB.updateLeaves({ConnectCmd}, Record, Req->MAccessMode); GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); @@ -1409,22 +1404,12 @@ cl_int EmptyCommand::enqueueImp() { return CL_SUCCESS; } -void EmptyCommand::addRequirementsAndDeps( - Command *const DepCmd, const std::vector &Allocas, - const std::vector &Reqs) { - assert(Allocas.size() == Reqs.size()); - - MRequirements.reserve(Reqs.size()); +void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, + const Requirement *Req) { + MRequirements.emplace_back(*Req); + const Requirement *const StoredReq = &MRequirements.back(); - for (size_t Idx = 0; Idx < Reqs.size(); ++Idx) { - const Requirement &Req = *Reqs[Idx]; - AllocaCommandBase *AllocaCmd = Allocas[Idx]; - - MRequirements.emplace_back(Req); - const Requirement *const StoredReq = &MRequirements.back(); - - addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}); - } + addDep(DepDesc{DepCmd, 
StoredReq, AllocaCmd}); } void EmptyCommand::emitInstrumentationData() { @@ -2100,8 +2085,7 @@ cl_int ExecCGCommand::enqueueImp() { Requirement *Req = static_cast(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - detail::Requirement *TaskReq = HostTask->MRequirements[ReqIdx]; - TaskReq->MData = AllocaCmd->getMemAllocation(); + Req->MData = AllocaCmd->getMemAllocation(); ++ReqIdx; break; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 00b69d9348d8f..94703f3ca4a95 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -278,16 +279,15 @@ class EmptyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MRequirements[0]; } - void addRequirementsAndDeps(Command *const DepCmd, - const std::vector &Allocas, - const std::vector &Reqs); + void addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, + const Requirement *Req); void emitInstrumentationData(); private: cl_int enqueueImp() final; - std::vector MRequirements; + std::deque MRequirements; }; /// The release command enqueues release of a memory object instance allocated diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 8553b37fa1e73..096ade356aeec 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -678,11 +678,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCmd->MBlockReason = Command::BlockReason::HostTask; } - std::vector AllocasForReqs; - - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - AllocasForReqs.reserve(Reqs.size()); - for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); markModifiedIfWrite(Record, Req); @@ -710,12 +705,12 @@ 
Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, for (Command *Dep : Deps) NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - AllocasForReqs.push_back(AllocaCmd); + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + EmptyCmd->addRequirement(NewCmd.get(), AllocaCmd, Req); + } } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { - EmptyCmd->addRequirementsAndDeps(NewCmd.get(), AllocasForReqs, Reqs); NewCmd->addUser(EmptyCmd); } From f41a60526ea92618d33c72638a0c06614f8005a1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 27 Apr 2020 23:48:14 +0300 Subject: [PATCH 095/188] [SYCL] Move some methods from Command to Scheduler::GraphBuilder Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 95 +------------------ sycl/source/detail/scheduler/commands.hpp | 17 ---- .../source/detail/scheduler/graph_builder.cpp | 90 ++++++++++++++++++ sycl/source/detail/scheduler/scheduler.hpp | 18 ++++ 4 files changed, 112 insertions(+), 108 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5c75bc95c51b1..4b01749f8e74a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -446,95 +446,6 @@ void Command::makeTraceEventEpilog() { #endif } -void Command::addConnectCmdWithReq(const ContextImplPtr &DepEventContext, - ExecCGCommand *const ConnectCmd, - EmptyCommand *const EmptyCmd, - const DepDesc &Dep) { - Requirement *Req = const_cast(Dep.MDepRequirement); - - Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - - MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); - Dep.MDepCommand->addUser(ConnectCmd); - - AllocaCommandBase *AllocaCmd = - GB.findAllocaForReq(Record, Req, DepEventContext); - assert(AllocaCmd && "There must be alloca for requirement!"); - - std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); - assert(Deps.size() && "There must be some
deps"); - - for (Command *ReqDepCmd : Deps) { - ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); - ReqDepCmd->addUser(ConnectCmd); - } - - GB.updateLeaves(Deps, Record, Req->MAccessMode); - GB.addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); - - EmptyCmd->addRequirement(ConnectCmd, Dep.MAllocaCmd, Dep.MDepRequirement); - ConnectCmd->addUser(EmptyCmd); - - GB.updateLeaves({ConnectCmd}, Record, Req->MAccessMode); - GB.addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); -} - -void Command::connectDepEvent(EventImplPtr DepEvent, - const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, - const DepDesc &Dep) { - // construct Host Task type command manually and make it depend on DepEvent - ExecCGCommand *ConnectCmd = nullptr; - - { - // Temporary function. Will be replaced depending on circumstances. - std::function Func = []() {}; - - std::unique_ptr HT(new detail::HostTask(std::move(Func))); - std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, - /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, - /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::CODEPLAY_HOST_TASK, /* Payload */ {})); - ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); - } - - if (!ConnectCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - - if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { - EmptyCommand *EmptyCmd = - new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); - - if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - - EmptyCmd->MIsBlockable = true; - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = BlockReason::HostTask; - - DepCmd->addUser(ConnectCmd); - - if (Dep.MDepRequirement) { - addConnectCmdWithReq(DepEventContext, ConnectCmd, EmptyCmd, Dep); - } else /* if (!Dep.MDepRequirement) */ { - 
ConnectCmd->addDep(DepEvent); - EmptyCmd->addDep(ConnectCmd->MEvent); - ConnectCmd->addUser(EmptyCmd); - } - } else // if (!DepEvent->getCommand()) - ConnectCmd->addDep(DepEvent); - - EnqueueResultT Res; - bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Failed to enqueue a sync event between two contexts", - PI_INVALID_OPERATION); - - MPreparedHostDepsEvents.push_back(ConnectCmd->getEvent()); -} - void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = getContext(); @@ -554,8 +465,10 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match - connect them using user event - if (DepEventContext != Context && !Context->is_host()) - connectDepEvent(DepEvent, DepEventContext, Context, Dep); + if (DepEventContext != Context && !Context->is_host()) { + Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; + GB.connectDepEvent(this, DepEvent, DepEventContext, Context, Dep); + } else MPreparedDepsEvents.push_back(std::move(DepEvent)); } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 94703f3ca4a95..ad9a6f82c0ac9 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -197,23 +197,6 @@ class Command { /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. 
void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); - /// Perform connection of events in multiple contexts - /// \param DepEvent event to depend on - /// \param DepEventContext context of DepEvent - /// \param Context context of command which wants to depend on DepEvent - /// \param Dep optional DepDesc to perform connection properly - /// - /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. - void connectDepEvent(EventImplPtr DepEvent, - const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, const DepDesc &Dep); - /// Helper for connectDepEvent - /// \param ConnectCmd connection cmd to properly add - /// \param Dep DepDesc with non-null MDepRequirmeent - void addConnectCmdWithReq(const ContextImplPtr &DepEventContext, - ExecCGCommand *const ConnectCmd, - EmptyCommand *const EmptyCmd, const DepDesc &Dep); - virtual ContextImplPtr getContext() const; /// Private interface. Derived classes should implement this method. diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 096ade356aeec..fb620a7cffa3f 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -879,6 +879,96 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { MemObject->MRecord.reset(); } +void Scheduler::GraphBuilder::connectDepEvent( + Command *const Cmd, EventImplPtr DepEvent, + const ContextImplPtr &DepEventContext, const ContextImplPtr &Context, + const DepDesc &Dep) { + // construct Host Task type command manually and make it depend on DepEvent + ExecCGCommand *ConnectCmd = nullptr; + + { + // Temporary function. Will be replaced depending on circumstances. 
+ std::function Func = []() {}; + + std::unique_ptr HT(new detail::HostTask(std::move(Func))); + std::unique_ptr ConnectCG(new detail::CGHostTask( + std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, + /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, + /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, + CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + ConnectCmd = new ExecCGCommand( + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + } + + if (!ConnectCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + + if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { + EmptyCommand *EmptyCmd = + new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + + EmptyCmd->MIsBlockable = true; + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; + EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + + DepCmd->addUser(ConnectCmd); + + if (Dep.MDepRequirement) { + addConnectCmdWithReq(Cmd, DepEventContext, ConnectCmd, EmptyCmd, Dep); + } else /* if (!Dep.MDepRequirement) */ { + ConnectCmd->addDep(DepEvent); + EmptyCmd->addDep(ConnectCmd->getEvent()); + ConnectCmd->addUser(EmptyCmd); + } + } else // if (!DepEvent->getCommand()) + ConnectCmd->addDep(DepEvent); + + EnqueueResultT Res; + bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Failed to enqueue a sync event between two contexts", + PI_INVALID_OPERATION); + + Cmd->addDep(ConnectCmd->getEvent()); + //Cmd->MPreparedHostDepsEvents.push_back(ConnectCmd->getEvent()); +} + +void Scheduler::GraphBuilder::addConnectCmdWithReq( + Command *const Cmd, const ContextImplPtr &DepEventContext, + ExecCGCommand *const ConnectCmd, EmptyCommand *const EmptyCmd, + const DepDesc &Dep) { + Requirement *Req = const_cast(Dep.MDepRequirement); + + 
Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; + + MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); + + AllocaCommandBase *AllocaCmd = + GB.findAllocaForReq(Record, Req, DepEventContext); + assert(AllocaCmd && "There must be alloca for requirement!"); + + std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); + assert(Deps.size() && "There must be some deps"); + + for (Command *ReqDepCmd : Deps) { + ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); + ReqDepCmd->addUser(ConnectCmd); + } + + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); + + EmptyCmd->addRequirement(ConnectCmd, Dep.MAllocaCmd, Dep.MDepRequirement); + ConnectCmd->addUser(EmptyCmd); + + updateLeaves({ConnectCmd}, Record, Req->MAccessMode); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 6da1e29d9a2e5..0776018c105ea 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -512,6 +512,24 @@ class Scheduler { void updateLeaves(const std::set &Cmds, MemObjRecord *Record, access::mode AccessMode); + /// Perform connection of events in multiple contexts + /// \param DepEvent event to depend on + /// \param DepEventContext context of DepEvent + /// \param Context context of command which wants to depend on DepEvent + /// \param Dep optional DepDesc to perform connection properly + /// + /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. 
+ void connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, + const ContextImplPtr &DepEventContext, + const ContextImplPtr &Context, const DepDesc &Dep); + /// Helper for connectDepEvent + /// \param ConnectCmd connection cmd to properly add + /// \param Dep DepDesc with non-null MDepRequirmeent + void addConnectCmdWithReq(Command *const Cmd, + const ContextImplPtr &DepEventContext, + ExecCGCommand *const ConnectCmd, + EmptyCommand *const EmptyCmd, const DepDesc &Dep); + std::vector MMemObjs; private: From 857d4337d3e7f83e48b90fee385604a98ca7cebf Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 09:59:58 +0300 Subject: [PATCH 096/188] [SYCL] Simpify DispatchHostTask::findUserEmptyCommand() method. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 30 ++++++++--------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4b01749f8e74a..c49ec90ff76c1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -189,28 +189,18 @@ class DispatchHostTask { // Lookup for empty command amongst users of this cmd static EmptyCommand *findUserEmptyCommand(Command *ThisCmd) { -#ifndef NDEBUG - EmptyCommand *Result = nullptr; -#endif + assert(ThisCmd->MUsers.size() == 1 && + "Only a single user is expected for host task command"); - for (Command *Cmd : ThisCmd->MUsers) - if (Cmd->getType() == Command::CommandType::EMPTY_TASK && - Cmd->MIsBlockable && - Cmd->MBlockReason == Command::BlockReason::HostTask) { -#ifndef NDEBUG - assert(!Result && - "Multiple empty commands in users of a single host task"); - Result = static_cast(Cmd); -#else - return static_cast(Cmd); -#endif - } + Command *User = *ThisCmd->MUsers.begin(); -#ifndef NDEBUG - return Result; -#else - return nullptr; -#endif + assert(User->getType() == Command::CommandType::EMPTY_TASK && + "Expected empty command as 
single user of host task command"); + assert(User->MIsBlockable && "Empty command is expected to be blockable"); + assert(User->MBlockReason == Command::BlockReason::HostTask && + "Empty command is expected to be blocked due to host task"); + + return static_cast(User); } public: From faf3fa1b320b936cb3c315817fe123f2b977f205 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 10:21:21 +0300 Subject: [PATCH 097/188] [SYCL] Simplify loop Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c49ec90ff76c1..a8143facce4bd 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1979,24 +1979,18 @@ cl_int ExecCGCommand::enqueueImp() { case CG::CGTYPE::CODEPLAY_HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); - size_t ArgIdx = 0, ReqIdx = 0; - while (ArgIdx < HostTask->MArgs.size()) { - ArgDesc &Arg = HostTask->MArgs[ArgIdx]; - + for (ArgDesc &Arg : HostTask->MArgs) { switch (Arg.MType) { case kernel_param_kind_t::kind_accessor: { Requirement *Req = static_cast(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); Req->MData = AllocaCmd->getMemAllocation(); - ++ReqIdx; break; } default: throw std::runtime_error("Yet unsupported arg type"); } - - ++ArgIdx; } MQueue->getThreadPool().submit(std::move(DispatchHostTask( From 02c4cadceb8ee5f1c662e777a017fe34abe77cba Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 10:24:49 +0300 Subject: [PATCH 098/188] [SYCL] Remove unwanted changes from lit.cfg.py Signed-off-by: Sergey Kanaev --- sycl/test/lit.cfg.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index d708f386175ab..1682b57c3d24b 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -56,15 +56,9 
@@ llvm_config.with_environment('CPATH', "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/", append_path=True) llvm_config.with_environment('DYLD_LIBRARY_PATH', config.sycl_libs_dir) -# propagate the environment variable OCL_ICD_FILANEMES to use proper runtime. -if 'OCL_ICD_FILENAMES' in os.environ: - config.environment['OCL_ICD_FILENAMES'] = os.environ['OCL_ICD_FILENAMES'] +llvm_config.with_environment('PATH', config.sycl_tools_dir, append_path=True) config.substitutions.append( ('%threads_lib', config.sycl_threads_lib) ) - -if 'SYCL_DEVICE_ALLOWLIST' in os.environ: - config.environment['SYCL_DEVICE_ALLOWLIST'] = os.environ['SYCL_DEVICE_ALLOWLIST'] - config.substitutions.append( ('%sycl_libs_dir', config.sycl_libs_dir ) ) config.substitutions.append( ('%sycl_include', config.sycl_include ) ) config.substitutions.append( ('%sycl_source_dir', config.sycl_source_dir) ) @@ -75,8 +69,6 @@ config.substitutions.append( ('%llvm_build_lib_dir', config.llvm_build_lib_dir ) ) config.substitutions.append( ('%llvm_build_bin_dir', config.llvm_build_bin_dir ) ) -llvm_config.with_environment('PATH', config.sycl_tools_dir, append_path=True) - llvm_config.use_clang() llvm_config.add_tool_substitutions(['llvm-spirv'], [config.sycl_tools_dir]) From 5d1524702db960795a024587a57f12c4094b2c48 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 10:34:13 +0300 Subject: [PATCH 099/188] [SYCL] Remove unwanted change. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/accessor_impl.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 181cd6dc151d4..f9cffa5344b9f 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -20,7 +20,6 @@ namespace sycl { namespace detail { class Command; -class Scheduler; // The class describes a requirement to access a SYCL memory object such as // sycl::buffer and sycl::image. For example, each accessor used in a kernel, From e366310accac649e018d8364d7059def8481af36 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 10:35:37 +0300 Subject: [PATCH 100/188] [SYCL] Resolve style issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 3 +-- sycl/source/detail/scheduler/graph_builder.cpp | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index dea12f8663041..6be144ab1dc3e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -74,8 +74,8 @@ void event_impl::setComplete() { MState.store(static_cast(HES_Ready)); #endif } else if (MEvent) - getPlugin().call( - getHandleRef(), PI_EVENT_COMPLETE); + getPlugin().call(getHandleRef(), + PI_EVENT_COMPLETE); else assert(false && "Event is neither host nor device one."); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a8143facce4bd..69a5b77778617 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -458,8 +458,7 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { if (DepEventContext != Context && !Context->is_host()) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; 
GB.connectDepEvent(this, DepEvent, DepEventContext, Context, Dep); - } - else + } else MPreparedDepsEvents.push_back(std::move(DepEvent)); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index fb620a7cffa3f..1d2a4469bf22c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -933,7 +933,6 @@ void Scheduler::GraphBuilder::connectDepEvent( PI_INVALID_OPERATION); Cmd->addDep(ConnectCmd->getEvent()); - //Cmd->MPreparedHostDepsEvents.push_back(ConnectCmd->getEvent()); } void Scheduler::GraphBuilder::addConnectCmdWithReq( From 2b1335b4141a49988292055093dc65388fc9bd9c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 10:39:32 +0300 Subject: [PATCH 101/188] [SYCL] Rename HOST_TASK_CODEPLAY to CODEPLAY_HOST_TASK Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 2 +- sycl/include/CL/sycl/handler.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 4 ++-- sycl/source/detail/scheduler/graph_builder.cpp | 10 +++++----- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/handler.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index af1f63df2b53e..e7b2f3a996abc 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -401,7 +401,7 @@ class CG { FILL_USM, PREFETCH_USM, INTEROP_TASK_CODEPLAY, - CODEPLAY_HOST_TASK + HOST_TASK_CODEPLAY }; CG(CGTYPE Type, vector_class> ArgsStorage, diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index e850f97526ca6..81ab7d13b30d9 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -705,7 +705,7 @@ class __SYCL_EXPORT handler { MHostTask.reset(new detail::HostTask(std::move(Func))); - MCGType = detail::CG::CODEPLAY_HOST_TASK; + MCGType = detail::CG::HOST_TASK_CODEPLAY; } /// 
Defines and invokes a SYCL kernel function for the specified range and diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 69a5b77778617..925310a0e511f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1428,7 +1428,7 @@ static std::string cgTypeToString(detail::CG::CGTYPE Type) { case detail::CG::PREFETCH_USM: return "prefetch usm"; break; - case detail::CG::CODEPLAY_HOST_TASK: + case detail::CG::HOST_TASK_CODEPLAY: return "host task"; break; default: @@ -1975,7 +1975,7 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } - case CG::CGTYPE::CODEPLAY_HOST_TASK: { + case CG::CGTYPE::HOST_TASK_CODEPLAY: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); for (ArgDesc &Arg : HostTask->MArgs) { diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1d2a4469bf22c..97e6d90464d78 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -667,7 +667,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, EmptyCommand *EmptyCmd = nullptr; - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); if (!EmptyCmd) @@ -705,12 +705,12 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, for (Command *Dep : Deps) NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { EmptyCmd->addRequirement(NewCmd.get(), AllocaCmd, Req); } } - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { NewCmd->addUser(EmptyCmd); } @@ -726,7 +726,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); addNodeToLeaves(Record, 
NewCmd.get(), Req->MAccessMode); - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { + if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { updateLeaves({NewCmd.get()}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); } @@ -895,7 +895,7 @@ void Scheduler::GraphBuilder::connectDepEvent( std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::CODEPLAY_HOST_TASK, /* Payload */ {})); + CG::HOST_TASK_CODEPLAY, /* Payload */ {})); ConnectCmd = new ExecCGCommand( std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index b44c171f2fb4a..727fc394cbf1a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -79,7 +79,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), DefaultHostQueue); break; - case CG::CODEPLAY_HOST_TASK: + case CG::HOST_TASK_CODEPLAY: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue); break; default: diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f7b372c2f3535..fa8b7c2c545ae 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -77,7 +77,7 @@ event handler::finalize(const cl::sycl::detail::code_location &Payload) { std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), Payload)); break; - case detail::CG::CODEPLAY_HOST_TASK: + case detail::CG::HOST_TASK_CODEPLAY: CommandGroup.reset(new detail::CGHostTask( std::move(MHostTask), /*MQueue,*/ std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), From d9ec78a56961b8d8aa4458c4d68ff87562a70245 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 11:29:48 +0300 Subject: [PATCH 102/188] [SYCL] Fix build issue Signed-off-by: 
Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 925310a0e511f..10c85220c79fb 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1320,10 +1320,12 @@ void EmptyCommand::emitInstrumentationData() { return; // Create a payload with the command name and an event using this payload to // emit a node_create - if (!MRequirement.get()) + if (MRequirements.empty()) return; - MAddress = MRequirement->MSYCLMemObj; + Requirement &Req = *MRequirements.begin(); + + MAddress = Req->MSYCLMemObj; makeTraceEventProlog(MAddress); if (MFirstInstance) { From 0e38582f06b8d88b63c2b34d0fef4f328728761b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 12:20:37 +0300 Subject: [PATCH 103/188] [SYCL] Move changes in addCG to distinct function. Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 62 +++++++++++-------- sycl/source/detail/scheduler/scheduler.hpp | 4 ++ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 97e6d90464d78..7ef4f30629fe5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -650,6 +650,39 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } +void Scheduler::GraphBuilder::addEmptyCmdForHostTask( + ExecCGCommand *Cmd, const std::unique_ptr &CmdGroup, + const QueueImplPtr &Queue) { + const std::vector &Reqs = CmdGroup->MRequirements; + + EmptyCommand *EmptyCmd = + new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + + if (!EmptyCmd) + throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + + EmptyCmd->MIsBlockable = true; + EmptyCmd->MEnqueueStatus = 
EnqueueResultT::SyclEnqueueBlocked; + EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + + for (Requirement *Req : Reqs) { + MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); + AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue); + EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); + } + + Cmd->addUser(EmptyCmd); + + const std::vector &Deps = Cmd->MDeps; + for (const DepDesc &Dep : Deps) { + const Requirement *Req = Dep.MDepRequirement; + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + + updateLeaves({Cmd}, Record, Req->MAccessMode); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + } +} + Command * Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { @@ -665,19 +698,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (MPrintOptionsArray[BeforeAddCG]) printGraphAsDot("before_addCG"); - EmptyCommand *EmptyCmd = nullptr; - - if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { - EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); - - if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - - EmptyCmd->MIsBlockable = true; - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Command::BlockReason::HostTask; - } - for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); markModifiedIfWrite(Record, Req); @@ -704,14 +724,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, for (Command *Dep : Deps) NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); - - if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { - EmptyCmd->addRequirement(NewCmd.get(), AllocaCmd, Req); - } - } - - if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { - NewCmd->addUser(EmptyCmd); } // Set new command as user for dependencies and update leaves. 
@@ -725,11 +737,6 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); - - if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) { - updateLeaves({NewCmd.get()}, Record, Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); - } } // Register all the events as dependencies @@ -737,6 +744,9 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, NewCmd->addDep(e); } + if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) + addEmptyCmdForHostTask(NewCmd.get(), CommandGroup, Queue); + if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 0776018c105ea..f255980e313e6 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -558,6 +558,10 @@ class Scheduler { std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); + void addEmptyCmdForHostTask(ExecCGCommand *Cmd, + const std::unique_ptr &CommandGroup, + const QueueImplPtr &Queue); + protected: /// Finds a command dependency corresponding to the record. 
DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record); From 1b62aed6121a5ef43f8cbcb322fc89191c06b064 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 12:22:06 +0300 Subject: [PATCH 104/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 10c85220c79fb..df7710a389ea4 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1325,7 +1325,7 @@ void EmptyCommand::emitInstrumentationData() { Requirement &Req = *MRequirements.begin(); - MAddress = Req->MSYCLMemObj; + MAddress = Req.MSYCLMemObj; makeTraceEventProlog(MAddress); if (MFirstInstance) { From deb3e671ef03fa6f185798bdd893bf89dc28ae15 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 12:35:56 +0300 Subject: [PATCH 105/188] [SYCL] Remove unneeded comment Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 7ef4f30629fe5..64f21732c1840 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -897,7 +897,6 @@ void Scheduler::GraphBuilder::connectDepEvent( ExecCGCommand *ConnectCmd = nullptr; { - // Temporary function. Will be replaced depending on circumstances. std::function Func = []() {}; std::unique_ptr HT(new detail::HostTask(std::move(Func))); From eaa8005879fb9f669eda6973218b908c97ee5510 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 14:24:05 +0300 Subject: [PATCH 106/188] [SYCL] Fix runtime issue. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 2 ++ sycl/source/detail/scheduler/graph_builder.cpp | 7 +++---- sycl/source/detail/scheduler/scheduler.hpp | 4 +--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ad9a6f82c0ac9..4791f524b49c1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -456,6 +456,8 @@ class ExecCGCommand : public Command { void printDot(std::ostream &Stream) const final; void emitInstrumentationData(); + const std::unique_ptr &getCG() const { return MCommandGroup; } + private: cl_int enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 64f21732c1840..76d45948aab62 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -651,9 +651,8 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } void Scheduler::GraphBuilder::addEmptyCmdForHostTask( - ExecCGCommand *Cmd, const std::unique_ptr &CmdGroup, - const QueueImplPtr &Queue) { - const std::vector &Reqs = CmdGroup->MRequirements; + ExecCGCommand *Cmd, const QueueImplPtr &Queue) { + const std::vector &Reqs = Cmd->getCG()->MRequirements; EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -745,7 +744,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) - addEmptyCmdForHostTask(NewCmd.get(), CommandGroup, Queue); + addEmptyCmdForHostTask(NewCmd.get(), Queue); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index f255980e313e6..70102fb87f485 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ 
b/sycl/source/detail/scheduler/scheduler.hpp @@ -558,9 +558,7 @@ class Scheduler { std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); - void addEmptyCmdForHostTask(ExecCGCommand *Cmd, - const std::unique_ptr &CommandGroup, - const QueueImplPtr &Queue); + void addEmptyCmdForHostTask(ExecCGCommand *Cmd, const QueueImplPtr &Queue); protected: /// Finds a command dependency corresponding to the record. From 36dffd94b5335fc0ee952032752d88a947046d81 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 15:30:22 +0300 Subject: [PATCH 107/188] [SYCL] Remove unwanted changes Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 76d45948aab62..02e5abec4ccf5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -615,15 +615,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); - - std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); - for (Command *Dep : Deps) { - AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); - Dep->addUser(AllocaCmd); - } - updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } @@ -776,8 +767,7 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { Visited.insert(AllocaCmd); for (Command *UserCmd : AllocaCmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) - ToVisit.push(UserCmd); + ToVisit.push(UserCmd); CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid @@ -794,9 +784,7 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord 
*Record) { continue; for (Command *UserCmd : Cmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) { - ToVisit.push(UserCmd); - } + ToVisit.push(UserCmd); // Delete all dependencies on any allocations being removed // Track which commands should have their users updated From 37917a76a6dd6112acf5a79680f77c319fcce875 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 16:37:23 +0300 Subject: [PATCH 108/188] [SYCL] Wait for host events in first place Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index df7710a389ea4..75b6236e077ab 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -692,8 +692,8 @@ void AllocaCommand::emitInstrumentationData() { } cl_int AllocaCommand::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; RT::PiEvent &Event = MEvent->getHandleRef(); @@ -789,8 +789,8 @@ void *AllocaSubBufCommand::getMemAllocation() const { } cl_int AllocaSubBufCommand::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; RT::PiEvent &Event = MEvent->getHandleRef(); MMemAllocation = MemoryManager::allocateMemSubBuffer( @@ -853,8 +853,8 @@ void ReleaseCommand::emitInstrumentationData() { } cl_int ReleaseCommand::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); bool SkipRelease = false; @@ -967,8 +967,8 @@ void MapMemObject::emitInstrumentationData() { } cl_int MapMemObject::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector 
EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1027,8 +1027,8 @@ void UnMapMemObject::emitInstrumentationData() { } cl_int UnMapMemObject::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1104,8 +1104,8 @@ ContextImplPtr MemCpyCommand::getContext() const { cl_int MemCpyCommand::enqueueImp() { QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1171,8 +1171,8 @@ void ExecCGCommand::flushStreams() { } cl_int UpdateHostRequirementCommand::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; RT::PiEvent &Event = MEvent->getHandleRef(); Command::waitForEvents(MQueue, EventImpls, Event); @@ -1254,8 +1254,8 @@ ContextImplPtr MemCpyCommandHost::getContext() const { cl_int MemCpyCommandHost::enqueueImp() { QueueImplPtr Queue = MQueue->is_host() ? 
MSrcQueue : MQueue; - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -1644,8 +1644,8 @@ void DispatchNativeKernel(void *Blob) { } cl_int ExecCGCommand::enqueueImp() { - std::vector EventImpls = MPreparedDepsEvents; waitForPreparedHostEvents(); + std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); From eab005d7314e5729e63ac1bd05470f0c3da89f13 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 16:38:06 +0300 Subject: [PATCH 109/188] [SYCL] Employ common wait mechanism upon enqueueing command for waiting. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_processor.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 08c8a67431376..aa72388a70a12 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -45,11 +45,7 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) { // TODO: Reschedule commands. throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); - RT::PiEvent &CLEvent = Cmd->getEvent()->getHandleRef(); - if (CLEvent) { - const detail::plugin &Plugin = Event->getPlugin(); - Plugin.call(1, &CLEvent); - } + Cmd->getEvent()->waitInternal(); } bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, From 239afd89de061493305952db2ab24f23577d5ea5 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 16:39:23 +0300 Subject: [PATCH 110/188] Revert "[SYCL] Remove unwanted changes" This reverts commit 36dffd94b5335fc0ee952032752d88a947046d81. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 02e5abec4ccf5..76d45948aab62 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -615,6 +615,15 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); + + std::set Deps = + findDepsForReq(Record, Req, Queue->getContextImplPtr()); + for (Command *Dep : Deps) { + AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); + Dep->addUser(AllocaCmd); + } + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } @@ -767,7 +776,8 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { Visited.insert(AllocaCmd); for (Command *UserCmd : AllocaCmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) + ToVisit.push(UserCmd); CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid @@ -784,7 +794,9 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { continue; for (Command *UserCmd : Cmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) { + ToVisit.push(UserCmd); + } // Delete all dependencies on any allocations being removed // Track which commands should have their users updated From e73f49f42ca779c3bf1cc1f12b951fe4c7ee89f2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 20:10:27 +0300 Subject: [PATCH 111/188] [SYCL] Stylistic issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 75b6236e077ab..db72faa7b48c0 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1995,7 +1995,7 @@ cl_int ExecCGCommand::enqueueImp() { } MQueue->getThreadPool().submit(std::move(DispatchHostTask( - EventImpls, MPreparedHostDepsEvents, HostTask, MDeps, MEvent))); + MPreparedDepsEvents, MPreparedHostDepsEvents, HostTask, MDeps, MEvent))); return CL_SUCCESS; } From c96566db476118d58f346d3f5155c817ef468f02 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 28 Apr 2020 20:11:03 +0300 Subject: [PATCH 112/188] [SYCL] Update state of event upon setting of context Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6be144ab1dc3e..0c6856b7e8a6a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -91,6 +91,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MHostEvent = Context->is_host(); MOpenCLInterop = !MHostEvent; MContext = Context; + + MState = MHostEvent ? 
HES_NotReady : HES_Ready; } event_impl::event_impl() : MState(HES_Ready) {} From 68c6cc8b8d283b048dfd56e185c399bfd982ed64 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 00:54:41 +0300 Subject: [PATCH 113/188] [SYCL] Fix race-condition Signed-off-by: Sergey Kanaev --- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0c6856b7e8a6a..e36e9d96b6a7a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -50,7 +50,7 @@ event_impl::~event_impl() { } void event_impl::waitInternal() const { - if (!MHostEvent) { + if (!MHostEvent && MEvent) { getPlugin().call(1, &MEvent); return; } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index db72faa7b48c0..39c84ae749124 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -439,20 +439,17 @@ void Command::makeTraceEventEpilog() { void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = getContext(); - // Async work is not supported for host device. - if (DepEvent->is_host()) { + // 1. Async work is not supported for host device. + // 2. The event handle can be null in case of, for example, alloca command, + // which is currently synchrounious, so don't generate OpenCL event. + // Though, this event isn't host one as it's context isn't host one. + if (DepEvent->is_host() || DepEvent->getHandleRef() == nullptr) { // call to waitInternal() is in waitForPreparedHostEvents() as it's called // from enqueue process functions MPreparedHostDepsEvents.push_back(DepEvent); return; } - // The event handle can be null in case of, for example, alloca command, - // which is currently synchrounious, so don't generate OpenCL event. 
- if (DepEvent->getHandleRef() == nullptr) { - return; - } - ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match - connect them using user event if (DepEventContext != Context && !Context->is_host()) { @@ -1646,7 +1643,6 @@ void DispatchNativeKernel(void *Blob) { cl_int ExecCGCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; - auto RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); From 13fa22b43b4062549e4c77f9166f13ee21e7eae1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 01:20:03 +0300 Subject: [PATCH 114/188] [SYCL] Fix sporadic segfault in scheduler Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 727fc394cbf1a..6e65668f4461f 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -139,6 +139,7 @@ std::vector Scheduler::getWaitList(EventImplPtr Event) { } void Scheduler::waitForEvent(EventImplPtr Event) { + std::lock_guard lock(MGraphLock); GraphProcessor::waitForEvent(std::move(Event)); } @@ -181,6 +182,7 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req, } void Scheduler::releaseHostAccessor(Requirement *Req) { + std::lock_guard lock(MGraphLock); Command *const BlockedCmd = Req->MBlockedCmd; assert(BlockedCmd && "Can't find appropriate command to unblock"); From c77f7f966f7d2e1bb5e776ad0227c5db1a40bb1b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 01:20:36 +0300 Subject: [PATCH 115/188] [SYCL] Properly release resources in scheduler unit-test Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/LeafLimit.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 
8537857d8970e..1bec26eeb5791 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -67,4 +67,8 @@ TEST_F(SchedulerTest, LeafLimit) { EXPECT_TRUE(std::any_of( NewestLeaf->MDeps.begin(), NewestLeaf->MDeps.end(), [&](const detail::DepDesc &DD) { return DD.MDepCommand == OldestLeaf; })); + + FakeDepCmd->getEvent()->setComplete(); + for (FakeCommand *Cmd : LeavesToAdd) + Cmd->getEvent()->setComplete(); } From 78e032cf409f3bd5e97f5cdf52dbafd85996ab07 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 01:32:42 +0300 Subject: [PATCH 116/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 39c84ae749124..f0828c59214a0 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1990,8 +1990,9 @@ cl_int ExecCGCommand::enqueueImp() { } } - MQueue->getThreadPool().submit(std::move(DispatchHostTask( - MPreparedDepsEvents, MPreparedHostDepsEvents, HostTask, MDeps, MEvent))); + MQueue->getThreadPool().submit( + std::move(DispatchHostTask(MPreparedDepsEvents, MPreparedHostDepsEvents, + HostTask, MDeps, MEvent))); return CL_SUCCESS; } From 4216af1a33e8afde805b6dc5829ebba5aa2c11a8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 09:25:45 +0300 Subject: [PATCH 117/188] [SYCL] Fix build issue (merge glitch). 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index edd1e9aa4a885..38bde3737e0d8 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -391,11 +391,6 @@ class queue_impl { void instrumentationEpilog(void *TelementryEvent, string_class &Name, int32_t StreamID, uint64_t IId); - /// Stores an event that should be associated with the queue - /// - /// \param Event is the event to be stored - void addEvent(event Event); - void initHostTaskAndEventCallbackThreadPool(); /// Stores a USM operation event that should be associated with the queue From ece3bcba3c9d223bbe272c51d2f7c485a4de7b7b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 09:48:52 +0300 Subject: [PATCH 118/188] [SYCL] Fix build issue (merge glitch). Signed-off-by: Sergey Kanaev --- sycl/source/handler.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 6f3ee879eeb2c..ef33006d6e754 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -90,10 +90,9 @@ event handler::finalize() { break; case detail::CG::HOST_TASK_CODEPLAY: CommandGroup.reset(new detail::CGHostTask( - std::move(MHostTask), /*MQueue,*/ std::move(MArgs), - std::move(MArgsStorage), std::move(MAccStorage), - std::move(MSharedPtrStorage), std::move(MRequirements), - std::move(MEvents), MCGType, Payload)); + std::move(MHostTask), std::move(MArgs), std::move(MArgsStorage), + std::move(MAccStorage), std::move(MSharedPtrStorage), + std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc)); break; case detail::CG::NONE: throw runtime_error("Command group submitted without a kernel or a " From 565bd8319658ee1e66d7c2247a3ae7a7c3f92fed Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 10:22:11 +0300 Subject: [PATCH 119/188] [SYCL] Fix build 
issue (merge glitch). Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 629b6ad497e3b..b24f644eb7110 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -131,7 +131,7 @@ struct get_reduction_aux_2nd_kernel_name_t { typename get_kernel_name_t::name>; }; -device getDeviceFromHandler(handler &); +__SYCL_EXPORT device getDeviceFromHandler(handler &); template struct check_fn_signature { static_assert(std::integral_constant::value, From 30321a174215ab6115f327cb43b57ece79c81934 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 10:48:15 +0300 Subject: [PATCH 120/188] [SYCL] Employ C++14 feature in thread pool. Signed-off-by: Sergey Kanaev --- sycl/source/detail/thread_pool.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 1b22349ccf6fb..4481b66b68b12 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -54,7 +54,7 @@ class ThreadPool { MStop.store(false); for (size_t Idx = 0; Idx < MThreadCount; ++Idx) - MLaunchedThreads.emplace_back(&ThreadPool::worker, this); + MLaunchedThreads.emplace_back([this] { worker(); }); } void finishAndWait() { @@ -70,7 +70,7 @@ class ThreadPool { template void submit(T &&Func) { { std::lock_guard Lock(MJobQueueMutex); - MJobQueue.emplace(std::move([Func]() { Func(); })); + MJobQueue.emplace([F = std::move(Func)]() { F(); }); } MDoSmthOrStop.notify_one(); From 14c94760ffb4f9b24b8669b282446fb58712a0d4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 11:55:13 +0300 Subject: [PATCH 121/188] [SYCL] Fix ABI test Signed-off-by: Sergey Kanaev --- sycl/test/abi/layout_handler.cpp | 1 + sycl/test/abi/sycl_symbols_linux.dump | 2 +- sycl/test/abi/symbol_size.cpp | 2 +- 3 files changed, 3 
insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/layout_handler.cpp b/sycl/test/abi/layout_handler.cpp index 49887124183a4..65ad8670db1fc 100644 --- a/sycl/test/abi/layout_handler.cpp +++ b/sycl/test/abi/layout_handler.cpp @@ -31,6 +31,7 @@ // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 0 // CHECK-NEXT: FieldDecl {{.*}} MPattern 'vector_class':'std::vector>' // CHECK-NEXT: FieldDecl {{.*}} MHostKernel 'unique_ptr_class':'std::unique_ptr>' +// CHECK-NEXT: FieldDecl {{.*}} MHostTask 'unique_ptr_class':'std::unique_ptr>' // CHECK-NEXT: FieldDecl {{.*}} MOSModuleHandle 'detail::OSModuleHandle':'long' // CHECK-NEXT: FieldDecl {{.*}} MInteropTask 'std::unique_ptr':'std::unique_ptr>' // CHECK-NEXT: FieldDecl {{.*}} MEvents 'vector_class':'std::vector, std::allocator>>' diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 73e9c1ebb4dec..2877a351385df 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3229,9 +3229,9 @@ _ZN2cl4sycl7contextC2ERKSt8functionIFvNS0_14exception_listEEEb _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev +_ZN2cl4sycl7handler15addEventToQueueESt10shared_ptrINS0_6detail10queue_implEENS0_5eventE _ZN2cl4sycl7handler18extractArgsAndReqsEv _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE -_ZN2cl4sycl7handler15addEventToQueueESt10shared_ptrINS0_6detail10queue_implEENS0_5eventE _ZN2cl4sycl7handler8finalizeEv _ZN2cl4sycl7program17build_with_sourceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_ _ZN2cl4sycl7program19compile_with_sourceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_ diff --git a/sycl/test/abi/symbol_size.cpp b/sycl/test/abi/symbol_size.cpp index 5b595a86978ea..941eedb104f2f 100644 --- a/sycl/test/abi/symbol_size.cpp +++ b/sycl/test/abi/symbol_size.cpp @@ -46,7 
+46,7 @@ int main() { #ifdef _MSC_VER check_size(); #else - check_size(); + check_size(); #endif check_size, 16>(); check_size(); From a17d607cc3720dd4079e7e3a763be803c7faae60 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 12:26:07 +0300 Subject: [PATCH 122/188] [SYCL] Set pi trace level Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 980f8a010e825..6c4fcc8be1ecb 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER #include #include From 7caf17b500e6a8e2e82ce018e27a5cb32e420b54 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 13:06:34 +0300 Subject: [PATCH 123/188] [SYCL] Fix ABI test. Signed-off-by: Sergey Kanaev --- sycl/test/abi/symbol_size.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/abi/symbol_size.cpp b/sycl/test/abi/symbol_size.cpp index 941eedb104f2f..9380fd1526de1 100644 --- a/sycl/test/abi/symbol_size.cpp +++ b/sycl/test/abi/symbol_size.cpp @@ -44,7 +44,7 @@ int main() { check_size(); check_size(); #ifdef _MSC_VER - check_size(); + check_size(); #else check_size(); #endif From b6b924cf8fa84b1034fb12562ce4b612aa3944e6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 13:25:54 +0300 Subject: [PATCH 124/188] Revert "Revert "[SYCL] Remove unwanted changes"" This reverts commit 239afd89de061493305952db2ab24f23577d5ea5. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 76d45948aab62..02e5abec4ccf5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -615,15 +615,6 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); - - std::set Deps = - findDepsForReq(Record, Req, Queue->getContextImplPtr()); - for (Command *Dep : Deps) { - AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); - Dep->addUser(AllocaCmd); - } - updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } @@ -776,8 +767,7 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { Visited.insert(AllocaCmd); for (Command *UserCmd : AllocaCmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) - ToVisit.push(UserCmd); + ToVisit.push(UserCmd); CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid @@ -794,9 +784,7 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { continue; for (Command *UserCmd : Cmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) { - ToVisit.push(UserCmd); - } + ToVisit.push(UserCmd); // Delete all dependencies on any allocations being removed // Track which commands should have their users updated From a30c3a058affb8b7c58c08a79beea708a2475f50 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 13:29:08 +0300 Subject: [PATCH 125/188] [SYCL] Remove redundant test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 125 ---------------------- 1 file changed, 125 deletions(-) delete mode 100644 
sycl/test/host-interop-task/host-task.cpp diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp deleted file mode 100644 index fd973fbd285a1..0000000000000 --- a/sycl/test/host-interop-task/host-task.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out %threads_lib -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include -#include -#include -#include - -#include - -namespace S = cl::sycl; - -struct Context { - std::atomic_bool Flag; - S::queue &Queue; - std::string Message; - S::buffer Buf1; - S::buffer Buf2; - std::mutex Mutex; - std::condition_variable CV; -}; - -void Thread1Fn(Context &Ctx) { - // 0. initialize resulting buffer with apriori wrong result - { - S::accessor - Acc(Ctx.Buf2); - - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - Acc[Idx] = -1; - } - - // 1. submit task writing to buffer 1 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor - GeneratorAcc(Ctx.Buf1, CGH); - - auto GeneratorKernel = [GeneratorAcc]() { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - - CGH.single_task(GeneratorKernel); - }); - - // 2. submit host task writing from buf 1 to buf 2 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor - CopierSrcAcc(Ctx.Buf1, CGH); - S::accessor - CopierDstAcc(Ctx.Buf2, CGH); - - auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx]() { - for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) - CopierDstAcc[Idx] = CopierSrcAcc[Idx]; - - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }; - - CGH.codeplay_host_task(CopierHostTask); - }); -} - -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); - - // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); - - assert(Ctx.Flag.load()); - - // T2.2. 
print some "hello, world" message - Ctx.Message = "Hello, world"; -} - -void test() { - auto EH = [](S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - Context Ctx{{false}, Queue, "", {10}, {10}}; - - // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); - - // 3. check via host accessor that buf 2 contains valid data - { - S::accessor - ResultAcc(Ctx.Buf2); - - for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); - } - } -} - -int main() { - test(); - - return 0; -} From b3efd812853b8288fc8a17dbe7b362fe29646832 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 19:15:24 +0300 Subject: [PATCH 126/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 8072ad5a472d5..b01da71df9de2 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -192,7 +192,7 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { // static void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { - MemObjRecord* Record = Req->MSYCLMemObj->MRecord.get(); + MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { EnqueueResultT Res; From 17a9faf4c9606749a371a6d6e83c8a6798ca2af0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 19:37:43 +0300 Subject: [PATCH 127/188] [SYCL] Update test Signed-off-by: Sergey Kanaev --- 
.../host-task-dependency.cpp | 68 +++++++++---------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 6c4fcc8be1ecb..f5910be062ab2 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -1,9 +1,14 @@ // RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER #include #include +#include #include #include @@ -14,7 +19,6 @@ namespace S = cl::sycl; struct Context { std::atomic_bool Flag; S::queue &Queue; - std::string Message; S::buffer Buf1; S::buffer Buf2; S::buffer Buf3; @@ -22,12 +26,12 @@ struct Context { std::condition_variable CV; }; -void Thread1Fn(Context &Ctx) { +void Thread1Fn(Context *Ctx) { // 0. initialize resulting buffer with apriori wrong result { S::accessor - Acc(Ctx.Buf1); + Acc(Ctx->Buf1); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -1; @@ -36,7 +40,7 @@ void Thread1Fn(Context &Ctx) { { S::accessor - Acc(Ctx.Buf2); + Acc(Ctx->Buf2); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -2; @@ -45,19 +49,19 @@ void Thread1Fn(Context &Ctx) { { S::accessor - Acc(Ctx.Buf3); + Acc(Ctx->Buf3); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -3; } // 1. 
submit task writing to buffer 1 - Ctx.Queue.submit([&](S::handler &CGH) { + Ctx->Queue.submit([&](S::handler &CGH) { S::accessor - GeneratorAcc(Ctx.Buf1, CGH); + GeneratorAcc(Ctx->Buf1, CGH); - auto GeneratorKernel = [GeneratorAcc]() { + auto GeneratorKernel = [GeneratorAcc] { for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) GeneratorAcc[Idx] = Idx; }; @@ -66,26 +70,25 @@ void Thread1Fn(Context &Ctx) { }); // 2. submit host task writing from buf 1 to buf 2 - auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { + auto HostTaskEvent = Ctx->Queue.submit([&](S::handler &CGH) { S::accessor - CopierSrcAcc(Ctx.Buf1, CGH); + CopierSrcAcc(Ctx->Buf1, CGH); S::accessor - CopierDstAcc(Ctx.Buf2, CGH); + CopierDstAcc(Ctx->Buf2, CGH); - auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx]() { + auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx] { for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) CopierDstAcc[Idx] = CopierSrcAcc[Idx]; bool Expected = false; bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + assert(Ctx->Flag.compare_exchange_strong(Expected, Desired)); - // let's employ some locking here { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); + std::lock_guard Lock(Ctx->Mutex); + Ctx->CV.notify_all(); } }; @@ -93,17 +96,17 @@ void Thread1Fn(Context &Ctx) { }); // 3. 
submit simple task to move data between two buffers - Ctx.Queue.submit([&](S::handler &CGH) { + Ctx->Queue.submit([&](S::handler &CGH) { S::accessor - SrcAcc(Ctx.Buf2, CGH); + SrcAcc(Ctx->Buf2, CGH); S::accessor - DstAcc(Ctx.Buf3, CGH); + DstAcc(Ctx->Buf3, CGH); CGH.depends_on(HostTaskEvent); - auto CopierKernel = [SrcAcc, DstAcc]() { + auto CopierKernel = [SrcAcc, DstAcc] { for (size_t Idx = 0; Idx < DstAcc.get_count(); ++Idx) DstAcc[Idx] = SrcAcc[Idx]; }; @@ -115,7 +118,7 @@ void Thread1Fn(Context &Ctx) { { S::accessor - Acc(Ctx.Buf3); + Acc(Ctx->Buf3); bool Failure = false; @@ -123,23 +126,19 @@ void Thread1Fn(Context &Ctx) { fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, Acc[Idx]); Failure |= (Acc[Idx] != Idx); - //assert(Acc[Idx] == Idx && "Invalid data in third buffer"); } assert(!Failure && "Invalid data in third buffer"); } } -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); +void Thread2Fn(Context *Ctx) { + std::unique_lock Lock(Ctx->Mutex); // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); + Ctx->CV.wait(Lock, [Ctx] { return Ctx->Flag.load(); }); - assert(Ctx.Flag.load()); - - // T2.2. print some "hello, world" message - Ctx.Message = "Hello, world"; + assert(Ctx->Flag.load()); } void test() { @@ -151,17 +150,16 @@ void test() { S::queue Queue(EH); - Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; + Context Ctx{{false}, Queue, {10}, {10}, {10}, {}, {}}; // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + auto A1 = std::async(std::launch::async, Thread1Fn, &Ctx); + auto A2 = std::async(std::launch::async, Thread2Fn, &Ctx); - Thread1.join(); - Thread2.join(); + A1.wait(); + A2.wait(); assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); // 3. 
check via host accessor that buf 2 contains valid data { From 2f6af3bf6ce8001caf0fcdc9042c3150ebc143d3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 29 Apr 2020 22:04:13 +0300 Subject: [PATCH 128/188] [SYCL] Runtime fixes Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f0828c59214a0..c97effd94e147 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -160,7 +160,9 @@ getPiEvents(const std::vector &EventImpls) { class DispatchHostTask { std::vector MDepEvents; std::vector MDepHostEvents; - CGHostTask *MHostTask; + // Store cg in shared ptr due to copy-constructor call by thread pool + // FIXME Employ unique_ptr + std::shared_ptr MHostTask; std::vector MDeps; EventImplPtr MSelfEvent; @@ -217,6 +219,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now MHostTask->MHostTask->call(); + MHostTask->MHostTask.reset(); Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); assert(ThisCmd && "No command found for host-task self event"); @@ -1974,7 +1977,7 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } case CG::CGTYPE::HOST_TASK_CODEPLAY: { - CGHostTask *HostTask = static_cast(MCommandGroup.get()); + CGHostTask *HostTask = static_cast(MCommandGroup.release()); for (ArgDesc &Arg : HostTask->MArgs) { switch (Arg.MType) { From c309777abd8f3a33ae48eb0988113915f38b5e36 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 30 Apr 2020 08:38:45 +0300 Subject: [PATCH 129/188] Revert "[SYCL] Remove unwanted changes" This reverts commit 36dffd94b5335fc0ee952032752d88a947046d81. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 02e5abec4ccf5..76d45948aab62 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -615,6 +615,15 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } else { LinkedAllocaCmd->MIsActive = false; Record->MCurContext = Queue->getContextImplPtr(); + + std::set Deps = + findDepsForReq(Record, Req, Queue->getContextImplPtr()); + for (Command *Dep : Deps) { + AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); + Dep->addUser(AllocaCmd); + } + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); } } } @@ -767,7 +776,8 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { Visited.insert(AllocaCmd); for (Command *UserCmd : AllocaCmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) + ToVisit.push(UserCmd); CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid @@ -784,7 +794,9 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { continue; for (Command *UserCmd : Cmd->MUsers) - ToVisit.push(UserCmd); + if (UserCmd->getType() != Command::CommandType::ALLOCA) { + ToVisit.push(UserCmd); + } // Delete all dependencies on any allocations being removed // Track which commands should have their users updated From aa4c67926614091402f1943f05a34206e1083f98 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 30 Apr 2020 10:16:21 +0300 Subject: [PATCH 130/188] [SYCL] Fix runtime issue for linked alloca deps Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) 
diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 76d45948aab62..4ea5b628db25f 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -785,6 +785,13 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { AllocaCmd->MUsers.clear(); } + // unchain from deps allocas, whose linked allocas are to be removed currently + for (AllocaCommandBase *AllocaCmd : AllocaCommands) + if (Visited.count(AllocaCmd->MLinkedAllocaCmd)) + for (DepDesc &Dep : AllocaCmd->MDeps) + if (Dep.MDepCommand) + Dep.MDepCommand->MUsers.erase(AllocaCmd); + // Traverse the graph using BFS while (!ToVisit.empty()) { Command *Cmd = ToVisit.front(); @@ -794,9 +801,8 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { continue; for (Command *UserCmd : Cmd->MUsers) - if (UserCmd->getType() != Command::CommandType::ALLOCA) { + if (UserCmd->getType() != Command::CommandType::ALLOCA) ToVisit.push(UserCmd); - } // Delete all dependencies on any allocations being removed // Track which commands should have their users updated From a4bc8f0799841f0c188178f5ad2b692f58539c10 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 30 Apr 2020 16:47:11 +0300 Subject: [PATCH 131/188] [SYCL] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/test/abi/sycl_symbols_linux.dump | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2877a351385df..cac45ced92b59 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3229,7 +3229,6 @@ _ZN2cl4sycl7contextC2ERKSt8functionIFvNS0_14exception_listEEEb _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev 
-_ZN2cl4sycl7handler15addEventToQueueESt10shared_ptrINS0_6detail10queue_implEENS0_5eventE _ZN2cl4sycl7handler18extractArgsAndReqsEv _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE _ZN2cl4sycl7handler8finalizeEv From 5234efdfb9d9f2ae564c47ab88f1a95bf15a5f42 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 30 Apr 2020 17:39:13 +0300 Subject: [PATCH 132/188] [SYCL] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 42fe66088005c..4bd74552b3126 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -779,7 +779,7 @@ class __SYCL_EXPORT handler { MHostTask.reset(new detail::HostTask(std::move(Func))); - MCGType = detail::CG::CODEPLAY_HOST_TASK; + MCGType = detail::CG::HOST_TASK_CODEPLAY; } /// Defines and invokes a SYCL kernel function for the specified range and From 4a8de921bbd56d53c9472cf5cbbbbd22c7867f23 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 6 May 2020 12:19:48 +0300 Subject: [PATCH 133/188] [SYCL] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 359ac10fbac51..d8e250fbac16e 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -105,32 +105,6 @@ template struct get_kernel_name_t { using name = Type; }; -/// These are the forward declaration for the classes that help to create -/// names for additional kernels. It is used only when there are -/// more then 1 kernels in one parallel_for() implementing SYCL reduction. 
-template class __sycl_reduction_main_2nd_kernel; -template class __sycl_reduction_aux_1st_kernel; -template class __sycl_reduction_aux_2nd_kernel; - -/// Helper structs to get additional kernel name types based on given -/// \c Name and \c Type types: if \c Name is undefined (is a \c auto_name) then -/// \c Type becomes the \c Name. -template -struct get_reduction_main_2nd_kernel_name_t { - using name = __sycl_reduction_main_2nd_kernel< - typename get_kernel_name_t::name>; -}; -template -struct get_reduction_aux_1st_kernel_name_t { - using name = __sycl_reduction_aux_1st_kernel< - typename get_kernel_name_t::name>; -}; -template -struct get_reduction_aux_2nd_kernel_name_t { - using name = __sycl_reduction_aux_2nd_kernel< - typename get_kernel_name_t::name>; -}; - template struct check_fn_signature { static_assert(std::integral_constant::value, "Second template parameter is required to be of function type"); From 16ca481d8c51dbaa361d472be09ed20b148fa023 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 6 May 2020 12:20:59 +0300 Subject: [PATCH 134/188] [SYCL] Fix typo Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ddec6289fbabf..c99c184b2ed28 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -424,8 +424,7 @@ class queue_impl { bool MSupportOOO = true; // Thread pool for host task and event callbacks execution. 
- // The thread pool is instantiated upon the very first call to - // getHostTaskAndEventCallbackThreadPool + // The thread pool is instantiated upon the very first call to getThreadPool() std::unique_ptr MHostTaskThreadPool; }; From 294bd9dff5bf5d6b0f542d356bee83b68ed80094 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 6 May 2020 22:22:50 +0300 Subject: [PATCH 135/188] [SYCL] A more proper way to unchain deps of linked allocas Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 4ea5b628db25f..a3592420991f5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -775,9 +775,13 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { for (Command *AllocaCmd : AllocaCommands) { Visited.insert(AllocaCmd); + // Linked alloca cmd may be in users of this alloca. We're not going to + // visit it. for (Command *UserCmd : AllocaCmd->MUsers) if (UserCmd->getType() != Command::CommandType::ALLOCA) ToVisit.push(UserCmd); + else + Visited.insert(UserCmd); CmdsToDelete.push_back(AllocaCmd); // These commands will be deleted later, clear users now to avoid @@ -785,12 +789,16 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { AllocaCmd->MUsers.clear(); } - // unchain from deps allocas, whose linked allocas are to be removed currently - for (AllocaCommandBase *AllocaCmd : AllocaCommands) - if (Visited.count(AllocaCmd->MLinkedAllocaCmd)) + // Linked alloca's share dependencies. Unchain from deps linked alloca's. + // Any cmd of the alloca - linked_alloca may be used later on.
+ for (AllocaCommandBase *AllocaCmd : AllocaCommands) { + AllocaCommandBase *LinkedCmd = AllocaCmd->MLinkedAllocaCmd; + + if (LinkedCmd && Visited.count(LinkedCmd)) for (DepDesc &Dep : AllocaCmd->MDeps) if (Dep.MDepCommand) Dep.MDepCommand->MUsers.erase(AllocaCmd); + } // Traverse the graph using BFS while (!ToVisit.empty()) { From c094e9aeea9ec48af0352e421a5c2859579e36f9 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 6 May 2020 22:24:11 +0300 Subject: [PATCH 136/188] [SYCL] Fix another race-condition. The race condition was appearing after destruction of buffer which had accessor in host-task. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 9 +++++---- sycl/source/detail/scheduler/scheduler.cpp | 7 ++++++- sycl/source/detail/scheduler/scheduler.hpp | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c97effd94e147..fba0ae9fec321 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -228,16 +228,16 @@ class DispatchHostTask { EmptyCommand *EmptyCmd = findUserEmptyCommand(ThisCmd); assert(EmptyCmd && "No empty command found"); - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - // update self-event status MSelfEvent->setComplete(); + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + // The enqueue process is driven by backend for non-host. 
// For host event we'll enqueue leaves of requirements if (MSelfEvent->is_host()) for (const DepDesc &Dep : ThisCmd->MDeps) - Scheduler::enqueueLeavesOfReq(Dep.MDepRequirement); + Scheduler::getInstance().enqueueLeavesOfReq(Dep.MDepRequirement); } }; @@ -1308,7 +1308,8 @@ cl_int EmptyCommand::enqueueImp() { void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, const Requirement *Req) { - MRequirements.emplace_back(*Req); + const Requirement &ReqRef = *Req; + MRequirements.emplace_back(ReqRef); const Requirement *const StoredReq = &MRequirements.back(); addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index b01da71df9de2..bb58c8ea0c3c3 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -190,8 +190,13 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { enqueueLeavesOfReq(Req); } -// static void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { + std::shared_lock Lock(MGraphLock); + enqueueLeavesOfReqUnlocked(Req); +} + +// static +void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req) { MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); auto EnqueueLeaves = [](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 5cd240bdf9f8f..517ddf86dc224 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -430,7 +430,8 @@ class Scheduler { Scheduler(); static Scheduler instance; - static void enqueueLeavesOfReq(const Requirement *const Req); + static void enqueueLeavesOfReqUnlocked(const Requirement *const Req); + void enqueueLeavesOfReq(const Requirement *const Req); /// Graph builder class. 
/// From af89f457fc085b35ce0f9c62d75e4c2d708f7c9d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 11:33:43 +0300 Subject: [PATCH 137/188] [SYCL] Fix merge issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/source/detail/scheduler/graph_builder.cpp | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 0fad973c5304b..398c9410872b4 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -652,7 +652,7 @@ class CGHostTask : public CG { std::unique_ptr MHostTask; // queue for host-interop task shared_ptr_class MQueue; - // context to create self event with + // context for host-interop task shared_ptr_class MContext; vector_class MArgs; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a366bf9e66ecb..ca9f4e7d78cf8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2023,7 +2023,7 @@ cl_int ExecCGCommand::enqueueImp() { MQueue->getThreadPool().submit(std::move(DispatchHostTask( MPreparedDepsEvents, MPreparedHostDepsEvents, HostTask, MDeps, - std::move(ReqToMem) MEvent))); + std::move(ReqToMem), MEvent))); return CL_SUCCESS; } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index a3592420991f5..36d9bd5525a4c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -914,10 +914,11 @@ void Scheduler::GraphBuilder::connectDepEvent( std::unique_ptr HT(new detail::HostTask(std::move(Func))); std::unique_ptr ConnectCG(new detail::CGHostTask( - std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, - /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, - /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - 
CG::HOST_TASK_CODEPLAY, /* Payload */ {})); + std::move(HT), /* Queue = */ {}, /* Context = */ {}, /* Args = */ {}, + /* ArgsStorage = */ {}, /* AccStorage = */ {}, + /* SharedPtrStorage = */ {}, /* Requirements = */ {}, + /* DepEvents = */ {DepEvent}, CG::HOST_TASK_CODEPLAY, + /* Payload */ {})); ConnectCmd = new ExecCGCommand( std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); } From 5056f66603ef0a0034dd71a60ec0ea9bb438e256 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 11:34:10 +0300 Subject: [PATCH 138/188] [SYCL] Remove redundant test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/interop-task.cpp | 129 ------------------- 1 file changed, 129 deletions(-) delete mode 100644 sycl/test/host-interop-task/interop-task.cpp diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp deleted file mode 100644 index 96528438acda2..0000000000000 --- a/sycl/test/host-interop-task/interop-task.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// RUN: %clangxx -fsycl %s -o %t.out %threads_lib -// RUN: %CPU_RUN_PLACEHOLDER %t.out - -#include -#include -#include -#include - -#include - -namespace S = cl::sycl; - -struct Context { - std::atomic_bool Flag; - S::queue &Queue; - std::string Message; - S::buffer Buf1; - S::buffer Buf2; - std::mutex Mutex; - std::condition_variable CV; -}; - -void Thread1Fn(Context &Ctx) { - // 0. initialize resulting buffer with apriori wrong result - { - S::accessor - Acc(Ctx.Buf2); - - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - Acc[Idx] = -1; - } - - // 1. submit task writing to buffer 1 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor - GeneratorAcc(Ctx.Buf1, CGH); - - auto GeneratorKernel = [GeneratorAcc]() { - for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) - GeneratorAcc[Idx] = Idx; - }; - - CGH.single_task(GeneratorKernel); - }); - - // 2. 
submit host task writing from buf 1 to buf 2 - Ctx.Queue.submit([&](S::handler &CGH) { - S::accessor - CopierSrcAcc(Ctx.Buf1, CGH); - S::accessor CopierDstAcc(Ctx.Buf2, CGH); - - auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx](S::interop_handle IH) { - // TODO write through interop handle objects - //(void)IH.get_native_mem(CopierSrcAcc); - (void)IH.get_native_mem(CopierDstAcc); - (void)IH.get_native_queue(); - (void)IH.get_native_device(); - (void)IH.get_native_context(); -// for (size_t Idx = 0; Idx < CopierDstAcc.get_count(); ++Idx) -// CopierDstAcc[Idx] = CopierSrcAcc[Idx]; - - bool Expected = false; - bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); - - // let's employ some locking here - { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); - } - }; - - CGH.codeplay_host_task(CopierHostTask); - }); -} - -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); - - // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); - - assert(Ctx.Flag.load()); - - // T2.2. print some "hello, world" message - Ctx.Message = "Hello, world"; -} - -void test() { - auto EH = [](S::exception_list EL) { - for (const std::exception_ptr &E : EL) { - throw E; - } - }; - - S::queue Queue(EH); - - Context Ctx{{false}, Queue, "", {10}, {10}}; - - // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); - - Thread1.join(); - Thread2.join(); - - assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); - - // 3. 
check via host accessor that buf 2 contains valid data - { - S::accessor - ResultAcc(Ctx.Buf2); - - for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - // assert(ResultAcc[Idx] == Idx && "Invalid data in result buffer"); - } - } -} - -int main() { - test(); - - return 0; -} From 1bc5fcd5732d2e96c33e5a7fdf9833d3d3ec1bb7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 15:13:17 +0300 Subject: [PATCH 139/188] [SYCL] Update test Signed-off-by: Sergey Kanaev --- .../interop-task-dependency.cpp | 76 ++++++++++++------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/sycl/test/host-interop-task/interop-task-dependency.cpp b/sycl/test/host-interop-task/interop-task-dependency.cpp index 72331dc08e36b..820cba3364f23 100644 --- a/sycl/test/host-interop-task/interop-task-dependency.cpp +++ b/sycl/test/host-interop-task/interop-task-dependency.cpp @@ -1,9 +1,14 @@ // RUN: %clangxx -fsycl %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_PI_TRACE=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER #include #include +#include #include #include @@ -14,7 +19,6 @@ namespace S = cl::sycl; struct Context { std::atomic_bool Flag; S::queue &Queue; - std::string Message; S::buffer Buf1; S::buffer Buf2; S::buffer Buf3; @@ -22,22 +26,40 @@ struct Context { std::condition_variable CV; }; -void Thread1Fn(Context &Ctx) { +void Thread1Fn(Context *Ctx) { // 0. 
initialize resulting buffer with apriori wrong result { S::accessor - Acc(Ctx.Buf2); + Acc(Ctx->Buf2); for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) Acc[Idx] = -1; } + { + S::accessor + Acc(Ctx->Buf2); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -2; + } + + { + S::accessor + Acc(Ctx->Buf3); + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) + Acc[Idx] = -3; + } + // 1. submit task writing to buffer 1 Ctx.Queue.submit([&](S::handler &CGH) { S::accessor - GeneratorAcc(Ctx.Buf1, CGH); + GeneratorAcc(Ctx->Buf1, CGH); auto GeneratorKernel = [GeneratorAcc]() { for (size_t Idx = 0; Idx < GeneratorAcc.get_count(); ++Idx) @@ -51,10 +73,10 @@ void Thread1Fn(Context &Ctx) { auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { S::accessor - CopierSrcAcc(Ctx.Buf1, CGH); + CopierSrcAcc(Ctx->Buf1, CGH); S::accessor - CopierDstAcc(Ctx.Buf2, CGH); + CopierDstAcc(Ctx->Buf2, CGH); auto CopierHostTask = [CopierSrcAcc, CopierDstAcc, &Ctx](S::interop_handle IH) { // TODO write through interop handle objects @@ -68,9 +90,8 @@ void Thread1Fn(Context &Ctx) { bool Expected = false; bool Desired = true; - assert(Ctx.Flag.compare_exchange_strong(Expected, Desired)); + assert(Ctx->Flag.compare_exchange_strong(Expected, Desired)); - // let's employ some locking here { std::lock_guard Lock(Ctx.Mutex); Ctx.CV.notify_all(); @@ -84,10 +105,10 @@ void Thread1Fn(Context &Ctx) { Ctx.Queue.submit([&](S::handler &CGH) { S::accessor - SrcAcc(Ctx.Buf2, CGH); + SrcAcc(Ctx->Buf2, CGH); S::accessor - DstAcc(Ctx.Buf3, CGH); + DstAcc(Ctx->Buf3, CGH); CGH.depends_on(HostTaskEvent); @@ -103,23 +124,27 @@ void Thread1Fn(Context &Ctx) { { S::accessor - Acc(Ctx.Buf3); + Acc(Ctx->Buf3); - for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) - assert(Acc[Idx] == Idx && "Invalid data in third buffer"); + bool Failure = false; + + for (size_t Idx = 0; Idx < Acc.get_count(); ++Idx) { + fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, Acc[Idx]); + + Failure |= (Acc[Idx] != Idx); + 
} + + assert(!Failure && "Invalid data in third buffer"); } } -void Thread2Fn(Context &Ctx) { - std::unique_lock Lock(Ctx.Mutex); +void Thread2Fn(Context *Ctx) { + std::unique_lock Lock(Ctx->Mutex); // T2.1. Wait until flag F is set eq true. - Ctx.CV.wait(Lock, [&Ctx] { return Ctx.Flag.load(); }); - - assert(Ctx.Flag.load()); + Ctx.CV.wait(Lock, [&Ctx] { return Ctx->Flag.load(); }); - // T2.2. print some "hello, world" message - Ctx.Message = "Hello, world"; + assert(Ctx->Flag.load()); } void test() { @@ -134,14 +159,13 @@ void test() { Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false - std::thread Thread1(Thread1Fn, std::reference_wrapper(Ctx)); - std::thread Thread2(Thread2Fn, std::reference_wrapper(Ctx)); + auto A1 = std::async(std::launch::async, Thread1Fn, &Ctx); + auto A2 = std::async(std::launch::async, Thread2Fn, &Ctx); - Thread1.join(); - Thread2.join(); + A1.wait(); + A2.wait(); assert(Ctx.Flag.load()); - assert(Ctx.Message == "Hello, world"); // 3. check via host accessor that buf 2 contains valid data { From ba3d009966c109d5cf8134f3a47fe5502906a2f1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 16:00:55 +0300 Subject: [PATCH 140/188] [SYCL] Address some review comments. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/accessor_impl.cpp | 7 +- sycl/source/detail/event_impl.cpp | 31 +++---- sycl/source/detail/event_impl.hpp | 7 +- sycl/source/detail/scheduler/commands.cpp | 85 +++---------------- sycl/source/detail/scheduler/commands.hpp | 5 ++ .../source/detail/scheduler/graph_builder.cpp | 29 ++++--- sycl/source/detail/scheduler/scheduler.cpp | 2 +- sycl/source/detail/thread_pool.hpp | 2 +- sycl/unittests/scheduler/LeafLimit.cpp | 4 - 9 files changed, 61 insertions(+), 111 deletions(-) diff --git a/sycl/source/detail/accessor_impl.cpp b/sycl/source/detail/accessor_impl.cpp index c5f2281bf13c8..96a7657bb27a5 100644 --- a/sycl/source/detail/accessor_impl.cpp +++ b/sycl/source/detail/accessor_impl.cpp @@ -27,6 +27,7 @@ void addHostAccessorAndWait(Requirement *Req) { detail::Scheduler::getInstance().addHostAccessor(Req); Event->wait(Event); } -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) +} +} +} + diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e36e9d96b6a7a..313ef01472d98 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -54,30 +54,27 @@ void event_impl::waitInternal() const { getPlugin().call(1, &MEvent); return; } - // Waiting of host events is NOP so far as all operations on host device - // are blocking. 
- while (MState != HES_Ready) + while (MState != HES_Complete) ; } void event_impl::setComplete() { - if (MHostEvent && !MEvent) { + if (MHostEvent || !MEvent) { #ifndef NDEBUG - int Expected = HES_NotReady; - int Desired = HES_Ready; + int Expected = HES_NotComplete; + int Desired = HES_Complete; bool Succeeded = MState.compare_exchange_strong(Expected, Desired); assert(Succeeded && "Unexpected state of event"); #else - MState.store(static_cast(HES_Ready)); + MState.store(static_cast(HES_Complete)); #endif - } else if (MEvent) - getPlugin().call(getHandleRef(), - PI_EVENT_COMPLETE); - else - assert(false && "Event is neither host nor device one."); + return; + } + + assert(false && "setComplete is not supported for non-host event"); } const RT::PiEvent &event_impl::getHandleRef() const { return MEvent; } @@ -92,14 +89,14 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MOpenCLInterop = !MHostEvent; MContext = Context; - MState = MHostEvent ? HES_NotReady : HES_Ready; + MState = HES_NotComplete; } -event_impl::event_impl() : MState(HES_Ready) {} +event_impl::event_impl() : MState(HES_Complete) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), - MOpenCLInterop(true), MHostEvent(false), MState(HES_Ready) { + MOpenCLInterop(true), MHostEvent(false), MState(HES_Complete) { if (MContext->is_host()) { throw cl::sycl::invalid_parameter_error( @@ -124,7 +121,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { if (Queue->is_host()) { - MState.store(HES_NotReady); + MState.store(HES_NotComplete); if (Queue->has_property()) { MHostProfilingInfo.reset(new HostProfilingInfo()); @@ -135,7 +132,7 @@ event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { return; } - MState.store(HES_Ready); + MState.store(HES_Complete); } void *event_impl::instrumentationProlog(string_class &Name, int32_t 
StreamID, diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index b5f110df48850..58616e13a12d2 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -168,10 +168,11 @@ class event_impl { std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; - enum HostEventState : int { HES_NotReady = 0, HES_Ready }; + enum HostEventState : int { HES_NotComplete = 0, HES_Complete }; - // State of host event. Employed only for host events. - // Used values are listed in HostEventState enum. + // State of host event. Employed only for host events and event with no + // backend's representation (e.g. alloca). Used values are listed in + // HostEventState enum. std::atomic MState; }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index fba0ae9fec321..2dfa745d2d9e4 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -176,7 +176,10 @@ class DispatchHostTask { } // wait for dependency device events - // FIXME introduce a more sophisticated wait mechanism + // FIXME Current implementation of waiting for events will make the thread + // 'sleep' until all of dependency events are complete. We need a bit more + // sophisticated waiting mechanism to allow to utilize this thread for any + // other available job and resume once all required events are ready. 
for (auto &PluginWithEvents : RequiredEventsPerPlugin) { std::vector RawEvents = getPiEvents(PluginWithEvents.second); PluginWithEvents.first->call(RawEvents.size(), @@ -454,7 +457,7 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { } ContextImplPtr DepEventContext = DepEvent->getContextImpl(); - // If contexts don't match - connect them using user event + // If contexts don't match we'll connect them using host task if (DepEventContext != Context && !Context->is_host()) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; GB.connectDepEvent(this, DepEvent, DepEventContext, Context, Dep); @@ -569,15 +572,21 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) { // has already been marked as "failed" if enqueueImp throws an exception. // This will avoid execution of the same failed command twice. MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; + MShouldCompleteEventIfPossible = true; cl_int Res = enqueueImp(); if (CL_SUCCESS != Res) EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res); - else + else { + if (MShouldCompleteEventIfPossible && + (MEvent->is_host() || MEvent->getHandleRef() == nullptr)) + MEvent->setComplete(); + // Consider the command is successfully enqueued if return code is // CL_SUCCESS MEnqueueStatus = EnqueueResultT::SyclEnqueueSuccess; + } // Emit this correlation signal before the task end emitEnqueuedEventSignal(MEvent->getHandleRef()); @@ -704,8 +713,6 @@ cl_int AllocaCommand::enqueueImp() { // Do not need to make allocation if we have a linked device allocation Command::waitForEvents(MQueue, EventImpls, Event); - MEvent->setComplete(); - return CL_SUCCESS; } HostPtr = MLinkedAllocaCmd->getMemAllocation(); @@ -716,9 +723,6 @@ cl_int AllocaCommand::enqueueImp() { detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return 
CL_SUCCESS; } @@ -799,9 +803,6 @@ cl_int AllocaSubBufCommand::enqueueImp() { MRequirement.MOffsetInBytes, MRequirement.MAccessRange, std::move(EventImpls), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -914,9 +915,6 @@ cl_int ReleaseCommand::enqueueImp() { MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -977,9 +975,6 @@ cl_int MapMemObject::enqueueImp() { MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1036,9 +1031,6 @@ cl_int UnMapMemObject::enqueueImp() { MDstAllocaCmd->getMemAllocation(), MQueue, *MSrcPtr, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1127,9 +1119,6 @@ cl_int MemCpyCommand::enqueueImp() { MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event); } - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1181,9 +1170,6 @@ cl_int UpdateHostRequirementCommand::enqueueImp() { assert(MDstPtr && "Expected valid target pointer"); *MDstPtr = MSrcAllocaCmd->getMemAllocation(); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1266,9 +1252,6 @@ cl_int MemCpyCommandHost::enqueueImp() { MDstReq.MAccessMode == access::mode::discard_write) { Command::waitForEvents(Queue, EventImpls, Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1279,9 +1262,6 @@ cl_int MemCpyCommandHost::enqueueImp() { MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1300,9 +1280,6 @@ cl_int EmptyCommand::enqueueImp() { waitForPreparedHostEvents(); waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); - 
if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1671,9 +1648,6 @@ cl_int ExecCGCommand::enqueueImp() { Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, Req->MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::COPY_PTR_TO_ACC: { @@ -1691,9 +1665,6 @@ cl_int ExecCGCommand::enqueueImp() { Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::COPY_ACC_TO_ACC: { @@ -1711,9 +1682,6 @@ cl_int ExecCGCommand::enqueueImp() { MQueue, ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange, ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::FILL: { @@ -1727,9 +1695,6 @@ cl_int ExecCGCommand::enqueueImp() { Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::RUN_ON_HOST_INTEL: { @@ -1760,9 +1725,6 @@ cl_int ExecCGCommand::enqueueImp() { } DispatchNativeKernel((void *)ArgsBlob.data()); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1790,9 +1752,6 @@ cl_int ExecCGCommand::enqueueImp() { const_cast(MemLocs.data()), RawEvents.size(), RawEvents.empty() ? 
nullptr : RawEvents.data(), &Event); - if (MEvent->is_host()) - MEvent->setComplete(); - switch (Error) { case PI_INVALID_OPERATION: throw cl::sycl::runtime_error( @@ -1824,9 +1783,6 @@ cl_int ExecCGCommand::enqueueImp() { ExecKernel->MHostKernel->call(NDRDesc, getEvent()->getHostProfilingInfo()); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } @@ -1908,9 +1864,6 @@ cl_int ExecCGCommand::enqueueImp() { Kernel, NDRDesc); } - if (MEvent->is_host()) - MEvent->setComplete(); - return PI_SUCCESS; } case CG::CGTYPE::COPY_USM: { @@ -1918,9 +1871,6 @@ cl_int ExecCGCommand::enqueueImp() { MemoryManager::copy_usm(Copy->getSrc(), MQueue, Copy->getLength(), Copy->getDst(), std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::FILL_USM: { @@ -1928,9 +1878,6 @@ cl_int ExecCGCommand::enqueueImp() { MemoryManager::fill_usm(Fill->getDst(), MQueue, Fill->getLength(), Fill->getFill(), std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::PREFETCH_USM: { @@ -1939,9 +1886,6 @@ cl_int ExecCGCommand::enqueueImp() { Prefetch->getLength(), std::move(RawEvents), Event); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::INTEROP_TASK_CODEPLAY: { @@ -1972,9 +1916,6 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call( reinterpret_cast(MQueue->get())); - if (MEvent->is_host()) - MEvent->setComplete(); - return CL_SUCCESS; } case CG::CGTYPE::HOST_TASK_CODEPLAY: { @@ -1998,6 +1939,8 @@ cl_int ExecCGCommand::enqueueImp() { std::move(DispatchHostTask(MPreparedDepsEvents, MPreparedHostDepsEvents, HostTask, MDeps, MEvent))); + MShouldCompleteEventIfPossible = false; + return CL_SUCCESS; } case CG::CGTYPE::NONE: diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 4791f524b49c1..e19d78712d6f3 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ 
b/sycl/source/detail/scheduler/commands.hpp @@ -251,6 +251,8 @@ class Command { bool MFirstInstance = false; /// Instance ID tracked for the command. uint64_t MInstanceID = 0; + + bool MShouldCompleteEventIfPossible = true; }; /// The empty command does nothing during enqueue. The task can be used to @@ -270,6 +272,9 @@ class EmptyCommand : public Command { private: cl_int enqueueImp() final; + // Employing deque here as it allows to push_back/emplace_back without + // invalidation of pointer or reference to stored data item regardless of + // iterator invalidation. std::deque MRequirements; }; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index a3592420991f5..62b840e6508af 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -775,9 +775,9 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { for (Command *AllocaCmd : AllocaCommands) { Visited.insert(AllocaCmd); - // Linked alloca cmd may be in users of this alloca. We're not going to - // visit it. for (Command *UserCmd : AllocaCmd->MUsers) + // Linked alloca cmd may be in users of this alloca. We're not going to + // visit it. 
if (UserCmd->getType() != Command::CommandType::ALLOCA) ToVisit.push(UserCmd); else @@ -794,10 +794,13 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(MemObjRecord *Record) { for (AllocaCommandBase *AllocaCmd : AllocaCommands) { AllocaCommandBase *LinkedCmd = AllocaCmd->MLinkedAllocaCmd; - if (LinkedCmd && Visited.count(LinkedCmd)) + if (LinkedCmd) { + assert(Visited.count(LinkedCmd)); + for (DepDesc &Dep : AllocaCmd->MDeps) if (Dep.MDepCommand) Dep.MDepCommand->MUsers.erase(AllocaCmd); + } } // Traverse the graph using BFS @@ -938,16 +941,22 @@ void Scheduler::GraphBuilder::connectDepEvent( DepCmd->addUser(ConnectCmd); - if (Dep.MDepRequirement) { + if (Dep.MDepRequirement) addConnectCmdWithReq(Cmd, DepEventContext, ConnectCmd, EmptyCmd, Dep); - } else /* if (!Dep.MDepRequirement) */ { + else { ConnectCmd->addDep(DepEvent); EmptyCmd->addDep(ConnectCmd->getEvent()); ConnectCmd->addUser(EmptyCmd); } - } else // if (!DepEvent->getCommand()) + } else + // if there is no command for the event (either the command is removed + // during cleanup or it's a user's event) ConnectCmd->addDep(DepEvent); + // FIXME graph builder shouldn't really enqueue commands. We're in the middle + // of enqueue process for some command Cmd. We're going to add a dependency + // for it. Need some nice and cute solution to enqueue ConnectCmd via standard + // scheduler/graph processor mechanisms. 
EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) @@ -963,16 +972,14 @@ void Scheduler::GraphBuilder::addConnectCmdWithReq( const DepDesc &Dep) { Requirement *Req = const_cast(Dep.MDepRequirement); - Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - - MemObjRecord *Record = GB.getMemObjRecord(Req->MSYCLMemObj); + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); Dep.MDepCommand->addUser(ConnectCmd); AllocaCommandBase *AllocaCmd = - GB.findAllocaForReq(Record, Req, DepEventContext); + findAllocaForReq(Record, Req, DepEventContext); assert(AllocaCmd && "There must be alloca for requirement!"); - std::set Deps = GB.findDepsForReq(Record, Req, DepEventContext); + std::set Deps = findDepsForReq(Record, Req, DepEventContext); assert(Deps.size() && "There must be some deps"); for (Command *ReqDepCmd : Deps) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index bb58c8ea0c3c3..089f64d3374e3 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -187,7 +187,7 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - enqueueLeavesOfReq(Req); + enqueueLeavesOfReqUnlocked(Req); } void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 4481b66b68b12..59a9b5fe347a7 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -26,7 +26,7 @@ class ThreadPool { void worker() { std::unique_lock Lock(MJobQueueMutex); - for (;;) { + while (true) { MDoSmthOrStop.wait( Lock, [this]() { return !MJobQueue.empty() || MStop.load(); }); diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 
211c975a6225a..9e91d62c28ce2 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -58,8 +58,4 @@ TEST_F(SchedulerTest, LeafLimit) { EXPECT_TRUE(std::any_of( NewestLeaf->MDeps.begin(), NewestLeaf->MDeps.end(), [&](const detail::DepDesc &DD) { return DD.MDepCommand == OldestLeaf; })); - - MockDepCmd->getEvent()->setComplete(); - for (MockCommand *Cmd : LeavesToAdd) - Cmd->getEvent()->setComplete(); } From 6052e8509b519145d273f6f926fdec445b894036 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 17:15:26 +0300 Subject: [PATCH 141/188] [SYCL] Set proper target tripple in test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index f5910be062ab2..1b17655560299 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out %threads_lib +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out From 471fb78470d4da9e8352a0c723620cf555092364 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 17:24:12 +0300 Subject: [PATCH 142/188] [SYCL] Employ std::future properly in the test Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-dependency.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 1b17655560299..9e8055f783572 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -156,8 +156,8 @@ void test() { auto A1 = 
std::async(std::launch::async, Thread1Fn, &Ctx); auto A2 = std::async(std::launch::async, Thread2Fn, &Ctx); - A1.wait(); - A2.wait(); + A1.get(); + A2.get(); assert(Ctx.Flag.load()); From ab49e2a96dba09796e86d4ee66ca6693f25c6f1a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 19:08:28 +0300 Subject: [PATCH 143/188] [SYCL] Store command in DispatchHostTask instead of a lot of fields. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 56 ++++++++++------------- sycl/source/detail/scheduler/commands.hpp | 3 ++ 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2dfa745d2d9e4..2267ff0611a08 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -158,19 +158,13 @@ getPiEvents(const std::vector &EventImpls) { } class DispatchHostTask { - std::vector MDepEvents; - std::vector MDepHostEvents; - // Store cg in shared ptr due to copy-constructor call by thread pool - // FIXME Employ unique_ptr - std::shared_ptr MHostTask; - std::vector MDeps; - EventImplPtr MSelfEvent; + ExecCGCommand *MThisCmd; void waitForEvents() const { std::map> RequiredEventsPerPlugin; - for (const EventImplPtr &Event : MDepEvents) { + for (const EventImplPtr &Event : MThisCmd->MPreparedDepsEvents) { const detail::plugin &Plugin = Event->getPlugin(); RequiredEventsPerPlugin[&Plugin].push_back(Event); } @@ -187,7 +181,7 @@ class DispatchHostTask { } // wait for dependency host events - for (const EventImplPtr &Event : MDepHostEvents) { + for (const EventImplPtr &Event : MThisCmd->MPreparedHostDepsEvents) { Event->waitInternal(); } } @@ -209,38 +203,39 @@ class DispatchHostTask { } public: - DispatchHostTask(std::vector DepEvents, - std::vector DepHostEvents, - CGHostTask *HostTask, std::vector Deps, - EventImplPtr SelfEvent) - : MDepEvents(std::move(DepEvents)), - MDepHostEvents(DepHostEvents), 
MHostTask{HostTask}, - MDeps(std::move(Deps)), MSelfEvent(std::move(SelfEvent)) {} + DispatchHostTask(ExecCGCommand *ThisCmd) + : MThisCmd{ThisCmd} {} void operator()() const { waitForEvents(); - // we're ready to call the user-defined lambda now - MHostTask->MHostTask->call(); - MHostTask->MHostTask.reset(); + assert(MThisCmd->getCG().get()); + assert(MThisCmd->getCG()->getType() == CG::CGTYPE::HOST_TASK_CODEPLAY); + + CGHostTask *HostTask = static_cast(MThisCmd->getCG().get()); - Command *ThisCmd = reinterpret_cast(MSelfEvent->getCommand()); - assert(ThisCmd && "No command found for host-task self event"); + // we're ready to call the user-defined lambda now + HostTask->MHostTask->call(); + HostTask->MHostTask.reset(); // unblock user empty command here - EmptyCommand *EmptyCmd = findUserEmptyCommand(ThisCmd); + EmptyCommand *EmptyCmd = findUserEmptyCommand(MThisCmd); assert(EmptyCmd && "No empty command found"); + // Completing command's event along with unblocking enqueue readiness of + // empty command may lead to quick deallocation of MThisCmd by some cleanup + // process. Thus we'll copy deps prior to completing of event and unblocking + // of empty command. + + std::vector Deps = MThisCmd->MDeps; + // update self-event status - MSelfEvent->setComplete(); + MThisCmd->MEvent->setComplete(); EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - // The enqueue process is driven by backend for non-host. 
- // For host event we'll enqueue leaves of requirements - if (MSelfEvent->is_host()) - for (const DepDesc &Dep : ThisCmd->MDeps) - Scheduler::getInstance().enqueueLeavesOfReq(Dep.MDepRequirement); + for (const DepDesc &Dep : Deps) + Scheduler::getInstance().enqueueLeavesOfReq(Dep.MDepRequirement); } }; @@ -1919,7 +1914,7 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } case CG::CGTYPE::HOST_TASK_CODEPLAY: { - CGHostTask *HostTask = static_cast(MCommandGroup.release()); + CGHostTask *HostTask = static_cast(MCommandGroup.get()); for (ArgDesc &Arg : HostTask->MArgs) { switch (Arg.MType) { @@ -1936,8 +1931,7 @@ cl_int ExecCGCommand::enqueueImp() { } MQueue->getThreadPool().submit( - std::move(DispatchHostTask(MPreparedDepsEvents, MPreparedHostDepsEvents, - HostTask, MDeps, MEvent))); + std::move(DispatchHostTask(this))); MShouldCompleteEventIfPossible = false; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index e19d78712d6f3..f5fe6bbb6bae0 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -27,6 +27,7 @@ namespace detail { class queue_impl; class event_impl; class context_impl; +class DispatchHostTask; using QueueImplPtr = std::shared_ptr; using EventImplPtr = std::shared_ptr; @@ -207,6 +208,8 @@ class Command { /// Mutex used to protect enqueueing from race conditions std::mutex MEnqueueMtx; + friend class DispatchHostTask; + public: /// Contains list of dependencies(edges) std::vector MDeps; From e44ad313d201c324a416b7dde41a1a6eb743a9b7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 7 May 2020 19:26:48 +0300 Subject: [PATCH 144/188] [SYCL] Resolve style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 3 +-- sycl/source/detail/scheduler/graph_builder.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp 
index 2267ff0611a08..1bfda3cca0d8f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -203,8 +203,7 @@ class DispatchHostTask { } public: - DispatchHostTask(ExecCGCommand *ThisCmd) - : MThisCmd{ThisCmd} {} + DispatchHostTask(ExecCGCommand *ThisCmd) : MThisCmd{ThisCmd} {} void operator()() const { waitForEvents(); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 62b840e6508af..1bd0e3a2350d9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -975,8 +975,7 @@ void Scheduler::GraphBuilder::addConnectCmdWithReq( MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); Dep.MDepCommand->addUser(ConnectCmd); - AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, DepEventContext); + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, DepEventContext); assert(AllocaCmd && "There must be alloca for requirement!"); std::set Deps = findDepsForReq(Record, Req, DepEventContext); From b2aaee10e815e57f1db88dcf6db7407ae9a69f34 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 09:20:58 +0300 Subject: [PATCH 145/188] [SYCL] Employ addEmptyCmd whilst creating host accessor. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 6 ----- sycl/source/detail/scheduler/commands.hpp | 1 - .../source/detail/scheduler/graph_builder.cpp | 23 +++++-------------- sycl/source/detail/scheduler/scheduler.hpp | 4 +++- 4 files changed, 9 insertions(+), 25 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1bfda3cca0d8f..df66f360b7e7b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1259,12 +1259,6 @@ cl_int MemCpyCommandHost::enqueueImp() { return CL_SUCCESS; } -EmptyCommand::EmptyCommand(QueueImplPtr Queue, Requirement Req) - : Command(CommandType::EMPTY_TASK, std::move(Queue)) { - MRequirements.emplace_back(std::move(Req)); - emitInstrumentationDataProxy(); -} - EmptyCommand::EmptyCommand(QueueImplPtr Queue) : Command(CommandType::EMPTY_TASK, std::move(Queue)) { emitInstrumentationDataProxy(); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index f5fe6bbb6bae0..7cd910219aad7 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -262,7 +262,6 @@ class Command { /// implement lock in the graph, or to merge several nodes into one. 
class EmptyCommand : public Command { public: - EmptyCommand(QueueImplPtr Queue, Requirement Req); EmptyCommand(QueueImplPtr Queue); void printDot(std::ostream &Stream) const final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1bd0e3a2350d9..672d998c69a31 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -416,18 +416,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = new EmptyCommand(HostQueue, *Req); - - EmptyCmd->addDep( - DepDesc{UpdateHostAccCmd, EmptyCmd->getRequirement(), HostAllocaCmd}); - UpdateHostAccCmd->addUser(EmptyCmd); - - EmptyCmd->MIsBlockable = true; - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Command::BlockReason::HostAccessor; - - updateLeaves({UpdateHostAccCmd}, Record, Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue); Req->MBlockedCmd = EmptyCmd; @@ -650,10 +639,8 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -void Scheduler::GraphBuilder::addEmptyCmdForHostTask( - ExecCGCommand *Cmd, const QueueImplPtr &Queue) { - const std::vector &Reqs = Cmd->getCG()->MRequirements; - +EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, + const std::vector &Reqs, const QueueImplPtr &Queue) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -680,6 +667,8 @@ void Scheduler::GraphBuilder::addEmptyCmdForHostTask( updateLeaves({Cmd}, Record, Req->MAccessMode); addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); } + + return EmptyCmd; } Command * @@ -744,7 +733,7 @@ 
Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) - addEmptyCmdForHostTask(NewCmd.get(), Queue); + addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 517ddf86dc224..a89f5dfd92f92 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -559,7 +559,9 @@ class Scheduler { std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context); - void addEmptyCmdForHostTask(ExecCGCommand *Cmd, const QueueImplPtr &Queue); + EmptyCommand *addEmptyCmd(Command *Cmd, + const std::vector &Req, + const QueueImplPtr &Queue); protected: /// Finds a command dependency corresponding to the record. From bc2a4df10e17887e1206ad21514e95be053f1835 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 10:04:30 +0300 Subject: [PATCH 146/188] [SYCL] Pass less arguments to GraphBuilder::connectDepEvent Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/source/detail/scheduler/graph_builder.cpp | 9 ++++++--- sycl/source/detail/scheduler/scheduler.hpp | 8 +++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index df66f360b7e7b..47050ba65a0f8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -454,7 +454,7 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { // If contexts don't match we'll connect them using host task if (DepEventContext != Context && !Context->is_host()) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - GB.connectDepEvent(this, DepEvent, DepEventContext, Context, Dep); + GB.connectDepEvent(this, 
DepEvent, Dep); } else MPreparedDepsEvents.push_back(std::move(DepEvent)); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 672d998c69a31..1b4df85b889f5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -895,9 +895,12 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { } void Scheduler::GraphBuilder::connectDepEvent( - Command *const Cmd, EventImplPtr DepEvent, - const ContextImplPtr &DepEventContext, const ContextImplPtr &Context, - const DepDesc &Dep) { + Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep) { + const ContextImplPtr &Context = Cmd->getContext(); + const ContextImplPtr &DepEventContext = DepEvent->getContext(); + + assert(Cntext != DepEventContext); + // construct Host Task type command manually and make it depend on DepEvent ExecCGCommand *ConnectCmd = nullptr; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a89f5dfd92f92..3fe2624992d9d 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -514,15 +514,13 @@ class Scheduler { access::mode AccessMode); /// Perform connection of events in multiple contexts + /// \param Cmd dependant command /// \param DepEvent event to depend on - /// \param DepEventContext context of DepEvent - /// \param Context context of command which wants to depend on DepEvent /// \param Dep optional DepDesc to perform connection properly /// - /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. + /// Optionality of Dep is set by Dep.MDepCommand equal to nullptr. 
void connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, - const ContextImplPtr &DepEventContext, - const ContextImplPtr &Context, const DepDesc &Dep); + const DepDesc &Dep); /// Helper for connectDepEvent /// \param ConnectCmd connection cmd to properly add /// \param Dep DepDesc with non-null MDepRequirmeent From 68da219ccf2111a01fdbd711c47b549af9a2c2f8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:01:56 +0300 Subject: [PATCH 147/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 7cd910219aad7..ccdefd9100a69 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -173,6 +173,8 @@ class Command { const char *getBlockReason() const; + virtual ContextImplPtr getContext() const; + protected: EventImplPtr MEvent; QueueImplPtr MQueue; @@ -198,7 +200,6 @@ class Command { /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); - virtual ContextImplPtr getContext() const; /// Private interface. Derived classes should implement this method. 
virtual cl_int enqueueImp() = 0; @@ -417,9 +418,9 @@ class MemCpyCommand : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData(); + ContextImplPtr getContext() const override final; private: - ContextImplPtr getContext() const final; cl_int enqueueImp() final; QueueImplPtr MSrcQueue; @@ -440,9 +441,9 @@ class MemCpyCommandHost : public Command { void printDot(std::ostream &Stream) const final; const Requirement *getRequirement() const final { return &MDstReq; } void emitInstrumentationData(); + ContextImplPtr getContext() const override final; private: - ContextImplPtr getContext() const final; cl_int enqueueImp() final; QueueImplPtr MSrcQueue; From 126cc3239d145f8bf951df25311a05d332ed90f8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:03:15 +0300 Subject: [PATCH 148/188] [SYCL] Rewrite GraphBuilder::connectDepEvent in a clearer way. Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 68 +++++-------------- sycl/source/detail/scheduler/scheduler.hpp | 7 -- 2 files changed, 18 insertions(+), 57 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1b4df85b889f5..b7ee218a19c90 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -920,30 +920,28 @@ void Scheduler::GraphBuilder::connectDepEvent( if (!ConnectCmd) throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); - if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) { - EmptyCommand *EmptyCmd = - new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); + if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) + DepCmd->addUser(ConnectCmd); - if (!EmptyCmd) - throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); + ConnectCmd->addDep(DepEvent); - EmptyCmd->MIsBlockable = 
true; - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + EmptyCommand *EmptyCmd = nullptr; - DepCmd->addUser(ConnectCmd); + if (Dep.MDepRequirement) { + const auto &Reqs = std::vector( + 1, const_cast(Dep.MDepRequirement)); + EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, + Scheduler::getInstance().getDefaultHostQueue()); + // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. + } + else { + EmptyCmd = addEmptyCmd(ConnectCmd, {}, + Scheduler::getInstance().getDefaultHostQueue()); - if (Dep.MDepRequirement) - addConnectCmdWithReq(Cmd, DepEventContext, ConnectCmd, EmptyCmd, Dep); - else { - ConnectCmd->addDep(DepEvent); - EmptyCmd->addDep(ConnectCmd->getEvent()); - ConnectCmd->addUser(EmptyCmd); - } - } else - // if there is no command for the event (either the command is removed - // during cleanup or it's a user's event) - ConnectCmd->addDep(DepEvent); + // There is no requirement thus, empty command will only depend on + // ConnectCmd via its event. + EmptyCmd->addDep(ConnectCmd->getEvent()); + } // FIXME graph builder shouldn't really enqueue commands. We're in the middle // of enqueue process for some command Cmd. 
We're going to add a dependency @@ -958,36 +956,6 @@ void Scheduler::GraphBuilder::connectDepEvent( Cmd->addDep(ConnectCmd->getEvent()); } -void Scheduler::GraphBuilder::addConnectCmdWithReq( - Command *const Cmd, const ContextImplPtr &DepEventContext, - ExecCGCommand *const ConnectCmd, EmptyCommand *const EmptyCmd, - const DepDesc &Dep) { - Requirement *Req = const_cast(Dep.MDepRequirement); - - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - Dep.MDepCommand->addUser(ConnectCmd); - - AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, DepEventContext); - assert(AllocaCmd && "There must be alloca for requirement!"); - - std::set Deps = findDepsForReq(Record, Req, DepEventContext); - assert(Deps.size() && "There must be some deps"); - - for (Command *ReqDepCmd : Deps) { - ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); - ReqDepCmd->addUser(ConnectCmd); - } - - updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); - - EmptyCmd->addRequirement(ConnectCmd, Dep.MAllocaCmd, Dep.MDepRequirement); - ConnectCmd->addUser(EmptyCmd); - - updateLeaves({ConnectCmd}, Record, Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); -} - } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 3fe2624992d9d..279d314997924 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -521,13 +521,6 @@ class Scheduler { /// Optionality of Dep is set by Dep.MDepCommand equal to nullptr. 
void connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep); - /// Helper for connectDepEvent - /// \param ConnectCmd connection cmd to properly add - /// \param Dep DepDesc with non-null MDepRequirmeent - void addConnectCmdWithReq(Command *const Cmd, - const ContextImplPtr &DepEventContext, - ExecCGCommand *const ConnectCmd, - EmptyCommand *const EmptyCmd, const DepDesc &Dep); std::vector MMemObjs; From 45df093be938d4c2c02ec84343642f3873322a0d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:04:00 +0300 Subject: [PATCH 149/188] [SYCL] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index b7ee218a19c90..530905f766d8d 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -897,9 +897,9 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { void Scheduler::GraphBuilder::connectDepEvent( Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = Cmd->getContext(); - const ContextImplPtr &DepEventContext = DepEvent->getContext(); + const ContextImplPtr &DepEventContext = DepEvent->getContextImpl(); - assert(Cntext != DepEventContext); + assert(Context != DepEventContext); // construct Host Task type command manually and make it depend on DepEvent ExecCGCommand *ConnectCmd = nullptr; From 1542f8a0c91c1f1b914d2069ceb0f41f936a12eb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:04:40 +0300 Subject: [PATCH 150/188] [SYCL] Add comment on work of GraphBuilder::connectDepEvent. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 530905f766d8d..042f5b21fbb33 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -894,6 +894,18 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { MemObject->MRecord.reset(); } +// Make Cmd depend on DepEvent from different context. Connection is performed +// via distinct ConnectCmd with host task command group on host queue. Cmd will +// depend on ConnectCmd's host event. +// DepEvent may not have an associated with it command in at least two cases: +// - the command was deleted upon cleanup process; +// - DepEvent is user event. +// In both these cases the only thing we can do is to make ConnectCmd depend on +// DepEvent. +// Otherwise, when there is a command associated with DepEvent, we make +// ConnectCmd depend on on this command. If there is valid, i.e. non-nil, +// requirement in Dep we make ConnectCmd depend on DepEvent's command with this +// requirement. 
void Scheduler::GraphBuilder::connectDepEvent( Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = Cmd->getContext(); From 3d8b0540f307377868c0cd3200fc8c6476fcc89c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:06:13 +0300 Subject: [PATCH 151/188] [SYCL] Uplift dev version Signed-off-by: Sergey Kanaev --- sycl/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 454a8f62e635e..25fc95138c935 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -11,7 +11,7 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON) set(SYCL_MAJOR_VERSION 0) set(SYCL_MINOR_VERSION 1) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 1) +set(SYCL_DEV_ABI_VERSION 2) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() From 1cdc04f7701cc7b06c394e8285facec510683e4b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:12:32 +0300 Subject: [PATCH 152/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 1 - sycl/source/detail/scheduler/graph_builder.cpp | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ccdefd9100a69..483fc16e0c5b4 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -200,7 +200,6 @@ class Command { /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); - /// Private interface. Derived classes should implement this method. 
virtual cl_int enqueueImp() = 0; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 042f5b21fbb33..addcf2d18ec83 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -639,8 +639,10 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, - const std::vector &Reqs, const QueueImplPtr &Queue) { +EmptyCommand * +Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, + const std::vector &Reqs, + const QueueImplPtr &Queue) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -906,8 +908,9 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { // ConnectCmd depend on on this command. If there is valid, i.e. non-nil, // requirement in Dep we make ConnectCmd depend on DepEvent's command with this // requirement. -void Scheduler::GraphBuilder::connectDepEvent( - Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep) { +void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, + EventImplPtr DepEvent, + const DepDesc &Dep) { const ContextImplPtr &Context = Cmd->getContext(); const ContextImplPtr &DepEventContext = DepEvent->getContextImpl(); @@ -945,8 +948,7 @@ void Scheduler::GraphBuilder::connectDepEvent( EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue()); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. 
- } - else { + } else { EmptyCmd = addEmptyCmd(ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue()); From f900e6e9476d5a59db57a9625ab6e0062f91479f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 11:39:18 +0300 Subject: [PATCH 153/188] [SYCL] Removed reset Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 47050ba65a0f8..d87fe2f8092b9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -215,7 +215,6 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now HostTask->MHostTask->call(); - HostTask->MHostTask.reset(); // unblock user empty command here EmptyCommand *EmptyCmd = findUserEmptyCommand(MThisCmd); From 6731421acae33fd288b8e6fe1bc1a823d6714c29 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 17:14:49 +0300 Subject: [PATCH 154/188] [SYCL] Set proper reason for blocking of empty cmd Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 17 +++++++++++------ sycl/source/detail/scheduler/scheduler.hpp | 3 ++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index addcf2d18ec83..552ceac99570d 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -416,7 +416,8 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue); + EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, + Command::BlockReason::HostAccessor); Req->MBlockedCmd = EmptyCmd; 
@@ -642,7 +643,8 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand * Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue) { + const QueueImplPtr &Queue, + Command::BlockReason Reason) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -651,7 +653,7 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + EmptyCmd->MBlockReason = Reason; for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); @@ -735,7 +737,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) - addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue); + addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue, + Command::BlockReason::HostTask); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); @@ -946,11 +949,13 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, const auto &Reqs = std::vector( 1, const_cast(Dep.MDepRequirement)); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, - Scheduler::getInstance().getDefaultHostQueue()); + Scheduler::getInstance().getDefaultHostQueue(), + Command::BlockReason::HostTask); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. } else { EmptyCmd = addEmptyCmd(ConnectCmd, {}, - Scheduler::getInstance().getDefaultHostQueue()); + Scheduler::getInstance().getDefaultHostQueue(), + Command::BlockReason::HostTask); // There is no requirement thus, empty command will only depend on // ConnectCmd via its event. 
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 279d314997924..3ac677ef0c365 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -552,7 +552,8 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue); + const QueueImplPtr &Queue, + Command::BlockReason Reason); protected: /// Finds a command dependency corresponding to the record. From bf2c352c2736be266bd1d7ab9fd13b8403dd660a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 17:40:53 +0300 Subject: [PATCH 155/188] [SYCL] Make ConnectCmd depend on requirement Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 552ceac99570d..d1a8f88829db0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -946,8 +946,30 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, EmptyCommand *EmptyCmd = nullptr; if (Dep.MDepRequirement) { - const auto &Reqs = std::vector( - 1, const_cast(Dep.MDepRequirement)); + Requirement *Req = const_cast(Dep.MDepRequirement); + + // make ConnectCmd depend on requirement + { + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); + + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, + DepEventContext); + assert(AllocaCmd && "There must be alloca for requirement!"); + + std::set Deps = findDepsForReq(Record, Req, DepEventContext); + assert(Deps.size() && "There must be some deps"); + + for (Command *ReqDepCmd : Deps) { + ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); + ReqDepCmd->addUser(ConnectCmd); + } + + updateLeaves(Deps, Record, Req->MAccessMode); + 
addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); + } + + const auto &Reqs = std::vector(1, Req); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); From 76da746bd1957cc332370d818987522e1e5a71d2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 18:45:58 +0300 Subject: [PATCH 156/188] Revert "[SYCL] Removed reset" This reverts commit f900e6e9476d5a59db57a9625ab6e0062f91479f. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d87fe2f8092b9..47050ba65a0f8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -215,6 +215,7 @@ class DispatchHostTask { // we're ready to call the user-defined lambda now HostTask->MHostTask->call(); + HostTask->MHostTask.reset(); // unblock user empty command here EmptyCommand *EmptyCmd = findUserEmptyCommand(MThisCmd); From 168beb2babf9542a78339cafe4f2ff322a079dde Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Sun, 10 May 2020 22:25:31 +0300 Subject: [PATCH 157/188] [SYCL] Address review comments. Signed-off-by: Sergey Kanaev --- sycl/source/detail/thread_pool.hpp | 8 ++++++++ sycl/test/host-interop-task/host-task-dependency.cpp | 3 --- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 59a9b5fe347a7..2d7873748bcc9 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -1,3 +1,11 @@ +//===-- thread_pool.hpp - Simple thread pool --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #pragma once #include diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 9e8055f783572..942b79165aebd 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -1,7 +1,4 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER From 14b923294303b50a445db6ca8e95648fb4ae5608 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 11 May 2020 17:26:07 +0300 Subject: [PATCH 158/188] [SYCL] Address review comments. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 4 ++-- sycl/include/CL/sycl/handler.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 8 ++++---- sycl/source/handler.cpp | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index cbd2dd370a3be..475ea7b5b024c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -400,8 +400,8 @@ class CG { COPY_USM, FILL_USM, PREFETCH_USM, - INTEROP_TASK_CODEPLAY, - HOST_TASK_CODEPLAY + CODEPLAY_INTEROP_TASK, + CODEPLAY_HOST_TASK }; CG(CGTYPE Type, vector_class> ArgsStorage, diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index d8e250fbac16e..40aac4ebfe765 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -759,7 +759,7 @@ class __SYCL_EXPORT handler { MHostTask.reset(new detail::HostTask(std::move(Func))); - MCGType = detail::CG::HOST_TASK_CODEPLAY; + MCGType = detail::CG::CODEPLAY_HOST_TASK; } /// Defines and invokes a SYCL kernel function for the specified range and @@ -1088,7 +1088,7 @@ class __SYCL_EXPORT handler { template void interop_task(FuncT Func) { MInteropTask.reset(new detail::InteropTask(std::move(Func))); - MCGType = detail::CG::INTEROP_TASK_CODEPLAY; + MCGType = detail::CG::CODEPLAY_INTEROP_TASK; } /// Defines and invokes a SYCL kernel function for the specified range. 
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 47050ba65a0f8..be6b7e804d2c9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -209,7 +209,7 @@ class DispatchHostTask { waitForEvents(); assert(MThisCmd->getCG().get()); - assert(MThisCmd->getCG()->getType() == CG::CGTYPE::HOST_TASK_CODEPLAY); + assert(MThisCmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK); CGHostTask *HostTask = static_cast(MThisCmd->getCG().get()); @@ -1396,7 +1396,7 @@ static std::string cgTypeToString(detail::CG::CGTYPE Type) { case detail::CG::PREFETCH_USM: return "prefetch usm"; break; - case detail::CG::HOST_TASK_CODEPLAY: + case detail::CG::CODEPLAY_HOST_TASK: return "host task"; break; default: @@ -1876,7 +1876,7 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } - case CG::CGTYPE::INTEROP_TASK_CODEPLAY: { + case CG::CGTYPE::CODEPLAY_INTEROP_TASK: { const detail::plugin &Plugin = MQueue->getPlugin(); CGInteropTask *ExecInterop = (CGInteropTask *)MCommandGroup.get(); // Wait for dependencies to complete before dispatching work on the host @@ -1906,7 +1906,7 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } - case CG::CGTYPE::HOST_TASK_CODEPLAY: { + case CG::CGTYPE::CODEPLAY_HOST_TASK: { CGHostTask *HostTask = static_cast(MCommandGroup.get()); for (ArgDesc &Arg : HostTask->MArgs) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b69eaec491d15..195cc9c1fc072 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -39,7 +39,7 @@ event handler::finalize() { MCodeLoc)); break; } - case detail::CG::INTEROP_TASK_CODEPLAY: + case detail::CG::CODEPLAY_INTEROP_TASK: CommandGroup.reset(new detail::CGInteropTask( std::move(MInteropTask), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), @@ -83,7 +83,7 @@ event handler::finalize() { std::move(MSharedPtrStorage), std::move(MRequirements), 
std::move(MEvents), MCodeLoc)); break; - case detail::CG::HOST_TASK_CODEPLAY: + case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( std::move(MHostTask), std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage), std::move(MSharedPtrStorage), From e47a09373505004703a0682f89828eadfe6f6282 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 11 May 2020 17:31:18 +0300 Subject: [PATCH 159/188] [SYCL] Address review comments. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 4 ++-- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index addcf2d18ec83..519ef56eadde7 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -734,7 +734,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, NewCmd->addDep(e); } - if (CGType == CG::CGTYPE::HOST_TASK_CODEPLAY) + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue); if (MPrintOptionsArray[AfterAddCG]) @@ -927,7 +927,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, /* Requirements = */ {}, /* DepEvents = */ {DepEvent}, - CG::HOST_TASK_CODEPLAY, /* Payload */ {})); + CG::CODEPLAY_HOST_TASK, /* Payload */ {})); ConnectCmd = new ExecCGCommand( std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 089f64d3374e3..299e449299036 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -73,7 +73,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewCmd = 
MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), DefaultHostQueue); break; - case CG::HOST_TASK_CODEPLAY: + case CG::CODEPLAY_HOST_TASK: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue); break; default: From 2aab6a16946123e41beb5dab38d30e5f218b8899 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 12 May 2020 17:44:24 +0300 Subject: [PATCH 160/188] [SYCL] Employ only read-lock while enqueueing recently added command. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 299e449299036..a7245546d92a9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -79,6 +79,10 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, default: NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); } + } + + { + std::shared_lock Lock(MGraphLock); // TODO: Check if lazy mode. EnqueueResultT Res; From 053a4c3240003dcb787c3cc59076bf1a8c68b3d5 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 12 May 2020 17:45:47 +0300 Subject: [PATCH 161/188] [SYCL] Fix race-condition Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 84e0c8f1935b3..41147e229f18c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -225,16 +225,22 @@ class DispatchHostTask { // empty command may lead to quick deallocation of MThisCmd by some cleanup // process. Thus we'll copy deps prior to completing of event and unblocking // of empty command. + // Also, it's possible to have record deallocated prior to enqueue process. + // Thus we employ read-lock of graph. 
+ { + Scheduler &Sched = Scheduler::getInstance(); + std::shared_lock Lock(Sched.MGraphLock); - std::vector Deps = MThisCmd->MDeps; + std::vector Deps = MThisCmd->MDeps; - // update self-event status - MThisCmd->MEvent->setComplete(); + // update self-event status + MThisCmd->MEvent->setComplete(); - EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; + EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - for (const DepDesc &Dep : Deps) - Scheduler::getInstance().enqueueLeavesOfReq(Dep.MDepRequirement); + for (const DepDesc &Dep : Deps) + Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement); + } } }; From 89bd48b4bbedcabe26f4726af701b16f1ed03579 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 12 May 2020 17:51:50 +0300 Subject: [PATCH 162/188] [SYCL] Remove unneeded code. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 5 ----- sycl/source/detail/scheduler/scheduler.hpp | 1 - 2 files changed, 6 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index a7245546d92a9..2cac4499e16d8 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -194,11 +194,6 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { enqueueLeavesOfReqUnlocked(Req); } -void Scheduler::enqueueLeavesOfReq(const Requirement *const Req) { - std::shared_lock Lock(MGraphLock); - enqueueLeavesOfReqUnlocked(Req); -} - // static void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req) { MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 279d314997924..65b003bdb6ed8 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -431,7 +431,6 @@ class Scheduler { static Scheduler instance; static void enqueueLeavesOfReqUnlocked(const Requirement *const Req); - 
void enqueueLeavesOfReq(const Requirement *const Req); /// Graph builder class. /// From 5fa2789df7f3007bb71de48f7f5c465cc4264e97 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 17:14:49 +0300 Subject: [PATCH 163/188] [SYCL] Set proper reason for blocking of empty cmd Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 17 +++++++++++------ sycl/source/detail/scheduler/scheduler.hpp | 3 ++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 519ef56eadde7..aea8c6716bf9c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -416,7 +416,8 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue); + EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, + Command::BlockReason::HostAccessor); Req->MBlockedCmd = EmptyCmd; @@ -642,7 +643,8 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, EmptyCommand * Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue) { + const QueueImplPtr &Queue, + Command::BlockReason Reason) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -651,7 +653,7 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, EmptyCmd->MIsBlockable = true; EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; - EmptyCmd->MBlockReason = Command::BlockReason::HostTask; + EmptyCmd->MBlockReason = Reason; for (Requirement *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); @@ -735,7 +737,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr 
CommandGroup, } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue); + addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue, + Command::BlockReason::HostTask); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); @@ -946,11 +949,13 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, const auto &Reqs = std::vector( 1, const_cast(Dep.MDepRequirement)); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, - Scheduler::getInstance().getDefaultHostQueue()); + Scheduler::getInstance().getDefaultHostQueue(), + Command::BlockReason::HostTask); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. } else { EmptyCmd = addEmptyCmd(ConnectCmd, {}, - Scheduler::getInstance().getDefaultHostQueue()); + Scheduler::getInstance().getDefaultHostQueue(), + Command::BlockReason::HostTask); // There is no requirement thus, empty command will only depend on // ConnectCmd via its event. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 65b003bdb6ed8..a8f8eebadc3aa 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -551,7 +551,8 @@ class Scheduler { EmptyCommand *addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue); + const QueueImplPtr &Queue, + Command::BlockReason Reason); protected: /// Finds a command dependency corresponding to the record. 
From 2dc3564d15b4ebb4f195d479163d70bbcafba1c2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 8 May 2020 17:40:53 +0300 Subject: [PATCH 164/188] [SYCL] Make ConnectCmd depend on requirement Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index aea8c6716bf9c..dcdb1ecf7ad74 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -946,8 +946,30 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, EmptyCommand *EmptyCmd = nullptr; if (Dep.MDepRequirement) { - const auto &Reqs = std::vector( - 1, const_cast(Dep.MDepRequirement)); + Requirement *Req = const_cast(Dep.MDepRequirement); + + // make ConnectCmd depend on requirement + { + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + Dep.MDepCommand->addUser(ConnectCmd); + + AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, + DepEventContext); + assert(AllocaCmd && "There must be alloca for requirement!"); + + std::set Deps = findDepsForReq(Record, Req, DepEventContext); + assert(Deps.size() && "There must be some deps"); + + for (Command *ReqDepCmd : Deps) { + ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); + ReqDepCmd->addUser(ConnectCmd); + } + + updateLeaves(Deps, Record, Req->MAccessMode); + addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); + } + + const auto &Reqs = std::vector(1, Req); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); From 316e98387b8264ba8db03414f0511e8d614ca87d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 13 May 2020 18:29:49 +0300 Subject: [PATCH 165/188] [SYCL] Don't depend on host task command explicitly. 
Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index dcdb1ecf7ad74..8a5660501fa60 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -961,6 +961,14 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, assert(Deps.size() && "There must be some deps"); for (Command *ReqDepCmd : Deps) { + // we don't want to depend on any host task as the only "entry point" to + // host task is its empty cmd which is in Deps anyway + if (ReqDepCmd->getType() == Command::CommandType::RUN_CG) { + auto *Cmd = static_cast(ReqDepCmd); + if (Cmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) + continue; + } + ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); ReqDepCmd->addUser(ConnectCmd); } From b93675c63352d7e52c4635e80e43a045d543d7d8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 13 May 2020 19:17:05 +0300 Subject: [PATCH 166/188] [SYCL] Fix deadlock. 
Deadlock was caused by fault in dependencies of memory-move/copy operation and host-task Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 8a5660501fa60..a4baae69ba19c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -941,8 +941,6 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, if (Command *DepCmd = reinterpret_cast(DepEvent->getCommand())) DepCmd->addUser(ConnectCmd); - ConnectCmd->addDep(DepEvent); - EmptyCommand *EmptyCmd = nullptr; if (Dep.MDepRequirement) { @@ -982,6 +980,14 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. + + // Depend Cmd on empty command + { + DepDesc CmdDep = Dep; + CmdDep.MDepCommand = EmptyCmd; + + Cmd->addDep(CmdDep); + } } else { EmptyCmd = addEmptyCmd(ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(), @@ -990,6 +996,10 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // There is no requirement thus, empty command will only depend on // ConnectCmd via its event. EmptyCmd->addDep(ConnectCmd->getEvent()); + ConnectCmd->addDep(DepEvent); + + // Depend Cmd on empty command + Cmd->addDep(EmptyCmd->getEvent()); } // FIXME graph builder shouldn't really enqueue commands. 
We're in the middle @@ -1001,8 +1011,6 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Failed to enqueue a sync event between two contexts", PI_INVALID_OPERATION); - - Cmd->addDep(ConnectCmd->getEvent()); } } // namespace detail From 52456ce13b6f1d8be2f9fbd02f22dfac97db6f7b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 13 May 2020 19:42:10 +0300 Subject: [PATCH 167/188] [SYCL] Add another test Signed-off-by: Sergey Kanaev --- .../host-task-two-queues.cpp | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 sycl/test/host-interop-task/host-task-two-queues.cpp diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp new file mode 100644 index 0000000000000..b142f9010e2ee --- /dev/null +++ b/sycl/test/host-interop-task/host-task-two-queues.cpp @@ -0,0 +1,75 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER + +#include +#include + +namespace S = cl::sycl; + +#define WIDTH 5 +#define HEIGHT 5 + +void test() { + auto EH = [](S::exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } + }; + + S::queue Q1(EH); + S::queue Q2(EH); + + std::vector DataA(WIDTH * HEIGHT, 2); + std::vector DataB(WIDTH * HEIGHT, 3); + std::vector DataC(WIDTH * HEIGHT, 1); + + S::buffer BufA{DataA.data(), S::range<2>{WIDTH, HEIGHT}}; + S::buffer BufB{DataB.data(), S::range<2>{WIDTH, HEIGHT}}; + S::buffer BufC{DataC.data(), S::range<2>{WIDTH, HEIGHT}}; + + auto CG1 = [&](S::handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + auto AccC = BufC.get_access(CGH); + auto Kernel = 
[=](S::nd_item<2> Item) { + size_t W = Item.get_global_id(0); + size_t H = Item.get_global_id(1); + AccC[W][H] += AccA[W][H] * AccB[W][H]; + }; + CGH.parallel_for(S::nd_range<2>({WIDTH, HEIGHT}, {1, 1}), Kernel); + }; + + auto CG2 = [&](S::handler &CGH) { + auto AccA = BufA.get_access(CGH); + auto AccB = BufB.get_access(CGH); + auto AccC = BufC.get_access(CGH); + + CGH.codeplay_host_task([=] { + for (size_t I = 0; I < WIDTH; ++I) + for (size_t J = 0; J < HEIGHT; ++J) { + std::cout << "C[" << I << "][" << J << "] = " << AccC[I][J] + << std::endl; + } + }); + }; + + Q1.submit(CG1); + Q2.submit(CG2); + Q2.submit(CG1); + Q1.submit(CG2); + + Q1.wait_and_throw(); + Q2.wait_and_throw(); + + for (size_t I = 0; I < WIDTH; ++I) + for (size_t J = 0; J < HEIGHT; ++J) + assert(DataC[I * HEIGHT + J] == (1 + 2 * 3) + 2 * 3); +} + +int main(void) { + test(); + return 0; +} + From b8f47cef9fe4d0e9a0f0ab0f2aaaecdc495aa215 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 13 May 2020 19:51:45 +0300 Subject: [PATCH 168/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 17 ++++++++--------- .../host-interop-task/host-task-two-queues.cpp | 3 +-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index a4baae69ba19c..68374c128973c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -641,10 +641,9 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } EmptyCommand * -Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, - const std::vector &Reqs, - const QueueImplPtr &Queue, - Command::BlockReason Reason) { +Scheduler::GraphBuilder::addEmptyCmd( + Command *Cmd, const std::vector &Reqs, + const QueueImplPtr &Queue, Command::BlockReason Reason) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ 
-951,8 +950,8 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); Dep.MDepCommand->addUser(ConnectCmd); - AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, - DepEventContext); + AllocaCommandBase *AllocaCmd = + findAllocaForReq(Record, Req, DepEventContext); assert(AllocaCmd && "There must be alloca for requirement!"); std::set Deps = findDepsForReq(Record, Req, DepEventContext); @@ -962,9 +961,9 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // we don't want to depend on any host task as the only "entry point" to // host task is its empty cmd which is in Deps anyway if (ReqDepCmd->getType() == Command::CommandType::RUN_CG) { - auto *Cmd = static_cast(ReqDepCmd); - if (Cmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) - continue; + auto *Cmd = static_cast(ReqDepCmd); + if (Cmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) + continue; } ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp index b142f9010e2ee..9036022dbbf73 100644 --- a/sycl/test/host-interop-task/host-task-two-queues.cpp +++ b/sycl/test/host-interop-task/host-task-two-queues.cpp @@ -3,8 +3,8 @@ // RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER -#include #include +#include namespace S = cl::sycl; @@ -72,4 +72,3 @@ int main(void) { test(); return 0; } - From 5b0d040c5196f55c073c402efb6699128f8c92ae Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 09:32:35 +0300 Subject: [PATCH 169/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp 
b/sycl/source/detail/scheduler/graph_builder.cpp index 68374c128973c..16e56129ad0a5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -640,8 +640,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -EmptyCommand * -Scheduler::GraphBuilder::addEmptyCmd( +EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( Command *Cmd, const std::vector &Reqs, const QueueImplPtr &Queue, Command::BlockReason Reason) { EmptyCommand *EmptyCmd = From f7c890e8f7aa8a7a9e25e1f5fba29ef27eeeb2fd Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 09:33:02 +0300 Subject: [PATCH 170/188] [SYCL] Remove FileCheck use. Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task-two-queues.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp index 9036022dbbf73..712c0a9b2ddec 100644 --- a/sycl/test/host-interop-task/host-task-two-queues.cpp +++ b/sycl/test/host-interop-task/host-task-two-queues.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER -// RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER -// RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out #include #include From 04807316c11a1aa23d7d31c4fb95de6d4a5d8b60 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 10:30:16 +0300 Subject: [PATCH 171/188] [SYCL] Fix segfault caused by missed dependency Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp 
b/sycl/source/detail/scheduler/graph_builder.cpp index 16e56129ad0a5..214044db327da 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -1000,6 +1000,8 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, Cmd->addDep(EmptyCmd->getEvent()); } + EmptyCmd->addUser(Cmd); + // FIXME graph builder shouldn't really enqueue commands. We're in the middle // of enqueue process for some command Cmd. We're going to add a dependency // for it. Need some nice and cute solution to enqueue ConnectCmd via standard From 066504e19607bbfcb4c2cf06016a0527943ecd7c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 18:17:28 +0300 Subject: [PATCH 172/188] [SYCL] Fix assertion triggering. Return reference to command group instead of pointer. Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 27 ++++--------------- sycl/source/detail/scheduler/commands.hpp | 8 +++++- .../source/detail/scheduler/graph_builder.cpp | 9 ++++--- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 41147e229f18c..8c55157654c3b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -186,39 +186,22 @@ class DispatchHostTask { } } - // Lookup for empty command amongst users of this cmd - static EmptyCommand *findUserEmptyCommand(Command *ThisCmd) { - assert(ThisCmd->MUsers.size() == 1 && - "Only a single user is expected for host task command"); - - Command *User = *ThisCmd->MUsers.begin(); - - assert(User->getType() == Command::CommandType::EMPTY_TASK && - "Expected empty command as single user of host task command"); - assert(User->MIsBlockable && "Empty command is expected to be blockable"); - assert(User->MBlockReason == Command::BlockReason::HostTask && - "Empty command is expected to be blocked due to host task"); - - return 
static_cast(User); - } - public: DispatchHostTask(ExecCGCommand *ThisCmd) : MThisCmd{ThisCmd} {} void operator()() const { waitForEvents(); - assert(MThisCmd->getCG().get()); - assert(MThisCmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK); + assert(MThisCmd->getCG().getType() == CG::CGTYPE::CODEPLAY_HOST_TASK); - CGHostTask *HostTask = static_cast(MThisCmd->getCG().get()); + CGHostTask &HostTask = static_cast(MThisCmd->getCG()); // we're ready to call the user-defined lambda now - HostTask->MHostTask->call(); - HostTask->MHostTask.reset(); + HostTask.MHostTask->call(); + HostTask.MHostTask.reset(); // unblock user empty command here - EmptyCommand *EmptyCmd = findUserEmptyCommand(MThisCmd); + EmptyCommand *EmptyCmd = MThisCmd->MEmptyCmd; assert(EmptyCmd && "No empty command found"); // Completing command's event along with unblocking enqueue readiness of diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 483fc16e0c5b4..2052d514356f6 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -463,7 +463,13 @@ class ExecCGCommand : public Command { void printDot(std::ostream &Stream) const final; void emitInstrumentationData(); - const std::unique_ptr &getCG() const { return MCommandGroup; } + detail::CG &getCG() const { return *MCommandGroup; } + + // MEmptyCmd one is only employed if this command refers to host-task. + // MEmptyCmd due to unreliable mechanism of lookup for single EmptyCommand + // amongst users of host-task-representing command. This unreliability roots + // in cleanup process. 
+ EmptyCommand *MEmptyCmd = nullptr; private: cl_int enqueueImp() final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 214044db327da..d75c45796852d 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -735,8 +735,9 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - addEmptyCmd(NewCmd.get(), NewCmd->getCG()->MRequirements, Queue, - Command::BlockReason::HostTask); + NewCmd->MEmptyCmd = addEmptyCmd( + NewCmd.get(), NewCmd->getCG().MRequirements, Queue, + Command::BlockReason::HostTask); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); @@ -961,7 +962,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // host task is its empty cmd which is in Deps anyway if (ReqDepCmd->getType() == Command::CommandType::RUN_CG) { auto *Cmd = static_cast(ReqDepCmd); - if (Cmd->getCG()->getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) + if (Cmd->getCG().getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) continue; } @@ -1002,6 +1003,8 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, EmptyCmd->addUser(Cmd); + ConnectCmd->MEmptyCmd = EmptyCmd; + // FIXME graph builder shouldn't really enqueue commands. We're in the middle // of enqueue process for some command Cmd. We're going to add a dependency // for it. 
Need some nice and cute solution to enqueue ConnectCmd via standard From 9e76b68ae6e6dfaa3646afe25363cb7b5821a3c3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 18:18:07 +0300 Subject: [PATCH 173/188] [SYCL] Update test Signed-off-by: Sergey Kanaev --- .../host-interop-task/host-task-two-queues.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp index 712c0a9b2ddec..55134149be36b 100644 --- a/sycl/test/host-interop-task/host-task-two-queues.cpp +++ b/sycl/test/host-interop-task/host-task-two-queues.cpp @@ -55,17 +55,21 @@ void test() { }); }; - Q1.submit(CG1); - Q2.submit(CG2); - Q2.submit(CG1); - Q1.submit(CG2); + static const size_t NTIMES = 100; + + for (size_t Idx = 0; Idx < NTIMES; ++Idx) { + Q1.submit(CG1); + Q2.submit(CG2); + Q2.submit(CG1); + Q1.submit(CG2); + } Q1.wait_and_throw(); Q2.wait_and_throw(); for (size_t I = 0; I < WIDTH; ++I) for (size_t J = 0; J < HEIGHT; ++J) - assert(DataC[I * HEIGHT + J] == (1 + 2 * 3) + 2 * 3); + assert(DataC[I * HEIGHT + J] == (1 + 2 * 3 * NTIMES * 2)); } int main(void) { From 6e123c4191a31a21756260985a56421e704569a3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 14 May 2020 18:31:02 +0300 Subject: [PATCH 174/188] [SYCL] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d75c45796852d..5c569ec1e6e3b 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -735,9 +735,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - NewCmd->MEmptyCmd = addEmptyCmd( - NewCmd.get(), NewCmd->getCG().MRequirements, Queue, - 
Command::BlockReason::HostTask); + NewCmd->MEmptyCmd = addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, + Queue, Command::BlockReason::HostTask); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); From 74282e04702d61a881dd1f93442b41ee461337cc Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 15 May 2020 17:20:57 +0300 Subject: [PATCH 175/188] [SYCL] Address comments. Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/cg.hpp | 1 + sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/source/detail/scheduler/commands.hpp | 6 ++++++ sycl/source/detail/scheduler/graph_builder.cpp | 10 ++++------ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 475ea7b5b024c..26ee7b6f89837 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -219,6 +219,7 @@ class HostTask { std::function MHostTask; public: + HostTask() : MHostTask([]() {}) {} HostTask(std::function &&Func) : MHostTask(Func) {} void call() { MHostTask(); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8c55157654c3b..eb40bcdf82044 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1909,7 +1909,7 @@ cl_int ExecCGCommand::enqueueImp() { break; } default: - throw std::runtime_error("Yet unsupported arg type"); + throw runtime_error("Unsupported arg type", PI_INVALID_VALUE); } } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 2052d514356f6..37880f41d1623 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -255,6 +255,12 @@ class Command { /// Instance ID tracked for the command. uint64_t MInstanceID = 0; + // This flag allows to control whether host event should be set complete + // after successfull enqueue of command. 
Event is considered as host event if + // either it's is_host() return true or there is no backend representation + // of event (i.e. getHandleRef() return reference to nullptr value). + // By default the flag is set to true due to most of host operations are + // synchronous. The only asynchronous operation currently is host-task. bool MShouldCompleteEventIfPossible = true; }; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 5c569ec1e6e3b..64af5632bd8c5 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -900,11 +900,11 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { // Make Cmd depend on DepEvent from different context. Connection is performed // via distinct ConnectCmd with host task command group on host queue. Cmd will // depend on ConnectCmd's host event. -// DepEvent may not have an associated with it command in at least two cases: +// DepEvent may not have a command associated with it in at least two cases: // - the command was deleted upon cleanup process; // - DepEvent is user event. -// In both these cases the only thing we can do is to make ConnectCmd depend on -// DepEvent. +// In both of these cases the only thing we can do is to make ConnectCmd depend +// on DepEvent. // Otherwise, when there is a command associated with DepEvent, we make // ConnectCmd depend on on this command. If there is valid, i.e. 
non-nil, // requirement in Dep we make ConnectCmd depend on DepEvent's command with this @@ -921,9 +921,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, ExecCGCommand *ConnectCmd = nullptr; { - std::function Func = []() {}; - - std::unique_ptr HT(new detail::HostTask(std::move(Func))); + std::unique_ptr HT(new detail::HostTask); std::unique_ptr ConnectCG(new detail::CGHostTask( std::move(HT), /* Args = */ {}, /* ArgsStorage = */ {}, /* AccStorage = */ {}, /* SharedPtrStorage = */ {}, From b4ab2f2520dcf932d972dc4b2d266cbcbdde395d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 15:22:01 +0300 Subject: [PATCH 176/188] [SYCL] Eliminate for-loop whilst constructing deps of connect-cmd Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 64af5632bd8c5..7b6f84a78ec29 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -944,30 +944,14 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // make ConnectCmd depend on requirement { - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - Dep.MDepCommand->addUser(ConnectCmd); - - AllocaCommandBase *AllocaCmd = - findAllocaForReq(Record, Req, DepEventContext); - assert(AllocaCmd && "There must be alloca for requirement!"); - - std::set Deps = findDepsForReq(Record, Req, DepEventContext); - assert(Deps.size() && "There must be some deps"); - - for (Command *ReqDepCmd : Deps) { - // we don't want to depend on any host task as the only "entry point" to - // host task is its empty cmd which is in Deps anyway - if (ReqDepCmd->getType() == Command::CommandType::RUN_CG) { - auto *Cmd = static_cast(ReqDepCmd); - if (Cmd->getCG().getType() == CG::CGTYPE::CODEPLAY_HOST_TASK) - continue; - } + 
ConnectCmd->addDep(Dep); + assert(reinterpret_cast(DepEvent->getCommand()) == + Dep.MDepCommand); + // add user to Dep.MDepCommand is already performed beyond this if branch - ConnectCmd->addDep(DepDesc{ReqDepCmd, Req, AllocaCmd}); - ReqDepCmd->addUser(ConnectCmd); - } + MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); - updateLeaves(Deps, Record, Req->MAccessMode); + updateLeaves({ Dep.MDepCommand }, Record, Req->MAccessMode); addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); } From 64d6ba80be8504a6d6dd80487c778aa64bf6e444 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 15:23:34 +0300 Subject: [PATCH 177/188] [SYCL] Reword construction of a vector Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 7b6f84a78ec29..82241b9692cad 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -955,7 +955,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); } - const auto &Reqs = std::vector(1, Req); + const std::vector Reqs(1, Req); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); From 692bf79cfe2f25a2c2a39b3f7e792fd2640b0f98 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 15:25:06 +0300 Subject: [PATCH 178/188] [SYCL] Reword comments Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 82241b9692cad..9527f01f41322 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -909,6 +909,7 @@ void 
Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { // ConnectCmd depend on on this command. If there is valid, i.e. non-nil, // requirement in Dep we make ConnectCmd depend on DepEvent's command with this // requirement. +// Optionality of Dep is set by Dep.MDepCommand equal to nullptr. void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, const DepDesc &Dep) { @@ -990,6 +991,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // of enqueue process for some command Cmd. We're going to add a dependency // for it. Need some nice and cute solution to enqueue ConnectCmd via standard // scheduler/graph processor mechanisms. + // Though, we need this call to enqueue to launch ConnectCmd. EnqueueResultT Res; bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) From 6f3b4d74b187c1b7b85cc1451270a2a6d402377f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 17:18:50 +0300 Subject: [PATCH 179/188] [SYCL] Eliminate const_cast Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 53 ++++++++++--------- sycl/source/detail/scheduler/scheduler.hpp | 20 ++++--- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 9527f01f41322..1fe07aed2b3b1 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -129,7 +129,7 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) { MemObjRecord * Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, - Requirement *Req) { + const Requirement *Req) { SYCLMemObjI *MemObject = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObject); @@ -416,8 +416,8 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement 
*Req, Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, - Command::BlockReason::HostAccessor); + EmptyCommand *EmptyCmd = addEmptyCmd( + UpdateHostAccCmd, {Req}, HostQueue, Command::BlockReason::HostAccessor); Req->MBlockedCmd = EmptyCmd; @@ -446,7 +446,7 @@ Command *Scheduler::GraphBuilder::addCGUpdateHost( /// 2. New and examined commands has non-overlapping requirements -> can bypass /// 3. New and examined commands have different contexts -> cannot bypass std::set -Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, Requirement *Req, +Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context) { std::set RetDeps; std::set Visited; @@ -514,7 +514,7 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, // The function searches for the alloca command matching context and // requirement. AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( - MemObjRecord *Record, Requirement *Req, const ContextImplPtr &Context) { + MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context) { auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) { bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context); if (IsSuitableSubReq(Req)) { @@ -535,7 +535,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( // Note, creation of new allocation command can lead to the current context // (Record->MCurContext) change. 
AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( - MemObjRecord *Record, Requirement *Req, QueueImplPtr Queue) { + MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue) { AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Queue->getContextImplPtr()); @@ -640,9 +640,14 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( - Command *Cmd, const std::vector &Reqs, - const QueueImplPtr &Queue, Command::BlockReason Reason) { +template +typename std::enable_if::type, + Requirement>::value, + EmptyCommand *>::type +Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, + const std::vector &Reqs, + const QueueImplPtr &Queue, + Command::BlockReason Reason) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -653,7 +658,7 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd( EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueBlocked; EmptyCmd->MBlockReason = Reason; - for (Requirement *Req : Reqs) { + for (T *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); @@ -941,23 +946,19 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, EmptyCommand *EmptyCmd = nullptr; if (Dep.MDepRequirement) { - Requirement *Req = const_cast(Dep.MDepRequirement); - // make ConnectCmd depend on requirement - { - ConnectCmd->addDep(Dep); - assert(reinterpret_cast(DepEvent->getCommand()) == - Dep.MDepCommand); - // add user to Dep.MDepCommand is already performed beyond this if branch + ConnectCmd->addDep(Dep); + assert(reinterpret_cast(DepEvent->getCommand()) == + Dep.MDepCommand); + // add user to Dep.MDepCommand is already performed beyond this if branch - MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); + MemObjRecord *Record = 
getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj); - updateLeaves({ Dep.MDepCommand }, Record, Req->MAccessMode); - addNodeToLeaves(Record, ConnectCmd, Req->MAccessMode); - } + updateLeaves({ Dep.MDepCommand }, Record, Dep.MDepRequirement->MAccessMode); + addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode); - const std::vector Reqs(1, Req); - EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, + const std::vector Reqs(1, Dep.MDepRequirement); + EmptyCmd = addEmptyCmd<>(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. @@ -970,9 +971,9 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, Cmd->addDep(CmdDep); } } else { - EmptyCmd = addEmptyCmd(ConnectCmd, {}, - Scheduler::getInstance().getDefaultHostQueue(), - Command::BlockReason::HostTask); + EmptyCmd = addEmptyCmd( + ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(), + Command::BlockReason::HostTask); // There is no requirement thus, empty command will only depend on // ConnectCmd via its event. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a8f8eebadc3aa..d455485efaad0 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -493,7 +493,7 @@ class Scheduler { /// \return a pointer to MemObjRecord for pointer to memory object. If the /// record is not found, nullptr is returned. MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue, - Requirement *Req); + const Requirement *Req); /// Decrements leaf counters for all leaves of the record. void decrementLeafCountersForRecord(MemObjRecord *Record); @@ -546,20 +546,24 @@ class Scheduler { const QueueImplPtr &Queue); /// Finds dependencies for the requirement. 
- std::set findDepsForReq(MemObjRecord *Record, Requirement *Req, + std::set findDepsForReq(MemObjRecord *Record, + const Requirement *Req, const ContextImplPtr &Context); - EmptyCommand *addEmptyCmd(Command *Cmd, - const std::vector &Req, - const QueueImplPtr &Queue, - Command::BlockReason Reason); + template + typename std::enable_if::type, + Requirement>::value, + EmptyCommand *>::type + addEmptyCmd(Command *Cmd, const std::vector &Req, + const QueueImplPtr &Queue, Command::BlockReason Reason); protected: /// Finds a command dependency corresponding to the record. DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record); /// Searches for suitable alloca in memory record. - AllocaCommandBase *findAllocaForReq(MemObjRecord *Record, Requirement *Req, + AllocaCommandBase *findAllocaForReq(MemObjRecord *Record, + const Requirement *Req, const ContextImplPtr &Context); friend class Command; @@ -569,7 +573,7 @@ class Scheduler { /// /// If none found, creates new one. AllocaCommandBase *getOrCreateAllocaForReq(MemObjRecord *Record, - Requirement *Req, + const Requirement *Req, QueueImplPtr Queue); void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req); From 5d2635b4d1161a367053648503046716b3b70972 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 17:28:23 +0300 Subject: [PATCH 180/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 24 ++++++++++--------- sycl/source/detail/scheduler/scheduler.hpp | 6 ++--- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1fe07aed2b3b1..c6787eeb652a7 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -446,7 +446,8 @@ Command *Scheduler::GraphBuilder::addCGUpdateHost( /// 2. New and examined commands has non-overlapping requirements -> can bypass /// 3. 
New and examined commands have different contexts -> cannot bypass std::set -Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, const Requirement *Req, +Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, + const Requirement *Req, const ContextImplPtr &Context) { std::set RetDeps; std::set Visited; @@ -513,8 +514,10 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, // The function searches for the alloca command matching context and // requirement. -AllocaCommandBase *Scheduler::GraphBuilder::findAllocaForReq( - MemObjRecord *Record, const Requirement *Req, const ContextImplPtr &Context) { +AllocaCommandBase * +Scheduler::GraphBuilder::findAllocaForReq(MemObjRecord *Record, + const Requirement *Req, + const ContextImplPtr &Context) { auto IsSuitableAlloca = [&Context, Req](AllocaCommandBase *AllocaCmd) { bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Context); if (IsSuitableSubReq(Req)) { @@ -641,11 +644,10 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } template -typename std::enable_if::type, - Requirement>::value, - EmptyCommand *>::type -Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, - const std::vector &Reqs, +typename std::enable_if< + std::is_same::type, Requirement>::value, + EmptyCommand *>::type +Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, const QueueImplPtr &Queue, Command::BlockReason Reason) { EmptyCommand *EmptyCmd = @@ -949,16 +951,16 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // make ConnectCmd depend on requirement ConnectCmd->addDep(Dep); assert(reinterpret_cast(DepEvent->getCommand()) == - Dep.MDepCommand); + Dep.MDepCommand); // add user to Dep.MDepCommand is already performed beyond this if branch MemObjRecord *Record = getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj); - updateLeaves({ Dep.MDepCommand }, Record, Dep.MDepRequirement->MAccessMode); + updateLeaves({Dep.MDepCommand}, Record, 
Dep.MDepRequirement->MAccessMode); addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode); const std::vector Reqs(1, Dep.MDepRequirement); - EmptyCmd = addEmptyCmd<>(ConnectCmd, Reqs, + EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), Command::BlockReason::HostTask); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d455485efaad0..d778ea3fa402e 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -551,9 +551,9 @@ class Scheduler { const ContextImplPtr &Context); template - typename std::enable_if::type, - Requirement>::value, - EmptyCommand *>::type + typename std::enable_if< + std::is_same::type, Requirement>::value, + EmptyCommand *>::type addEmptyCmd(Command *Cmd, const std::vector &Req, const QueueImplPtr &Queue, Command::BlockReason Reason); From efd1495c7b640c0dc78b91031af5615bd1e90352 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 18 May 2020 17:32:01 +0300 Subject: [PATCH 181/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/scheduler/scheduler.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index c6787eeb652a7..684c9069f3ce1 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -643,7 +643,7 @@ void Scheduler::GraphBuilder::markModifiedIfWrite(MemObjRecord *Record, } } -template +template typename std::enable_if< std::is_same::type, Requirement>::value, EmptyCommand *>::type diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d778ea3fa402e..b9bb2e8ef02b9 100644 --- 
a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -550,7 +550,7 @@ class Scheduler { const Requirement *Req, const ContextImplPtr &Context); - template + template typename std::enable_if< std::is_same::type, Requirement>::value, EmptyCommand *>::type From 0a0207b305d2b40bebf914db7ac191a92912fa78 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 19 May 2020 09:41:17 +0300 Subject: [PATCH 182/188] [SYCL] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4910a66527248..5ed6c8c61f94e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -200,16 +200,16 @@ class DispatchHostTask { CGHostTask &HostTask = static_cast(MThisCmd->getCG()); // we're ready to call the user-defined lambda now - if (HostTask.isInteropTask()) { - auto Queue = MHostTask->MQueue->get(); - auto DeviceId = MHostTask->MQueue->get_device().get(); - auto Context = MHostTask->MQueue->get_context().get(); + if (HostTask.MHostTask->isInteropTask()) { + auto Queue = HostTask.MQueue->get(); + auto DeviceId = HostTask.MQueue->get_device().get(); + auto Context = HostTask.MQueue->get_context().get(); interop_handle IH{MReqToMem, Queue, DeviceId, Context}; - HostTask.call(IH); + HostTask.MHostTask->call(IH); } else - HostTask.call(); + HostTask.MHostTask->call(); HostTask.MHostTask.reset(); From b5bacd559d60c4cc3f77d87bd17618b5e2f35128 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 19 May 2020 14:49:28 +0300 Subject: [PATCH 183/188] [SYCL] Fix test compilation Signed-off-by: Sergey Kanaev --- .../interop-task-dependency.cpp | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/sycl/test/host-interop-task/interop-task-dependency.cpp 
b/sycl/test/host-interop-task/interop-task-dependency.cpp index 820cba3364f23..15153c27967c6 100644 --- a/sycl/test/host-interop-task/interop-task-dependency.cpp +++ b/sycl/test/host-interop-task/interop-task-dependency.cpp @@ -1,7 +1,4 @@ // RUN: %clangxx -fsycl %s -o %t.out %threads_lib -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER @@ -56,7 +53,7 @@ void Thread1Fn(Context *Ctx) { } // 1. submit task writing to buffer 1 - Ctx.Queue.submit([&](S::handler &CGH) { + Ctx->Queue.submit([&](S::handler &CGH) { S::accessor GeneratorAcc(Ctx->Buf1, CGH); @@ -70,7 +67,7 @@ void Thread1Fn(Context *Ctx) { }); // 2. submit host task writing from buf 1 to buf 2 - auto HostTaskEvent = Ctx.Queue.submit([&](S::handler &CGH) { + auto HostTaskEvent = Ctx->Queue.submit([&](S::handler &CGH) { S::accessor CopierSrcAcc(Ctx->Buf1, CGH); @@ -93,8 +90,8 @@ void Thread1Fn(Context *Ctx) { assert(Ctx->Flag.compare_exchange_strong(Expected, Desired)); { - std::lock_guard Lock(Ctx.Mutex); - Ctx.CV.notify_all(); + std::lock_guard Lock(Ctx->Mutex); + Ctx->CV.notify_all(); } }; @@ -102,7 +99,7 @@ void Thread1Fn(Context *Ctx) { }); // 3. submit simple task to move data between two buffers - Ctx.Queue.submit([&](S::handler &CGH) { + Ctx->Queue.submit([&](S::handler &CGH) { S::accessor SrcAcc(Ctx->Buf2, CGH); @@ -142,7 +139,7 @@ void Thread2Fn(Context *Ctx) { std::unique_lock Lock(Ctx->Mutex); // T2.1. Wait until flag F is set eq true. 
- Ctx.CV.wait(Lock, [&Ctx] { return Ctx->Flag.load(); }); + Ctx->CV.wait(Lock, [&Ctx] { return Ctx->Flag.load(); }); assert(Ctx->Flag.load()); } @@ -156,14 +153,14 @@ void test() { S::queue Queue(EH); - Context Ctx{{false}, Queue, "", {10}, {10}, {10}, {}, {}}; + Context Ctx{{false}, Queue, {10}, {10}, {10}, {}, {}}; // 0. setup: thread 1 T1: exec smth; thread 2 T2: waits; init flag F = false auto A1 = std::async(std::launch::async, Thread1Fn, &Ctx); auto A2 = std::async(std::launch::async, Thread2Fn, &Ctx); - A1.wait(); - A2.wait(); + A1.get(); + A2.get(); assert(Ctx.Flag.load()); @@ -175,7 +172,7 @@ void test() { bool failure = false; for (size_t Idx = 0; Idx < ResultAcc.get_count(); ++Idx) { - fprintf(stderr, "Third buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); + fprintf(stderr, "Second buffer [%3zu] = %i\n", Idx, ResultAcc[Idx]); failure |= (ResultAcc[Idx] != Idx); } @@ -196,13 +193,10 @@ int main() { // CHECK:---> piEnqueueKernelLaunch( // prepare for host task // CHECK:---> piEnqueueMemBufferMap( -// creation of host task self-event -// CHECK:---> piEventCreate( -// wait on dependencies of host task -// CHECK:---> piEventsWait( -// host task is done, set status of self-event -// CHECK:---> piEventSetStatus( // launch of CopierTask kernel // CHECK:---> piKernelCreate( // CHECK: CopierTask // CHECK:---> piEnqueueKernelLaunch( +// TODO need to check for piEventsWait as "wait on dependencies of host task". +// At the same time this piEventsWait may occur anywhere after +// piEnqueueMemBufferMap ("prepare for host task"). 
From 5224bf7f1df038de1e056127aaafbd930a851b55 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 19 May 2020 15:24:04 +0300 Subject: [PATCH 184/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 5 +-- sycl/include/CL/sycl/interop_handle.hpp | 41 ++++++++++------------- sycl/source/detail/scheduler/commands.cpp | 10 +++--- sycl/source/interop_handle.cpp | 14 ++++---- 4 files changed, 32 insertions(+), 38 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 5b695629e6261..015faca289300 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -825,8 +825,9 @@ class __SYCL_EXPORT handler { } template - typename std::enable_if::type, void()>::value>::type + typename std::enable_if< + detail::check_fn_signature::type, + void()>::value>::type codeplay_host_task(FuncT Func) { throwIfActionIsCreated(); diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index f9e54cb2f1ed5..21446eb959a2a 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -9,10 +9,10 @@ #pragma once #include +#include #include #include #include -#include #include @@ -20,10 +20,10 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { - class AccessorBaseHost; - class ExecCGCommand; - class DispatchHostTask; -} +class AccessorBaseHost; +class ExecCGCommand; +class DispatchHostTask; +} // namespace detail template @@ -39,7 +39,7 @@ class interop_handle { /// asynchronously. 
template - typename std::enable_if::type get_native_mem(const accessor &Acc) const { @@ -57,12 +57,13 @@ class interop_handle { template - typename std::enable_if::type - get_native_mem(const accessor &Acc) const { + get_native_mem(const accessor &Acc) const { throw invalid_object_error("Getting memory object out of host accessor is " - "not allowed", PI_INVALID_MEM_OBJECT); + "not allowed", + PI_INVALID_MEM_OBJECT); } /// Returns an underlying OpenCL queue for the SYCL queue used to submit the @@ -74,26 +75,20 @@ class interop_handle { /// dispatch work, and that other potential OpenCL command queues associated /// with the same SYCL command queue are not executing commands while the host /// task is executing. - cl_command_queue get_native_queue() const noexcept { - return MQueue; - } + cl_command_queue get_native_queue() const noexcept { return MQueue; } /// Returns an underlying OpenCL device associated with the SYCL queue used /// to submit the command group, or the fallback queue if this command-group /// is re-trying execution on an OpenCL queue. - cl_device_id get_native_device() const noexcept { - return MDeviceId; - } + cl_device_id get_native_device() const noexcept { return MDeviceId; } /// Returns an underlying OpenCL context associated with the SYCL queue used /// to submit the command group, or the fallback queue if this command-group /// is re-trying execution on an OpenCL queue. 
- cl_context get_native_context() const noexcept { - return MContext; - } + cl_context get_native_context() const noexcept { return MContext; } private: - using ReqToMem = std::pair; + using ReqToMem = std::pair; template @@ -105,11 +100,11 @@ class interop_handle { // TODO set c-tor private interop_handle(std::vector MemObjs, cl_command_queue Queue, cl_device_id DeviceId, cl_context Context) - : MQueue(Queue), MDeviceId(DeviceId), - MContext(Context), MMemObjs(std::move(MemObjs)) {} + : MQueue(Queue), MDeviceId(DeviceId), MContext(Context), + MMemObjs(std::move(MemObjs)) {} private: - cl_mem getMemImpl(detail::Requirement* Req) const; + cl_mem getMemImpl(detail::Requirement *Req) const; cl_command_queue MQueue; cl_device_id MDeviceId; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5ed6c8c61f94e..7281ca2747bec 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -189,7 +189,7 @@ class DispatchHostTask { public: DispatchHostTask(ExecCGCommand *ThisCmd, - std::vector ReqToMem) + std::vector ReqToMem) : MThisCmd{ThisCmd} {} void operator()() const { @@ -1929,14 +1929,14 @@ cl_int ExecCGCommand::enqueueImp() { std::vector ReqToMem; // Extract the Mem Objects for all Requirements, to ensure they are // available if a user ask for them inside the interop task scope - const auto& HandlerReq = HostTask->MRequirements; - std::for_each(std::begin(HandlerReq), std::end(HandlerReq), - [&ReqToMem, this](Requirement* Req) { + const std::vector &HandlerReq = HostTask->MRequirements; + auto ReqToMemConv = [&ReqToMem, this](Requirement *Req) { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); interop_handle::ReqToMem ReqToMemEl = std::make_pair(Req, MemArg); ReqToMem.emplace_back(ReqToMemEl); - }); + }; + std::for_each(std::begin(HandlerReq), std::end(HandlerReq), ReqToMemConv); 
std::sort(std::begin(ReqToMem), std::end(ReqToMem)); MQueue->getThreadPool().submit( diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 5e6b68fe29634..443787c3b66e4 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include @@ -15,15 +15,13 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { cl_mem interop_handle::getMemImpl(detail::Requirement *Req) const { - auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), - [=](ReqToMem Elem) { - return (Elem.first == Req); - }); + auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), + [=](ReqToMem Elem) { return (Elem.first == Req); }); - if (Iter == std::end(MMemObjs)) - throw("Invalid memory object used inside interop"); + if (Iter == std::end(MMemObjs)) + throw("Invalid memory object used inside interop"); - return detail::pi::cast(Iter->second); + return detail::pi::cast(Iter->second); } } // namespace sycl From 638e241643237eb546049f7d2d36ccaebfebe11e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 19 May 2020 15:39:18 +0300 Subject: [PATCH 185/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 6 +++--- sycl/include/CL/sycl/interop_handle.hpp | 4 ++-- sycl/source/interop_handle.cpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 015faca289300..ebdb65ba66d46 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -840,9 +840,9 @@ class __SYCL_EXPORT handler { } template - typename std::enable_if::type, - void(interop_handle)>::value>::type + typename std::enable_if< + detail::check_fn_signature::type, + void(interop_handle)>::value>::type codeplay_host_task(FuncT &&Func) { throwIfActionIsCreated(); diff --git 
a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index 21446eb959a2a..daca59ab60f40 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -41,8 +41,8 @@ class interop_handle { access::target accessTarget, access::placeholder isPlaceholder> typename std::enable_if::type - get_native_mem(const accessor &Acc) const { + get_native_mem(const accessor &Acc) const { #ifndef __SYCL_DEVICE_ONLY__ // employ reinterpret_cast instead of static_cast due to cycle in includes // involving CL/sycl/accessor.hpp diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp index 443787c3b66e4..c1df4993c700b 100644 --- a/sycl/source/interop_handle.cpp +++ b/sycl/source/interop_handle.cpp @@ -16,7 +16,7 @@ namespace sycl { cl_mem interop_handle::getMemImpl(detail::Requirement *Req) const { auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), - [=](ReqToMem Elem) { return (Elem.first == Req); }); + [=](ReqToMem Elem) { return (Elem.first == Req); }); if (Iter == std::end(MMemObjs)) throw("Invalid memory object used inside interop"); From 06b0ebd0b67aa26d8b58fa39f91251bbc4e4f83f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 19 May 2020 15:43:24 +0300 Subject: [PATCH 186/188] [SYCL] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/handler.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index ebdb65ba66d46..a0c14bc005253 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -825,9 +825,8 @@ class __SYCL_EXPORT handler { } template - typename std::enable_if< - detail::check_fn_signature::type, - void()>::value>::type + typename std::enable_if::type, void()>::value>::type codeplay_host_task(FuncT Func) { throwIfActionIsCreated(); From be6cc5aa3602be58c6d72290473516dec62b55d0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev 
Date: Mon, 25 May 2020 14:34:06 +0300 Subject: [PATCH 187/188] [SYCL] Fix warning Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/interop_handle.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index daca59ab60f40..1d4981eebeb59 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -60,7 +60,7 @@ class interop_handle { typename std::enable_if::type get_native_mem(const accessor &Acc) const { + isPlaceholder> &) const { throw invalid_object_error("Getting memory object out of host accessor is " "not allowed", PI_INVALID_MEM_OBJECT); From dfefbb649321357141e5eb55346628d14b9fd2c7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 25 May 2020 14:49:48 +0300 Subject: [PATCH 188/188] [SYCL] Fix warning Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/interop_handle.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index 1d4981eebeb59..ba8704aa25b41 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -50,6 +50,7 @@ class interop_handle { reinterpret_cast(&Acc)); return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); #else + (void)Acc; // we believe this won't be ever called on device side return static_cast(0x0); #endif