From 841193b8e409ff550f420be41a26b54764a010a8 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Fri, 19 Nov 2021 13:50:55 +0300 Subject: [PATCH 1/6] [SYCL][WIP] Implement queue flushing --- sycl/include/CL/sycl/detail/pi.def | 1 + sycl/include/CL/sycl/detail/pi.h | 2 ++ sycl/plugins/cuda/pi_cuda.cpp | 3 +++ sycl/plugins/hip/pi_hip.cpp | 3 +++ sycl/plugins/level_zero/pi_level_zero.cpp | 2 ++ sycl/plugins/opencl/pi_opencl.cpp | 1 + sycl/source/detail/event_impl.cpp | 20 ++++++++++++++++--- sycl/source/detail/event_impl.hpp | 7 +++++++ sycl/source/detail/scheduler/commands.cpp | 17 ++++++++++++++++ sycl/test/abi/pi_level_zero_symbol_check.dump | 1 + .../unittests/helpers/CommonRedefinitions.hpp | 13 ++++++++++++ 11 files changed, 67 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 730b4afa50c0c..c9a68c6cadec3 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -45,6 +45,7 @@ _PI_API(piextContextCreateWithNativeHandle) _PI_API(piQueueCreate) _PI_API(piQueueGetInfo) _PI_API(piQueueFinish) +_PI_API(piQueueFlush) _PI_API(piQueueRetain) _PI_API(piQueueRelease) _PI_API(piextQueueGetNativeHandle) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index c27be2edb35e8..661db13fdca0d 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -1082,6 +1082,8 @@ __SYCL_EXPORT pi_result piQueueRelease(pi_queue command_queue); __SYCL_EXPORT pi_result piQueueFinish(pi_queue command_queue); +__SYCL_EXPORT pi_result piQueueFlush(pi_queue command_queue); + /// Gets the native handle of a PI queue object. /// /// \param queue is the PI queue to get the native handle of. diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 77a7f9c50c008..ddc7c097812be 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2252,6 +2252,8 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) { return result; } +pi_result cuda_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } + /// Gets the native CUDA handle of a PI queue object /// /// \param[in] queue The PI queue to get the native CUDA object of. @@ -4885,6 +4887,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, cuda_piQueueCreate) _PI_CL(piQueueGetInfo, cuda_piQueueGetInfo) _PI_CL(piQueueFinish, cuda_piQueueFinish) + _PI_CL(piQueueFlush, cuda_piQueueFlush) _PI_CL(piQueueRetain, cuda_piQueueRetain) _PI_CL(piQueueRelease, cuda_piQueueRelease) _PI_CL(piextQueueGetNativeHandle, cuda_piextQueueGetNativeHandle) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d5ce87e8b410d..4848ed6268cda 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -2202,6 +2202,8 @@ pi_result hip_piQueueFinish(pi_queue command_queue) { return result; } +pi_result hip_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } + /// Gets the native HIP handle of a PI queue object /// /// \param[in] queue The PI queue to get the native HIP object of. @@ -4820,6 +4822,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, hip_piQueueCreate) _PI_CL(piQueueGetInfo, hip_piQueueGetInfo) _PI_CL(piQueueFinish, hip_piQueueFinish) + _PI_CL(piQueueFlush, hip_piQueueFlush) _PI_CL(piQueueRetain, hip_piQueueRetain) _PI_CL(piQueueRelease, hip_piQueueRelease) _PI_CL(piextQueueGetNativeHandle, hip_piextQueueGetNativeHandle) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 7eb43482e8a8c..bbf70e02f736d 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2997,6 +2997,8 @@ pi_result piQueueFinish(pi_queue Queue) { return PI_SUCCESS; } +pi_result piQueueFlush(pi_queue Queue) { return PI_SUCCESS; } + pi_result piextQueueGetNativeHandle(pi_queue Queue, pi_native_handle *NativeHandle) { PI_ASSERT(Queue, PI_INVALID_QUEUE); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e259ac5e942cf..daf552bc484e6 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1377,6 +1377,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, piQueueCreate) _PI_CL(piQueueGetInfo, clGetCommandQueueInfo) _PI_CL(piQueueFinish, clFinish) + _PI_CL(piQueueFlush, clFlush) _PI_CL(piQueueRetain, clRetainCommandQueue) _PI_CL(piQueueRelease, clReleaseCommandQueue) _PI_CL(piextQueueGetNativeHandle, piextQueueGetNativeHandle) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index dbcf4284953c8..bf54c0d225d92 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -93,11 +93,12 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MState = HES_NotComplete; } -event_impl::event_impl() : MState(HES_Complete) {} +event_impl::event_impl() : MIsFlushed(true), MState(HES_Complete) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), - MOpenCLInterop(true), MHostEvent(false), MState(HES_Complete) { + MOpenCLInterop(true), MHostEvent(false), MIsFlushed(true), + MState(HES_Complete) { if (MContext->is_host()) { throw cl::sycl::invalid_parameter_error( @@ -120,7 +121,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) getPlugin().call(MEvent); } -event_impl::event_impl(QueueImplPtr Queue) { +event_impl::event_impl(QueueImplPtr Queue) : MQueue{Queue} { if (Queue->is_host()) { MState.store(HES_NotComplete); @@ -344,6 +345,19 @@ std::vector event_impl::getWaitList() { return Result; } +bool event_impl::isFlushed() { + if (MIsFlushed) + return true; + if (!MEvent) + return false; + pi_event_status Status = PI_EVENT_QUEUED; + getPlugin().call( + MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status, + nullptr); + MIsFlushed = Status != PI_EVENT_QUEUED; + return MIsFlushed; +} + void event_impl::cleanupDependencyEvents() { std::lock_guard Lock(MMutex); MPreparedDepsEvents.clear(); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 3bb82e7a2411a..017cb21986d79 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -182,6 +182,10 @@ class event_impl { /// @return a vector of "immediate" dependencies for this event_impl. std::vector getWaitList(); + QueueImplPtr getQueue() const { return MQueue.lock(); } + + bool isFlushed(); + /// Cleans dependencies of this event_impl void cleanupDependencyEvents(); @@ -200,11 +204,14 @@ class event_impl { bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; + std::weak_ptr MQueue; /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; std::vector MPreparedHostDepsEvents; + std::atomic MIsFlushed = false; + enum HostEventState : int { HES_NotComplete = 0, HES_Complete }; // State of host event. Employed only for host events and event with no diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 65dfcc285f75b..74a0b4ee5e6fd 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -169,6 +169,17 @@ getPiEvents(const std::vector &EventImpls) { return RetPiEvents; } +static void flushCrossQueueDeps(const std::vector &EventImpls, + const QueueImplPtr &Queue) { + for (auto &EventImpl : EventImpls) { + QueueImplPtr DependencyQueue = EventImpl->getQueue(); + if (Queue != DependencyQueue && !EventImpl->isFlushed()) { + EventImpl->getPlugin().call( + DependencyQueue->getHandleRef()); + } + } +} + class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; @@ -325,6 +336,7 @@ void Command::waitForEvents(QueueImplPtr Queue, #endif std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); const detail::plugin &Plugin = Queue->getPlugin(); Plugin.call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); @@ -1073,6 +1085,7 @@ cl_int MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( @@ -1150,6 +1163,7 @@ cl_int UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), @@ -1250,6 +1264,7 @@ cl_int MemCpyCommand::enqueueImp() { RT::PiEvent &Event = MEvent->getHandleRef(); auto RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), @@ -1400,6 +1415,7 @@ cl_int MemCpyCommandHost::enqueueImp() { return CL_SUCCESS; } + flushCrossQueueDeps(EventImpls, getWorkerQueue()); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, @@ -1986,6 +2002,7 @@ cl_int ExecCGCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index f080521418500..fd8e328cba540 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -70,6 +70,7 @@ piProgramRelease piProgramRetain piQueueCreate piQueueFinish +piQueueFlush piQueueGetInfo piQueueRelease piQueueRetain diff --git a/sycl/unittests/helpers/CommonRedefinitions.hpp b/sycl/unittests/helpers/CommonRedefinitions.hpp index c8045f8435cf1..71c2712573c92 100644 --- a/sycl/unittests/helpers/CommonRedefinitions.hpp +++ b/sycl/unittests/helpers/CommonRedefinitions.hpp @@ -114,6 +114,18 @@ inline pi_result redefinedEventsWaitCommon(pi_uint32 num_events, return PI_SUCCESS; } +inline pi_result redefinedEventGetInfoCommon(pi_event event, + pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + auto *status = reinterpret_cast(param_value); + *status = PI_EVENT_SUBMITTED; + } + return PI_SUCCESS; +} + inline pi_result redefinedEventReleaseCommon(pi_event event) { if (event != nullptr) delete reinterpret_cast(event); @@ -166,6 +178,7 @@ inline void setupDefaultMockAPIs(sycl::unittest::PiMock &Mock) { Mock.redefine( redefinedKernelSetExecInfoCommon); Mock.redefine(redefinedEventsWaitCommon); + Mock.redefine(redefinedEventGetInfoCommon); Mock.redefine(redefinedEventReleaseCommon); Mock.redefine( redefinedEnqueueKernelLaunchCommon); From e0eb498421d366f61f0d31d1f730ad1d8f2741b2 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 1 Dec 2021 16:01:58 +0300 Subject: [PATCH 2/6] Add unit tests --- sycl/source/detail/event_impl.cpp | 30 +- sycl/source/detail/event_impl.hpp | 9 +- sycl/source/detail/scheduler/commands.cpp | 6 +- sycl/unittests/scheduler/CMakeLists.txt | 1 + sycl/unittests/scheduler/QueueFlushing.cpp | 285 ++++++++++++++++++ .../scheduler/SchedulerTestUtils.hpp | 2 + 6 files changed, 319 insertions(+), 14 deletions(-) create mode 100644 sycl/unittests/scheduler/QueueFlushing.cpp diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index bf54c0d225d92..5c7cc7373f61d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -345,17 +345,35 @@ std::vector event_impl::getWaitList() { return Result; } -bool event_impl::isFlushed() { +void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { + assert(MEvent != nullptr); if (MIsFlushed) - return true; - if (!MEvent) - return false; + return; + + Command *Cmd = static_cast(getCommand()); + assert(!Cmd || Cmd->getWorkerQueue() != nullptr); + QueueImplPtr Queue = Cmd ? Cmd->getWorkerQueue() : MQueue.lock(); + // If the queue has been released, all of the commands have already been + // implicitly flushed by piQueueRelease. + if (!Queue) { + MIsFlushed = true; + return; + } + if (Queue == UserQueue) + return; + + // Check if the task for this event has already been submitted. pi_event_status Status = PI_EVENT_QUEUED; getPlugin().call( MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status, nullptr); - MIsFlushed = Status != PI_EVENT_QUEUED; - return MIsFlushed; + if (Status != PI_EVENT_QUEUED) { + MIsFlushed = true; + return; + } + + getPlugin().call(Queue->getHandleRef()); + MIsFlushed = true; } void event_impl::cleanupDependencyEvents() { diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 017cb21986d79..55ac927321fce 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -182,9 +182,10 @@ class event_impl { /// @return a vector of "immediate" dependencies for this event_impl. std::vector getWaitList(); - QueueImplPtr getQueue() const { return MQueue.lock(); } - - bool isFlushed(); + /// Performs a flush on the queue associated with this event if the user queue + /// is different and the task associated with this event hasn't been submitted + /// to the device yet. + void flushIfNeeded(const QueueImplPtr &UserQueue); /// Cleans dependencies of this event_impl void cleanupDependencyEvents(); @@ -210,6 +211,8 @@ class event_impl { std::vector MPreparedDepsEvents; std::vector MPreparedHostDepsEvents; + /// Indicates that the task associated with this event has been submitted by + /// the queue to the device. std::atomic MIsFlushed = false; enum HostEventState : int { HES_NotComplete = 0, HES_Complete }; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 74a0b4ee5e6fd..23892249558b6 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -172,11 +172,7 @@ getPiEvents(const std::vector &EventImpls) { static void flushCrossQueueDeps(const std::vector &EventImpls, const QueueImplPtr &Queue) { for (auto &EventImpl : EventImpls) { - QueueImplPtr DependencyQueue = EventImpl->getQueue(); - if (Queue != DependencyQueue && !EventImpl->isFlushed()) { - EventImpl->getPlugin().call( - DependencyQueue->getHandleRef()); - } + EventImpl->flushIfNeeded(Queue); } } diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 9674649882c8c..1ec8c7b4d894a 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -15,5 +15,6 @@ add_sycl_unittest(SchedulerTests OBJECT InOrderQueueHostTaskDeps.cpp AllocaLinking.cpp RequiredWGSize.cpp + QueueFlushing.cpp utils.cpp ) diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp new file mode 100644 index 0000000000000..14e622dcdeb43 --- /dev/null +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -0,0 +1,285 @@ +//==------------ QueueFlushing.cpp --- Scheduler unit tests ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedulerTest.hpp" +#include "SchedulerTestUtils.hpp" + +#include +#include + +using namespace sycl; + +static pi_queue ExpectedDepQueue = nullptr; +static bool QueueFlushed = false; +static bool EventStatusQueried = false; +static pi_event_status EventStatus = PI_EVENT_QUEUED; + +static pi_result redefinedQueueFlush(pi_queue Queue) { + EXPECT_EQ(ExpectedDepQueue, Queue); + EXPECT_FALSE(QueueFlushed); + QueueFlushed = true; + EventStatus = PI_EVENT_SUBMITTED; + return PI_SUCCESS; +} + +static pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + auto *Status = reinterpret_cast(param_value); + *Status = EventStatus; + EventStatusQueried = true; + } + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferReadRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferWriteRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferMap( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event, void **ret_map) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferFill( + pi_queue command_queue, pi_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static void resetTestCtx() { + EventStatus = PI_EVENT_QUEUED; + QueueFlushed = false; + EventStatusQueried = false; +} + +static void addDepAndEnqueue(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq) { + MockCommand DepCmd(DepQueue); + DepCmd.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd->addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + + detail::EnqueueResultT Res; + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); +} + +static void testCommandEnqueue(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq, + bool ExpectedFlush = true) { + resetTestCtx(); + addDepAndEnqueue(Cmd, DepQueue, MockReq); + EXPECT_EQ(QueueFlushed, ExpectedFlush); +} + +static void testEventStatusCheck(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq, + pi_event_status ReturnedEventStatus) { + resetTestCtx(); + EventStatus = ReturnedEventStatus; + addDepAndEnqueue(Cmd, DepQueue, MockReq); + EXPECT_FALSE(QueueFlushed); +} + +TEST_F(SchedulerTest, QueueFlushing) { + default_selector Selector; + platform Plt{default_selector()}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + Mock.redefine(redefinedQueueFlush); + Mock.redefine(redefinedEventGetInfo); + Mock.redefine( + redefinedEnqueueMemBufferReadRect); + Mock.redefine( + redefinedEnqueueMemBufferWriteRect); + Mock.redefine( + redefinedEnqueueMemBufferMap); + Mock.redefine(redefinedEnqueueMemUnmap); + Mock.redefine( + redefinedEnqueueMemBufferFill); + + context Ctx{Plt}; + queue QueueA{Ctx, Selector}; + detail::QueueImplPtr QueueImplA = detail::getSyclObjImpl(QueueA); + queue QueueB{Ctx, Selector}; + detail::QueueImplPtr QueueImplB = detail::getSyclObjImpl(QueueB); + ExpectedDepQueue = QueueImplB->getHandleRef(); + + int val; + buffer Buf(&val, range<1>(1)); + detail::Requirement MockReq = getMockRequirement(Buf); + detail::AllocaCommand AllocaCmd = detail::AllocaCommand(QueueImplA, MockReq); + void *MockHostPtr; + detail::EnqueueResultT Res; + + // Check that each of the non-blocking commands flush the dependency queue + { + detail::MapMemObject MapCmd{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testCommandEnqueue(&MapCmd, QueueImplB, MockReq); + + detail::UnMapMemObject UnmapCmd{&AllocaCmd, MockReq, &MockHostPtr, + QueueImplA}; + testCommandEnqueue(&UnmapCmd, QueueImplB, MockReq); + + device HostDevice; + detail::QueueImplPtr DefaultHostQueue{ + new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; + detail::AllocaCommand HostAllocaCmd = + detail::AllocaCommand(DefaultHostQueue, MockReq); + + detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, + MockReq, &HostAllocaCmd, + QueueImplA, DefaultHostQueue}; + testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); + + detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, + MockReq, &MockHostPtr, + QueueImplA, DefaultHostQueue}; + testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); + + std::unique_ptr CG{new detail::CGFill(/*Pattern*/ {}, &MockReq, + /*ArgsStorage*/ {}, + /*AccStorage*/ {}, + /*SharedPtrStorage*/ {}, + /*Requirements*/ {}, + /*Events*/ {})}; + detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA}; + MockReq.MDims = 1; + (void)ExecCGCmd.addDep(detail::DepDesc(&AllocaCmd, &MockReq, &AllocaCmd)); + testCommandEnqueue(&ExecCGCmd, QueueImplB, MockReq); + } + + // Check dependency event without a command + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + detail::EventImplPtr DepEvent{new detail::event_impl(QueueImplB)}; + DepEvent->setContextImpl(QueueImplB->getContextImplPtr()); + DepEvent->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(DepEvent); + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + EXPECT_TRUE(QueueFlushed); + } + + // Check that flush isn't called for a released queue. + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + detail::EventImplPtr DepEvent; + { + queue TempQueue{Ctx, Selector}; + detail::QueueImplPtr TempQueueImpl = detail::getSyclObjImpl(TempQueue); + DepEvent.reset(new detail::event_impl(TempQueueImpl)); + DepEvent->setContextImpl(TempQueueImpl->getContextImplPtr()); + DepEvent->getHandleRef() = reinterpret_cast(new int{}); + } + (void)Cmd.addDep(DepEvent); + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + EXPECT_FALSE(EventStatusQueried); + EXPECT_FALSE(QueueFlushed); + } + + // Check that same queue dependencies are not flushed + { + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testCommandEnqueue(&Cmd, QueueImplA, MockReq, false); + } + + // Check that flush is not called twice for the same dependency queue + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + MockCommand DepCmdA(QueueImplB); + DepCmdA.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(detail::DepDesc{&DepCmdA, &MockReq, nullptr}); + MockCommand DepCmdB(QueueImplB); + DepCmdB.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(detail::DepDesc{&DepCmdB, &MockReq, nullptr}); + // The check is performed in redefinedQueueFlush + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + } + + // Check that the event status isn't requested twice for the same event + { + resetTestCtx(); + detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + MockCommand DepCmd(QueueImplB); + DepCmd.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)CmdA.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + MockScheduler::enqueueCommand(&CmdA, Res, detail::NON_BLOCKING); + + EventStatusQueried = false; + detail::MapMemObject CmdB{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + (void)CmdB.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + MockScheduler::enqueueCommand(&CmdB, Res, detail::NON_BLOCKING); + EXPECT_FALSE(EventStatusQueried); + } + + // Check that flush isn't called for submitted dependencies + { + detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdA, QueueImplB, MockReq, PI_EVENT_SUBMITTED); + detail::MapMemObject CmdB{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdB, QueueImplB, MockReq, PI_EVENT_RUNNING); + detail::MapMemObject CmdC{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdC, QueueImplB, MockReq, PI_EVENT_COMPLETE); + } +} diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index e918951108488..bd80f24820f8f 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -78,6 +78,8 @@ class MockCommand : public cl::sycl::detail::Command { Command::waitForEvents(Queue, RawEvents, Event); } + std::shared_ptr getEvent() { return MEvent; } + protected: cl::sycl::detail::Requirement MRequirement; }; From 263a987f80ecd4977c736c9aa9013b3004c29cf0 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Wed, 8 Dec 2021 14:50:33 +0300 Subject: [PATCH 3/6] Apply comments --- sycl/include/CL/sycl/detail/pi.h | 2 +- sycl/plugins/cuda/pi_cuda.cpp | 3 +++ sycl/plugins/hip/pi_hip.cpp | 3 +++ sycl/plugins/level_zero/pi_level_zero.cpp | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 661db13fdca0d..262bb2a7198c2 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -44,7 +44,7 @@ // #include "CL/cl.h" #define _PI_H_VERSION_MAJOR 5 -#define _PI_H_VERSION_MINOR 7 +#define _PI_H_VERSION_MINOR 8 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index ddc7c097812be..74b9bde573d3a 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2252,6 +2252,9 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) { return result; } +// There is no CUDA counterpart for queue flushing and we don't run into the +// same problem of having to flush cross-queue dependencies as some of the +// other plugins, so it can be left as no-op. pi_result cuda_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } /// Gets the native CUDA handle of a PI queue object diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 4848ed6268cda..97dd5baab67c6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -2202,6 +2202,9 @@ pi_result hip_piQueueFinish(pi_queue command_queue) { return result; } +// There is no HIP counterpart for queue flushing and we don't run into the +// same problem of having to flush cross-queue dependencies as some of the +// other plugins, so it can be left as no-op. pi_result hip_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } /// Gets the native HIP handle of a PI queue object diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index bbf70e02f736d..65d172aa74444 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2997,6 +2997,8 @@ pi_result piQueueFinish(pi_queue Queue) { return PI_SUCCESS; } +// Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList, +// so this can be left as a no-op. pi_result piQueueFlush(pi_queue Queue) { return PI_SUCCESS; } pi_result piextQueueGetNativeHandle(pi_queue Queue, From 36021871267fa3b8dce031946e170780734505a8 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 9 Dec 2021 12:51:46 +0300 Subject: [PATCH 4/6] Update the changelog and add the missed bump of the major version --- sycl/include/CL/sycl/detail/pi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 262bb2a7198c2..f0ff2a1db1d2f 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -40,10 +40,11 @@ // changes the API version from 3.5 to 4.6. // 5.7 Added new context and ownership arguments to // piextEventCreateWithNativeHandle -// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. +// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. Added +// piQueueFlush function. // #include "CL/cl.h" -#define _PI_H_VERSION_MAJOR 5 +#define _PI_H_VERSION_MAJOR 6 #define _PI_H_VERSION_MINOR 8 #define _PI_STRING_HELPER(a) #a From 4d877d3d2da14bedbb59dedf465d933beb3f0eb4 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 9 Dec 2021 13:31:12 +0300 Subject: [PATCH 5/6] Apply comments --- sycl/source/detail/event_impl.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 5c7cc7373f61d..7b973a2202356 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -350,9 +350,7 @@ void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { if (MIsFlushed) return; - Command *Cmd = static_cast(getCommand()); - assert(!Cmd || Cmd->getWorkerQueue() != nullptr); - QueueImplPtr Queue = Cmd ? Cmd->getWorkerQueue() : MQueue.lock(); + QueueImplPtr Queue = MQueue.lock(); // If the queue has been released, all of the commands have already been // implicitly flushed by piQueueRelease. if (!Queue) { @@ -367,12 +365,9 @@ void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { getPlugin().call( MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status, nullptr); - if (Status != PI_EVENT_QUEUED) { - MIsFlushed = true; - return; + if (Status == PI_EVENT_QUEUED) { + getPlugin().call(Queue->getHandleRef()); } - - getPlugin().call(Queue->getHandleRef()); MIsFlushed = true; } From fe17164b0c184e5904cc2d697b45ea2e69a2ec7b Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Thu, 9 Dec 2021 13:38:17 +0300 Subject: [PATCH 6/6] Apply comment --- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_impl.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 7b973a2202356..8a4c1138e84df 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -121,7 +121,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) getPlugin().call(MEvent); } -event_impl::event_impl(QueueImplPtr Queue) : MQueue{Queue} { +event_impl::event_impl(const QueueImplPtr &Queue) : MQueue{Queue} { if (Queue->is_host()) { MState.store(HES_NotComplete); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 55ac927321fce..7255e5ccdfe5f 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -44,7 +44,7 @@ class event_impl { /// \param Event is a valid instance of plug-in event. /// \param SyclContext is an instance of SYCL context. event_impl(RT::PiEvent Event, const context &SyclContext); - event_impl(QueueImplPtr Queue); + event_impl(const QueueImplPtr &Queue); /// Checks if this event is a SYCL host event. ///