diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 730b4afa50c0c..c9a68c6cadec3 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -45,6 +45,7 @@ _PI_API(piextContextCreateWithNativeHandle) _PI_API(piQueueCreate) _PI_API(piQueueGetInfo) _PI_API(piQueueFinish) +_PI_API(piQueueFlush) _PI_API(piQueueRetain) _PI_API(piQueueRelease) _PI_API(piextQueueGetNativeHandle) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index c27be2edb35e8..f0ff2a1db1d2f 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -40,11 +40,12 @@ // changes the API version from 3.5 to 4.6. // 5.7 Added new context and ownership arguments to // piextEventCreateWithNativeHandle -// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. +// 6.8 Added new ownership argument to piextProgramCreateWithNativeHandle. Added +// piQueueFlush function. // #include "CL/cl.h" -#define _PI_H_VERSION_MAJOR 5 -#define _PI_H_VERSION_MINOR 7 +#define _PI_H_VERSION_MAJOR 6 +#define _PI_H_VERSION_MINOR 8 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -1082,6 +1083,8 @@ __SYCL_EXPORT pi_result piQueueRelease(pi_queue command_queue); __SYCL_EXPORT pi_result piQueueFinish(pi_queue command_queue); +__SYCL_EXPORT pi_result piQueueFlush(pi_queue command_queue); + /// Gets the native handle of a PI queue object. /// /// \param queue is the PI queue to get the native handle of. diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 7614dfe207434..b1f2c72fa2e47 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2254,6 +2254,11 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) { return result; } +// There is no CUDA counterpart for queue flushing and we don't run into the +// same problem of having to flush cross-queue dependencies as some of the +// other plugins, so it can be left as no-op. +pi_result cuda_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } + /// Gets the native CUDA handle of a PI queue object /// /// \param[in] queue The PI queue to get the native CUDA object of. @@ -4886,6 +4891,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, cuda_piQueueCreate) _PI_CL(piQueueGetInfo, cuda_piQueueGetInfo) _PI_CL(piQueueFinish, cuda_piQueueFinish) + _PI_CL(piQueueFlush, cuda_piQueueFlush) _PI_CL(piQueueRetain, cuda_piQueueRetain) _PI_CL(piQueueRelease, cuda_piQueueRelease) _PI_CL(piextQueueGetNativeHandle, cuda_piextQueueGetNativeHandle) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d5ce87e8b410d..97dd5baab67c6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -2202,6 +2202,11 @@ pi_result hip_piQueueFinish(pi_queue command_queue) { return result; } +// There is no HIP counterpart for queue flushing and we don't run into the +// same problem of having to flush cross-queue dependencies as some of the +// other plugins, so it can be left as no-op. +pi_result hip_piQueueFlush(pi_queue command_queue) { return PI_SUCCESS; } + /// Gets the native HIP handle of a PI queue object /// /// \param[in] queue The PI queue to get the native HIP object of. @@ -4820,6 +4825,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, hip_piQueueCreate) _PI_CL(piQueueGetInfo, hip_piQueueGetInfo) _PI_CL(piQueueFinish, hip_piQueueFinish) + _PI_CL(piQueueFlush, hip_piQueueFlush) _PI_CL(piQueueRetain, hip_piQueueRetain) _PI_CL(piQueueRelease, hip_piQueueRelease) _PI_CL(piextQueueGetNativeHandle, hip_piextQueueGetNativeHandle) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 537d74008e207..5a390ccc030e3 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2999,6 +2999,10 @@ pi_result piQueueFinish(pi_queue Queue) { return PI_SUCCESS; } +// Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList, +// so this can be left as a no-op. +pi_result piQueueFlush(pi_queue Queue) { return PI_SUCCESS; } + pi_result piextQueueGetNativeHandle(pi_queue Queue, pi_native_handle *NativeHandle) { PI_ASSERT(Queue, PI_INVALID_QUEUE); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 57d2e15b96317..7f86979f864e4 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1378,6 +1378,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piQueueCreate, piQueueCreate) _PI_CL(piQueueGetInfo, clGetCommandQueueInfo) _PI_CL(piQueueFinish, clFinish) + _PI_CL(piQueueFlush, clFlush) _PI_CL(piQueueRetain, clRetainCommandQueue) _PI_CL(piQueueRelease, clReleaseCommandQueue) _PI_CL(piextQueueGetNativeHandle, piextQueueGetNativeHandle) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index dbcf4284953c8..8a4c1138e84df 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -93,11 +93,12 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MState = HES_NotComplete; } -event_impl::event_impl() : MState(HES_Complete) {} +event_impl::event_impl() : MIsFlushed(true), MState(HES_Complete) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), - MOpenCLInterop(true), MHostEvent(false), MState(HES_Complete) { + MOpenCLInterop(true), MHostEvent(false), MIsFlushed(true), + MState(HES_Complete) { if (MContext->is_host()) { throw cl::sycl::invalid_parameter_error( @@ -120,7 +121,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) getPlugin().call(MEvent); } -event_impl::event_impl(QueueImplPtr Queue) { +event_impl::event_impl(const QueueImplPtr &Queue) : MQueue{Queue} { if (Queue->is_host()) { MState.store(HES_NotComplete); @@ -344,6 +345,32 @@ std::vector event_impl::getWaitList() { return Result; } +void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) { + assert(MEvent != nullptr); + if (MIsFlushed) + return; + + QueueImplPtr Queue = MQueue.lock(); + // If the queue has been released, all of the commands have already been + // implicitly flushed by piQueueRelease. + if (!Queue) { + MIsFlushed = true; + return; + } + if (Queue == UserQueue) + return; + + // Check if the task for this event has already been submitted. + pi_event_status Status = PI_EVENT_QUEUED; + getPlugin().call( + MEvent, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(pi_int32), &Status, + nullptr); + if (Status == PI_EVENT_QUEUED) { + getPlugin().call(Queue->getHandleRef()); + } + MIsFlushed = true; +} + void event_impl::cleanupDependencyEvents() { std::lock_guard Lock(MMutex); MPreparedDepsEvents.clear(); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 3bb82e7a2411a..7255e5ccdfe5f 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -44,7 +44,7 @@ class event_impl { /// \param Event is a valid instance of plug-in event. /// \param SyclContext is an instance of SYCL context. event_impl(RT::PiEvent Event, const context &SyclContext); - event_impl(QueueImplPtr Queue); + event_impl(const QueueImplPtr &Queue); /// Checks if this event is a SYCL host event. /// @@ -182,6 +182,11 @@ class event_impl { /// @return a vector of "immediate" dependencies for this event_impl. std::vector getWaitList(); + /// Performs a flush on the queue associated with this event if the user queue + /// is different and the task associated with this event hasn't been submitted + /// to the device yet. + void flushIfNeeded(const QueueImplPtr &UserQueue); + /// Cleans dependencies of this event_impl void cleanupDependencyEvents(); @@ -200,11 +205,16 @@ class event_impl { bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; void *MCommand = nullptr; + std::weak_ptr MQueue; /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; std::vector MPreparedHostDepsEvents; + /// Indicates that the task associated with this event has been submitted by + /// the queue to the device. + std::atomic MIsFlushed = false; + enum HostEventState : int { HES_NotComplete = 0, HES_Complete }; // State of host event. Employed only for host events and event with no diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f74b85e229942..6724608227699 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -169,6 +169,13 @@ getPiEvents(const std::vector &EventImpls) { return RetPiEvents; } +static void flushCrossQueueDeps(const std::vector &EventImpls, + const QueueImplPtr &Queue) { + for (auto &EventImpl : EventImpls) { + EventImpl->flushIfNeeded(Queue); + } +} + class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; @@ -325,6 +332,7 @@ void Command::waitForEvents(QueueImplPtr Queue, #endif std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); const detail::plugin &Plugin = Queue->getPlugin(); Plugin.call( Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); @@ -1073,6 +1081,7 @@ cl_int MapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( @@ -1150,6 +1159,7 @@ cl_int UnMapMemObject::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), @@ -1250,6 +1260,7 @@ cl_int MemCpyCommand::enqueueImp() { RT::PiEvent &Event = MEvent->getHandleRef(); auto RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), @@ -1400,6 +1411,7 @@ cl_int MemCpyCommandHost::enqueueImp() { return CL_SUCCESS; } + flushCrossQueueDeps(EventImpls, getWorkerQueue()); MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, @@ -1989,6 +2001,7 @@ cl_int ExecCGCommand::enqueueImp() { waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; auto RawEvents = getPiEvents(EventImpls); + flushCrossQueueDeps(EventImpls, getWorkerQueue()); RT::PiEvent &Event = MEvent->getHandleRef(); diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index f080521418500..fd8e328cba540 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -70,6 +70,7 @@ piProgramRelease piProgramRetain piQueueCreate piQueueFinish +piQueueFlush piQueueGetInfo piQueueRelease piQueueRetain diff --git a/sycl/unittests/helpers/CommonRedefinitions.hpp b/sycl/unittests/helpers/CommonRedefinitions.hpp index c8045f8435cf1..71c2712573c92 100644 --- a/sycl/unittests/helpers/CommonRedefinitions.hpp +++ b/sycl/unittests/helpers/CommonRedefinitions.hpp @@ -114,6 +114,18 @@ inline pi_result redefinedEventsWaitCommon(pi_uint32 num_events, return PI_SUCCESS; } +inline pi_result redefinedEventGetInfoCommon(pi_event event, + pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + auto *status = reinterpret_cast(param_value); + *status = PI_EVENT_SUBMITTED; + } + return PI_SUCCESS; +} + inline pi_result redefinedEventReleaseCommon(pi_event event) { if (event != nullptr) delete reinterpret_cast(event); @@ -166,6 +178,7 @@ inline void setupDefaultMockAPIs(sycl::unittest::PiMock &Mock) { Mock.redefine( redefinedKernelSetExecInfoCommon); Mock.redefine(redefinedEventsWaitCommon); + Mock.redefine(redefinedEventGetInfoCommon); Mock.redefine(redefinedEventReleaseCommon); Mock.redefine( redefinedEnqueueKernelLaunchCommon); diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 9674649882c8c..1ec8c7b4d894a 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -15,5 +15,6 @@ add_sycl_unittest(SchedulerTests OBJECT InOrderQueueHostTaskDeps.cpp AllocaLinking.cpp RequiredWGSize.cpp + QueueFlushing.cpp utils.cpp ) diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp new file mode 100644 index 0000000000000..14e622dcdeb43 --- /dev/null +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -0,0 +1,285 @@ +//==------------ QueueFlushing.cpp --- Scheduler unit tests ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedulerTest.hpp" +#include "SchedulerTestUtils.hpp" + +#include +#include + +using namespace sycl; + +static pi_queue ExpectedDepQueue = nullptr; +static bool QueueFlushed = false; +static bool EventStatusQueried = false; +static pi_event_status EventStatus = PI_EVENT_QUEUED; + +static pi_result redefinedQueueFlush(pi_queue Queue) { + EXPECT_EQ(ExpectedDepQueue, Queue); + EXPECT_FALSE(QueueFlushed); + QueueFlushed = true; + EventStatus = PI_EVENT_SUBMITTED; + return PI_SUCCESS; +} + +static pi_result redefinedEventGetInfo(pi_event event, pi_event_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + auto *Status = reinterpret_cast(param_value); + *Status = EventStatus; + EventStatusQueried = true; + } + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferReadRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferWriteRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferMap( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event, void **ret_map) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferFill( + pi_queue command_queue, pi_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event) { + *event = reinterpret_cast(new int{}); + return PI_SUCCESS; +} + +static void resetTestCtx() { + EventStatus = PI_EVENT_QUEUED; + QueueFlushed = false; + EventStatusQueried = false; +} + +static void addDepAndEnqueue(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq) { + MockCommand DepCmd(DepQueue); + DepCmd.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd->addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + + detail::EnqueueResultT Res; + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); +} + +static void testCommandEnqueue(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq, + bool ExpectedFlush = true) { + resetTestCtx(); + addDepAndEnqueue(Cmd, DepQueue, MockReq); + EXPECT_EQ(QueueFlushed, ExpectedFlush); +} + +static void testEventStatusCheck(detail::Command *Cmd, + detail::QueueImplPtr &DepQueue, + detail::Requirement &MockReq, + pi_event_status ReturnedEventStatus) { + resetTestCtx(); + EventStatus = ReturnedEventStatus; + addDepAndEnqueue(Cmd, DepQueue, MockReq); + EXPECT_FALSE(QueueFlushed); +} + +TEST_F(SchedulerTest, QueueFlushing) { + default_selector Selector; + platform Plt{default_selector()}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + unittest::PiMock Mock{Plt}; + setupDefaultMockAPIs(Mock); + Mock.redefine(redefinedQueueFlush); + Mock.redefine(redefinedEventGetInfo); + Mock.redefine( + redefinedEnqueueMemBufferReadRect); + Mock.redefine( + redefinedEnqueueMemBufferWriteRect); + Mock.redefine( + redefinedEnqueueMemBufferMap); + Mock.redefine(redefinedEnqueueMemUnmap); + Mock.redefine( + redefinedEnqueueMemBufferFill); + + context Ctx{Plt}; + queue QueueA{Ctx, Selector}; + detail::QueueImplPtr QueueImplA = detail::getSyclObjImpl(QueueA); + queue QueueB{Ctx, Selector}; + detail::QueueImplPtr QueueImplB = detail::getSyclObjImpl(QueueB); + ExpectedDepQueue = QueueImplB->getHandleRef(); + + int val; + buffer Buf(&val, range<1>(1)); + detail::Requirement MockReq = getMockRequirement(Buf); + detail::AllocaCommand AllocaCmd = detail::AllocaCommand(QueueImplA, MockReq); + void *MockHostPtr; + detail::EnqueueResultT Res; + + // Check that each of the non-blocking commands flush the dependency queue + { + detail::MapMemObject MapCmd{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testCommandEnqueue(&MapCmd, QueueImplB, MockReq); + + detail::UnMapMemObject UnmapCmd{&AllocaCmd, MockReq, &MockHostPtr, + QueueImplA}; + testCommandEnqueue(&UnmapCmd, QueueImplB, MockReq); + + device HostDevice; + detail::QueueImplPtr DefaultHostQueue{ + new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; + detail::AllocaCommand HostAllocaCmd = + detail::AllocaCommand(DefaultHostQueue, MockReq); + + detail::MemCpyCommand MemCpyCmd{MockReq, &AllocaCmd, + MockReq, &HostAllocaCmd, + QueueImplA, DefaultHostQueue}; + testCommandEnqueue(&MemCpyCmd, QueueImplB, MockReq); + + detail::MemCpyCommandHost MemCpyCmdHost{MockReq, &AllocaCmd, + MockReq, &MockHostPtr, + QueueImplA, DefaultHostQueue}; + testCommandEnqueue(&MemCpyCmdHost, QueueImplB, MockReq); + + std::unique_ptr CG{new detail::CGFill(/*Pattern*/ {}, &MockReq, + /*ArgsStorage*/ {}, + /*AccStorage*/ {}, + /*SharedPtrStorage*/ {}, + /*Requirements*/ {}, + /*Events*/ {})}; + detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA}; + MockReq.MDims = 1; + (void)ExecCGCmd.addDep(detail::DepDesc(&AllocaCmd, &MockReq, &AllocaCmd)); + testCommandEnqueue(&ExecCGCmd, QueueImplB, MockReq); + } + + // Check dependency event without a command + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + detail::EventImplPtr DepEvent{new detail::event_impl(QueueImplB)}; + DepEvent->setContextImpl(QueueImplB->getContextImplPtr()); + DepEvent->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(DepEvent); + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + EXPECT_TRUE(QueueFlushed); + } + + // Check that flush isn't called for a released queue. + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + detail::EventImplPtr DepEvent; + { + queue TempQueue{Ctx, Selector}; + detail::QueueImplPtr TempQueueImpl = detail::getSyclObjImpl(TempQueue); + DepEvent.reset(new detail::event_impl(TempQueueImpl)); + DepEvent->setContextImpl(TempQueueImpl->getContextImplPtr()); + DepEvent->getHandleRef() = reinterpret_cast(new int{}); + } + (void)Cmd.addDep(DepEvent); + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + EXPECT_FALSE(EventStatusQueried); + EXPECT_FALSE(QueueFlushed); + } + + // Check that same queue dependencies are not flushed + { + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testCommandEnqueue(&Cmd, QueueImplA, MockReq, false); + } + + // Check that flush is not called twice for the same dependency queue + { + resetTestCtx(); + detail::MapMemObject Cmd = {&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + MockCommand DepCmdA(QueueImplB); + DepCmdA.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(detail::DepDesc{&DepCmdA, &MockReq, nullptr}); + MockCommand DepCmdB(QueueImplB); + DepCmdB.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)Cmd.addDep(detail::DepDesc{&DepCmdB, &MockReq, nullptr}); + // The check is performed in redefinedQueueFlush + MockScheduler::enqueueCommand(&Cmd, Res, detail::NON_BLOCKING); + } + + // Check that the event status isn't requested twice for the same event + { + resetTestCtx(); + detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + MockCommand DepCmd(QueueImplB); + DepCmd.getEvent()->getHandleRef() = reinterpret_cast(new int{}); + (void)CmdA.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + MockScheduler::enqueueCommand(&CmdA, Res, detail::NON_BLOCKING); + + EventStatusQueried = false; + detail::MapMemObject CmdB{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + (void)CmdB.addDep(detail::DepDesc{&DepCmd, &MockReq, nullptr}); + MockScheduler::enqueueCommand(&CmdB, Res, detail::NON_BLOCKING); + EXPECT_FALSE(EventStatusQueried); + } + + // Check that flush isn't called for submitted dependencies + { + detail::MapMemObject CmdA{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdA, QueueImplB, MockReq, PI_EVENT_SUBMITTED); + detail::MapMemObject CmdB{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdB, QueueImplB, MockReq, PI_EVENT_RUNNING); + detail::MapMemObject CmdC{&AllocaCmd, MockReq, &MockHostPtr, QueueImplA, + access::mode::read_write}; + testEventStatusCheck(&CmdC, QueueImplB, MockReq, PI_EVENT_COMPLETE); + } +} diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index e918951108488..bd80f24820f8f 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -78,6 +78,8 @@ class MockCommand : public cl::sycl::detail::Command { Command::waitForEvents(Queue, RawEvents, Event); } + std::shared_ptr getEvent() { return MEvent; } + protected: cl::sycl::detail::Requirement MRequirement; };