diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 51f86711a2126..87b49326a45ce 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3942,6 +3942,19 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, case PI_EVENT_INFO_COMMAND_TYPE: return ReturnValue(pi_cast(Event->CommandType)); case PI_EVENT_INFO_COMMAND_EXECUTION_STATUS: { + // Check to see if the event's Queue has an open command list due to + // batching. If so, go ahead and close and submit it, because it is + // possible that this is trying to query some event's status that + // is part of the batch. This isn't strictly required, but it seems + // like a reasonable thing to do. + { + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Event->Queue->PiQueueMutex); + + if (auto Res = Event->Queue->executeOpenCommandList()) + return Res; + } + ze_result_t ZeResult; ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, (Event->ZeEvent)); if (ZeResult == ZE_RESULT_SUCCESS) { diff --git a/sycl/test/on-device/plugins/level_zero_batch_event_status.cpp b/sycl/test/on-device/plugins/level_zero_batch_event_status.cpp new file mode 100644 index 0000000000000..d98140ac4b1ff --- /dev/null +++ b/sycl/test/on-device/plugins/level_zero_batch_event_status.cpp @@ -0,0 +1,108 @@ +// REQUIRES: gpu, level_zero + +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out + +// Set batching to 4 explicitly +// RUN: env SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 SYCL_PI_TRACE=2 ZE_DEBUG=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck %s + +// level_zero_batch_test.cpp +// +// This tests the level zero plugin's kernel batching code. It specifically +// tests that the current batch is submitted when an Event execution status +// request is made. This test uses explicit SYCL_PI_LEVEL_ZERO_BATCH_SIZE=4 +// to make sure that the batching is submitted when the piEventGetInfo is +// done, rather than some other dynamic batching criteria. +// +// CHECK: ---> piEnqueueKernelLaunch +// CHECK: ZE ---> zeCommandListAppendLaunchKernel +// Shouldn't have closed until we see a piEventGetInfo +// CHECK-NOT: ZE ---> zeCommandListClose +// CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists +// CHECK: ---> piEventGetInfo +// Shouldn't see another piGetEventInfo until after closing command list +// CHECK-NOT: ---> piEventGetInfo +// Look for close and Execute after piEventGetInfo +// CHECK: ZE ---> zeCommandListClose +// CHECK: ZE ---> zeCommandQueueExecuteCommandLists +// CHECK: ---> piEventGetInfo +// CHECK-NOT: piEventsWait +// CHECK: ---> piEnqueueKernelLaunch +// CHECK: ZE ---> zeCommandListAppendLaunchKernel +// CHECK: ---> piEventsWait +// Look for close and Execute after piEventsWait +// CHECK: ZE ---> zeCommandListClose +// CHECK: ZE ---> zeCommandQueueExecuteCommandLists +// CHECK: ---> piEventGetInfo +// No close and execute here, should already have happened. +// CHECK-NOT: ZE ---> zeCommandListClose +// CHECK-NOT: ZE ---> zeCommandQueueExecuteCommandLists +// CHECK-NOT: Test Fail +// CHECK: Test Pass + +#include +#include +#include +#include +#include + +int main(void) { + sycl::default_selector ds{}; + sycl::queue q{ds}; + sycl::vector_class events(10); + + sycl::event ev1 = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(events); + cgh.single_task([=] {}); + }); + + bool ev1_completed = false; + int try_count = 0; + while (true) { + auto ev1_status = + ev1.get_info(); + if (ev1_status == sycl::info::event_command_status::complete) { + std::cout << "Ev1 has completed" << std::endl; + ev1_completed = true; + break; + } + + std::cout << "Ev1 has not yet completed: "; + switch (ev1_status) { + case sycl::info::event_command_status::submitted: + std::cout << "submitted"; + break; + case sycl::info::event_command_status::running: + std::cout << "running"; + break; + default: + std::cout << "unrecognized"; + break; + } + std::cout << std::endl; + + std::chrono::milliseconds timespan(300); + std::this_thread::sleep_for(timespan); + + try_count += 1; + if (try_count > 10) { + ev1.wait(); + } + } + assert(ev1_completed); + + sycl::event ev2 = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(events); + cgh.single_task([=] {}); + }); + q.wait(); + + auto ev2_status = ev2.get_info(); + if (ev2_status != sycl::info::event_command_status::complete) { + std::cout << "Test Fail" << std::endl; + exit(1); + } + + std::cout << "Ev2 has completed" << std::endl; + std::cout << "Test Pass" << std::endl; + return 0; +}