diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index dda8ae72e976a..3a35ba93de0ca 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -32,8 +32,8 @@ extern "C" { // Forward declarartions. static pi_result EventRelease(pi_event Event, pi_queue LockedQueue); static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue); -static pi_result EventCreate(pi_context Context, bool HostVisible, - pi_event *RetEvent); +static pi_result EventCreate(pi_context Context, pi_queue Queue, + bool HostVisible, pi_event *RetEvent); } namespace { @@ -428,20 +428,13 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) { pi_result _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool, - size_t &Index, bool HostVisible) { + size_t &Index, bool HostVisible, + bool ProfilingEnabled) { // Lock while updating event pool machinery. std::lock_guard Lock(ZeEventPoolCacheMutex); - // Setup for host-visible pool as needed. - ze_event_pool_flag_t ZePoolFlag = {}; - std::list *ZePoolCache; - - if (HostVisible) { - ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; - ZePoolCache = &ZeHostVisibleEventPoolCache; - } else { - ZePoolCache = &ZeDeviceScopeEventPoolCache; - } + std::list *ZePoolCache = + getZeEventPoolCache(HostVisible, ProfilingEnabled); // Remove full pool from the cache. 
if (!ZePoolCache->empty()) { @@ -460,7 +453,12 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool, if (*ZePool == nullptr) { ZeStruct ZeEventPoolDesc; ZeEventPoolDesc.count = MaxNumEventsPerPool; - ZeEventPoolDesc.flags = ZePoolFlag | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + ZeEventPoolDesc.flags = 0; + if (HostVisible) + ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + if (ProfilingEnabled) + ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + zePrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags); std::vector ZeDevices; std::for_each(Devices.begin(), Devices.end(), @@ -486,12 +484,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) { return PI_SUCCESS; } - std::list *ZePoolCache; - if (Event->IsHostVisible()) { - ZePoolCache = &ZeHostVisibleEventPoolCache; - } else { - ZePoolCache = &ZeDeviceScopeEventPoolCache; - } + std::list *ZePoolCache = + getZeEventPoolCache(Event->isHostVisible(), Event->isProfilingEnabled()); // Put the empty pool to the cache of the pools. 
std::lock_guard Lock(ZeEventPoolCacheMutex); @@ -611,13 +605,15 @@ inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; } // \param Event a pointer to hold the newly created pi_event // \param CommandType various command type determined by the caller // \param CommandList is the command list where the event is added -inline static pi_result -createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, - pi_command_type CommandType, - pi_command_list_ptr_t CommandList) { - pi_result Res = piEventCreate(Queue->Context, Event); - if (Res != PI_SUCCESS) - return Res; +// \param ForceHostVisible tells if the event must be created in +// the host-visible pool +inline static pi_result createEventAndAssociateQueue( + pi_queue Queue, pi_event *Event, pi_command_type CommandType, + pi_command_list_ptr_t CommandList, bool ForceHostVisible = false) { + + PI_CALL(EventCreate(Queue->Context, Queue, + ForceHostVisible ? true : EventsScope == AllHostVisible, + Event)); (*Event)->Queue = Queue; (*Event)->CommandType = CommandType; @@ -806,13 +802,11 @@ pi_result _pi_context::finalize() { // For example, event pool caches would be still alive. { std::lock_guard Lock(ZeEventPoolCacheMutex); - for (auto &ZePool : ZeDeviceScopeEventPoolCache) - ZE_CALL(zeEventPoolDestroy, (ZePool)); - for (auto &ZePool : ZeHostVisibleEventPoolCache) - ZE_CALL(zeEventPoolDestroy, (ZePool)); - - ZeDeviceScopeEventPoolCache.clear(); - ZeHostVisibleEventPoolCache.clear(); + for (auto &ZePoolCache : ZeEventPoolCache) { + for (auto &ZePool : ZePoolCache) + ZE_CALL(zeEventPoolDestroy, (ZePool)); + ZePoolCache.clear(); + } } // Destroy the command list used for initializations @@ -841,8 +835,7 @@ pi_result _pi_context::finalize() { bool _pi_queue::isInOrderQueue() const { // If out-of-order queue property is not set, then this is a in-order queue. 
- return ((this->PiQueueProperties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == - 0); + return ((this->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0); } pi_result _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList, @@ -1032,11 +1025,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] { _pi_queue::_pi_queue(ze_command_queue_handle_t Queue, std::vector &CopyQueues, pi_context Context, pi_device Device, - bool OwnZeCommandQueue, - pi_queue_properties PiQueueProperties) + bool OwnZeCommandQueue, pi_queue_properties Properties) : ZeComputeCommandQueue{Queue}, ZeCopyCommandQueues{CopyQueues}, Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue}, - PiQueueProperties(PiQueueProperties) { + Properties(Properties) { ComputeCommandBatch.OpenCommandList = CommandListMap.end(); CopyCommandBatch.OpenCommandList = CommandListMap.end(); ComputeCommandBatch.QueueBatchSize = @@ -1350,7 +1342,10 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, // Create a "proxy" host-visible event. // pi_event HostVisibleEvent; - PI_CALL(EventCreate(Context, true, &HostVisibleEvent)); + auto Res = createEventAndAssociateQueue( + this, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true); + if (Res) + return Res; // Update each command's event in the command-list to "see" this // proxy event as a host-visible counterpart. @@ -1359,10 +1354,14 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, PI_CALL(piEventRetain(HostVisibleEvent)); } - // Decrement the reference count by 1 so all the remaining references - // are from the other commands in this batch. This host-visible event - // will be destroyed after all events in the batch are gone. + // Decrement the reference count of the event such that all the remaining + // references are from the other commands in this batch. 
This host-visible + // event will not be waited/released by SYCL RT, so it must be destroyed + // after all events in the batch are gone. PI_CALL(piEventRelease(HostVisibleEvent)); + PI_CALL(piEventRelease(HostVisibleEvent)); + PI_CALL(piEventRelease(HostVisibleEvent)); + // Indicate no cleanup is needed for this PI event as it is special. HostVisibleEvent->CleanedUp = true; @@ -2105,7 +2104,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, *NumDevices = ZeDeviceCount; if (NumEntries == 0) { - // Devices should be nullptr when querying the number of devices + // Devices should be nullptr when querying the number of devices. PI_ASSERT(Devices == nullptr, PI_INVALID_VALUE); return PI_SUCCESS; } @@ -4954,10 +4953,6 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) { if (EventsScope != OnDemandHostVisibleProxy) die("getOrCreateHostVisibleEvent: missing host-visible event"); - // Create a "proxy" host-visible event on demand. - PI_CALL(EventCreate(Context, true, &HostVisibleEvent)); - HostVisibleEvent->CleanedUp = true; - // Submit the command(s) signalling the proxy event to the queue. // We have to first submit a wait for the device-only event for which this // proxy is created. @@ -4974,6 +4969,13 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) { Queue, CommandList, false /* UseCopyEngine */, OkToBatch)) return Res; + // Create a "proxy" host-visible event.
+ auto Res = createEventAndAssociateQueue( + Queue, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true); + // HostVisibleEvent->CleanedUp = true; + if (Res != PI_SUCCESS) + return Res; + ZE_CALL(zeCommandListAppendWaitOnEvents, (CommandList->first, 1, &ZeEvent)); ZE_CALL(zeCommandListAppendSignalEvent, @@ -4988,12 +4990,21 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) { return PI_SUCCESS; } -static pi_result EventCreate(pi_context Context, bool HostVisible, - pi_event *RetEvent) { +// Helper function for creating a PI event. +// The "Queue" argument specifies the PI queue where a command is submitted. +// The "HostVisible" argument specifies if event needs to be allocated from +// a host-visible pool. +// +static pi_result EventCreate(pi_context Context, pi_queue Queue, + bool HostVisible, pi_event *RetEvent) { + + bool ProfilingEnabled = + !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; + size_t Index = 0; ze_event_pool_handle_t ZeEventPool = {}; - if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, - HostVisible)) + if (auto Res = Context->getFreeSlotInExistingOrNewPool( + ZeEventPool, Index, HostVisible, ProfilingEnabled)) return Res; ze_event_handle_t ZeEvent; @@ -5034,8 +5045,9 @@ static pi_result EventCreate(pi_context Context, bool HostVisible, return PI_SUCCESS; } +// External PI API entry pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { - return EventCreate(Context, EventsScope == AllHostVisible, RetEvent); + return EventCreate(Context, nullptr, EventsScope == AllHostVisible, RetEvent); } pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, @@ -5101,6 +5113,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, PI_ASSERT(Event, PI_INVALID_EVENT); + if (Event->Queue && + (Event->Queue->Properties & PI_QUEUE_PROFILING_ENABLE) == 0) { + return PI_PROFILING_INFO_NOT_AVAILABLE; + } + uint64_t ZeTimerResolution =
Event->Queue ? Event->Queue->Device->ZeDeviceProperties->timerResolution @@ -5375,7 +5392,7 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { // and release a reference to it. if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) { // Decrement ref-count of the host-visible proxy event. - PI_CALL(piEventRelease(Event->HostVisibleEvent)); + PI_CALL(EventRelease(Event->HostVisibleEvent, LockedQueue)); } auto Context = Event->Context; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 42c275f52695e..3bff3389cae1e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -562,9 +562,11 @@ struct _pi_context : _pi_object { // Get index of the free slot in the available pool. If there is no available // pool then create new one. The HostVisible parameter tells if we need a - // slot for a host-visible event. + // slot for a host-visible event. The ProfilingEnabled tells if we need a + // slot for an event with profiling capabilities. pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &, - bool HostVisible); + bool HostVisible, + bool ProfilingEnabled); // Decrement number of events living in the pool upon event destroy // and return the pool to the cache if there are no unreleased events. @@ -601,9 +603,14 @@ struct _pi_context : _pi_object { // head. In case there is no next pool, a new pool is created and made the // head. // - std::list ZeDeviceScopeEventPoolCache; // Cache of event pools to which host-visible events are added to. - std::list ZeHostVisibleEventPoolCache; + std::vector> ZeEventPoolCache{4}; + auto getZeEventPoolCache(bool HostVisible, bool WithProfiling) { + if (HostVisible) + return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1]; + else + return WithProfiling ?
&ZeEventPoolCache[2] : &ZeEventPoolCache[3]; + } // This map will be used to determine if a pool is full or not // by storing number of empty slots available in the pool. @@ -625,7 +632,7 @@ struct _pi_queue : _pi_object { _pi_queue(ze_command_queue_handle_t Queue, std::vector &CopyQueues, pi_context Context, pi_device Device, bool OwnZeCommandQueue, - pi_queue_properties PiQueueProperties = 0); + pi_queue_properties Properties = 0); // Level Zero compute command queue handle. ze_command_queue_handle_t ZeComputeCommandQueue; @@ -731,7 +738,7 @@ struct _pi_queue : _pi_object { bool isBatchingAllowed(bool IsCopy) const; // Keeps the properties of this queue. - pi_queue_properties PiQueueProperties; + pi_queue_properties Properties; // Returns true if the queue is a in-order queue. bool isInOrderQueue() const; @@ -986,11 +993,17 @@ struct _pi_event : _pi_object { // than by just this one event, depending on the mode (see EventsScope). // pi_event HostVisibleEvent = {nullptr}; - bool IsHostVisible() const { return this == HostVisibleEvent; } + bool isHostVisible() const { return this == HostVisibleEvent; } // Get the host-visible event or create one and enqueue its signal. pi_result getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent); + // Tells if this event is with profiling capabilities. + bool isProfilingEnabled() const { + return !Queue || // tentatively assume user events are profiling enabled + (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; + } + // Level Zero command list where the command signaling this event was appended // to. This is currently used to remember/destroy the command list after all // commands in it are completed, i.e. this event signaled.