Skip to content

Commit a32aac6

Browse files
committed
Add guards and clarifying comments
1 parent 235e039 commit a32aac6

File tree

2 files changed

+134
-75
lines changed

2 files changed

+134
-75
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 99 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -675,8 +675,8 @@ pi_result _pi_queue::setLastDiscardedEvent(pi_event Event) {
675675
return PI_SUCCESS;
676676
}
677677

678-
pi_result
679-
_pi_queue::resetLastDiscardedEvent(pi_command_list_ptr_t CommandList) {
678+
pi_result _pi_queue::appendWaitAndResetLastDiscardedEvent(
679+
pi_command_list_ptr_t CommandList) {
680680
if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
681681
ZE_CALL(zeCommandListAppendBarrier,
682682
(CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent)));
@@ -686,7 +686,8 @@ _pi_queue::resetLastDiscardedEvent(pi_command_list_ptr_t CommandList) {
686686
// Put previous discarded event to the cache.
687687
if (LastDiscardedEvent)
688688
PI_CALL(addEventToCache(LastDiscardedEvent));
689-
// Update last discarded event. It will be put to the cache after submission of the next command.
689+
// Update last discarded event. It will be put to the cache after submission
690+
// of the next command.
690691
PI_CALL(setLastDiscardedEvent(LastCommandEvent));
691692
}
692693
return PI_SUCCESS;
@@ -707,6 +708,7 @@ inline static pi_result createEventAndAssociateQueue(
707708
pi_queue Queue, pi_event *Event, pi_command_type CommandType,
708709
pi_command_list_ptr_t CommandList, bool IsInternal = false,
709710
bool ForceHostVisible = false) {
711+
710712
if (!ForceHostVisible)
711713
ForceHostVisible = DeviceEventsSetting == AllHostVisible;
712714

@@ -752,16 +754,20 @@ inline static pi_result createEventAndAssociateQueue(
752754
}
753755

754756
pi_result _pi_queue::signalEvent(pi_command_list_ptr_t CommandList) {
755-
pi_event SpecialEvent;
756-
PI_CALL(createEventAndAssociateQueue(
757-
this, &SpecialEvent, PI_COMMAND_TYPE_USER, CommandList,
758-
/* IsDiscarded */ false, /* ForceHostVisible */ false));
757+
// We signal new event at the end of command list only if we have queue with
758+
// discard_events property and the last command event is discarded.
759+
if (!(ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents() &&
760+
LastCommandEvent && LastCommandEvent->IsDiscarded))
761+
return PI_SUCCESS;
759762

760-
PI_CALL(piEventRelease(SpecialEvent));
761-
LastCommandEvent = SpecialEvent;
763+
pi_event Event;
764+
PI_CALL(createEventAndAssociateQueue(
765+
this, &Event, PI_COMMAND_TYPE_USER, CommandList,
766+
/* IsDiscarded */ false, /* ForceHostVisible */ false))
767+
PI_CALL(piEventReleaseInternal(Event));
768+
LastCommandEvent = Event;
762769

763-
ZE_CALL(zeCommandListAppendSignalEvent,
764-
(CommandList->first, SpecialEvent->ZeEvent));
770+
ZE_CALL(zeCommandListAppendSignalEvent, (CommandList->first, Event->ZeEvent));
765771
return PI_SUCCESS;
766772
}
767773

@@ -1111,6 +1117,16 @@ _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList,
11111117
std::move(std::begin(EventList), std::end(EventList),
11121118
std::back_inserter(EventListToCleanup));
11131119
EventList.clear();
1120+
// We may have additional events to cleanup if queue has discarded events.
1121+
// These events are waited by barrier inserted in the beginning of command
1122+
// list.
1123+
auto &StartingBarrierEvents = CommandList->second.StartingBarrierEvents;
1124+
if (!StartingBarrierEvents.empty()) {
1125+
std::move(std::begin(StartingBarrierEvents),
1126+
std::end(StartingBarrierEvents),
1127+
std::back_inserter(EventListToCleanup));
1128+
StartingBarrierEvents.clear();
1129+
}
11141130

11151131
// Standard commandlists move in and out of the cache as they are recycled.
11161132
// Immediate commandlists are always available.
@@ -1389,7 +1405,7 @@ pi_result _pi_context::getAvailableCommandList(
13891405
// Immediate commandlists have been pre-allocated and are always available.
13901406
if (Queue->Device->useImmediateCommandLists()) {
13911407
CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList();
1392-
PI_CALL(Queue->insertLastCommandEventBarrier(CommandList));
1408+
PI_CALL(Queue->insertStartBarrierWaitingForLastEvent(CommandList));
13931409
if (auto Res = Queue->insertActiveBarriers(CommandList, UseCopyEngine))
13941410
return Res;
13951411
return PI_SUCCESS;
@@ -1405,7 +1421,7 @@ pi_result _pi_context::getAvailableCommandList(
14051421
(!ForcedCmdQueue ||
14061422
*ForcedCmdQueue == CommandBatch.OpenCommandList->second.ZeQueue)) {
14071423
CommandList = CommandBatch.OpenCommandList;
1408-
PI_CALL(Queue->insertLastCommandEventBarrier(CommandList));
1424+
PI_CALL(Queue->insertStartBarrierWaitingForLastEvent(CommandList));
14091425
return PI_SUCCESS;
14101426
}
14111427
// If this command isn't allowed to be batched or doesn't match the forced
@@ -1473,7 +1489,7 @@ pi_result _pi_context::getAvailableCommandList(
14731489
.first;
14741490
}
14751491
ZeCommandListCache.erase(ZeCommandListIt);
1476-
if (auto Res = Queue->insertLastCommandEventBarrier(CommandList))
1492+
if (auto Res = Queue->insertStartBarrierWaitingForLastEvent(CommandList))
14771493
return Res;
14781494
if (auto Res = Queue->insertActiveBarriers(CommandList, UseCopyEngine))
14791495
return Res;
@@ -1502,7 +1518,7 @@ pi_result _pi_context::getAvailableCommandList(
15021518
true /* QueueLocked */);
15031519
CommandList = it;
15041520
CommandList->second.ZeFenceInUse = true;
1505-
if (auto Res = Queue->insertLastCommandEventBarrier(CommandList))
1521+
if (auto Res = Queue->insertStartBarrierWaitingForLastEvent(CommandList))
15061522
return Res;
15071523
return PI_SUCCESS;
15081524
}
@@ -1546,7 +1562,7 @@ _pi_queue::createCommandList(bool UseCopyEngine,
15461562
std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
15471563
ZeCommandList, {ZeFence, false, ZeCommandQueue, QueueGroupOrdinal}));
15481564

1549-
PI_CALL(insertLastCommandEventBarrier(CommandList));
1565+
PI_CALL(insertStartBarrierWaitingForLastEvent(CommandList));
15501566
PI_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
15511567
return PI_SUCCESS;
15521568
}
@@ -1652,8 +1668,8 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
16521668
if (!CommandList->second.EventList.empty() &&
16531669
this->LastCommandEvent != CommandList->second.EventList.back()) {
16541670
this->LastCommandEvent = CommandList->second.EventList.back();
1655-
if (this->LastCommandEvent->IsDiscarded) {
1656-
PI_CALL(resetLastDiscardedEvent(CommandList));
1671+
if (ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents()) {
1672+
PI_CALL(appendWaitAndResetLastDiscardedEvent(CommandList));
16571673
}
16581674
}
16591675

@@ -1686,8 +1702,8 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
16861702
}
16871703

16881704
adjustBatchSizeForFullBatch(UseCopyEngine);
1705+
CommandBatch.OpenCommandList = CommandListMap.end();
16891706
}
1690-
CommandBatch.OpenCommandList = CommandListMap.end();
16911707
}
16921708

16931709
auto &ZeCommandQueue = CommandList->second.ZeQueue;
@@ -1763,31 +1779,37 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
17631779
// each event in the EventList.
17641780
PI_CALL(piEventReleaseInternal(HostVisibleEvent));
17651781

1766-
if (isInOrderQueue() && isDiscardEvents()) {
1767-
// If we have in-order queue with discarded events then we want to treat this event as regular event and use it as a dependency for the next command.
1782+
if (ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents()) {
1783+
// If we have in-order queue with discarded events then we want to
1784+
// treat this event as regular event. We insert a barrier in the next
1785+
// command list to wait for this event.
17681786
LastCommandEvent = HostVisibleEvent;
17691787
} else {
1770-
// For all other queues treat this as a special event and indicate no cleanup is needed.
1788+
// For all other queues treat this as a special event and indicate no
1789+
// cleanup is needed.
17711790
PI_CALL(piEventReleaseInternal(HostVisibleEvent));
17721791
HostVisibleEvent->CleanedUp = true;
17731792
}
17741793

17751794
// Finally set to signal the host-visible event at the end of the
17761795
// command-list after a barrier that waits for all commands
17771796
// completion.
1778-
if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
1779-
// If we the last event is discarded then we already have a barrier inserted, so just signal event.
1797+
if (ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents() &&
1798+
LastCommandEvent && LastCommandEvent->IsDiscarded) {
1799+
// If we the last event is discarded then we already have a barrier
1800+
// inserted, so just signal the event.
17801801
ZE_CALL(zeCommandListAppendSignalEvent,
17811802
(CommandList->first, HostVisibleEvent->ZeEvent));
17821803
} else {
17831804
ZE_CALL(zeCommandListAppendBarrier,
17841805
(CommandList->first, HostVisibleEvent->ZeEvent, 0, nullptr));
17851806
}
1786-
} else if (this->LastCommandEvent &&
1787-
this->LastCommandEvent->IsDiscarded) {
1807+
} else {
1808+
// If we don't have host visible proxy then signal event if needed.
17881809
this->signalEvent(CommandList);
17891810
}
1790-
} else if (this->LastCommandEvent && this->LastCommandEvent->IsDiscarded) {
1811+
} else {
1812+
// If we don't have host visible proxy then signal event if needed.
17911813
this->signalEvent(CommandList);
17921814
}
17931815

@@ -1982,11 +2004,18 @@ pi_command_list_ptr_t _pi_queue::eventOpenCommandList(pi_event Event) {
19822004
return CommandListMap.end();
19832005
}
19842006

1985-
pi_result _pi_queue::insertLastCommandEventBarrier(pi_command_list_ptr_t &CmdList) {
1986-
if (CmdList != LastCommandList && LastCommandEvent) {
1987-
CmdList->second.append(LastCommandEvent);
2007+
pi_result _pi_queue::insertStartBarrierWaitingForLastEvent(
2008+
pi_command_list_ptr_t &CmdList) {
2009+
// If current command list is different from the last command list then insert
2010+
// a barrier waiting for the last command event.
2011+
if (ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents() &&
2012+
CmdList != LastCommandList && LastCommandEvent) {
2013+
// We want this event to live long enough so increment its reference count.
2014+
// It will be decremented when command list is reset.
19882015
LastCommandEvent->RefCount.increment();
1989-
ZE_CALL(zeCommandListAppendBarrier, (CmdList->first, nullptr, 1, &(LastCommandEvent->ZeEvent)));
2016+
CmdList->second.StartingBarrierEvents.push_back(LastCommandEvent);
2017+
ZE_CALL(zeCommandListAppendBarrier,
2018+
(CmdList->first, nullptr, 1, &(LastCommandEvent->ZeEvent)));
19902019
LastCommandEvent = nullptr;
19912020
}
19922021
return PI_SUCCESS;
@@ -2015,11 +2044,10 @@ pi_result _pi_queue::insertActiveBarriers(pi_command_list_ptr_t &CmdList,
20152044

20162045
// If there are more active barriers, insert a barrier on the command-list. We
20172046
// do not need an event for finishing so we pass nullptr.
2018-
if (ActiveBarriersWaitList.Length)
2047+
if (!ActiveBarriers.empty())
20192048
ZE_CALL(zeCommandListAppendBarrier,
20202049
(CmdList->first, nullptr, ActiveBarriersWaitList.Length,
20212050
ActiveBarriersWaitList.ZeEventList));
2022-
20232051
return PI_SUCCESS;
20242052
}
20252053

@@ -2050,39 +2078,57 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
20502078
this->ZeEventList = nullptr;
20512079
this->PiEventList = nullptr;
20522080

2053-
if (CurQueue->isInOrderQueue()) {
2081+
if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr) {
20542082
if (CurQueue->Device->useImmediateCommandLists()) {
2055-
if (CurQueue->isDiscardEvents()) {
2056-
// If we have an in-order queue where some events are discarded and if
2057-
// new command list is different from the last used then signal new
2058-
// event from the last immediate command list. It is going to be waited
2059-
// in the new immediate command list.
2083+
if (ReuseDiscardedEvents && CurQueue->isDiscardEvents()) {
2084+
// If queue is in-order with discarded events and if
2085+
// new command list is different from the last used command list then
2086+
// signal new event from the last immediate command list. We are going
2087+
// to insert a barrier in the new command list waiting for that event.
20602088
auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine);
20612089
auto NextImmCmdList = QueueGroup.ImmCmdLists[QueueGroup.NextIndex];
2062-
if (CurQueue->LastCommandEvent != nullptr &&
2063-
CurQueue->LastCommandEvent->IsDiscarded &&
2064-
CurQueue->LastCommandList != CurQueue->CommandListMap.end() &&
2090+
if (CurQueue->LastCommandList != CurQueue->CommandListMap.end() &&
20652091
CurQueue->LastCommandList != NextImmCmdList) {
20662092
CurQueue->signalEvent(CurQueue->LastCommandList);
2093+
// Mark the last command list as "closed" event though we don't really
2094+
// close immediate command list. It just indicates that we are
2095+
// switching command lists. This will be taken into account below - we
2096+
// don't need to add the last command event into the wait list in this
2097+
// case.
20672098
CurQueue->LastCommandList = CurQueue->CommandListMap.end();
20682099
}
20692100
}
20702101
} else {
2071-
// Close open command list if command is going to be submitted to a
2072-
// different command list.
2073-
if (CurQueue->LastCommandEvent != nullptr &&
2074-
CurQueue->LastCommandList != CurQueue->CommandListMap.end() &&
2075-
CurQueue->LastCommandList->second.isCopy(CurQueue) != UseCopyEngine) {
2102+
// Ensure LastCommandEvent's batch is submitted if it is differrent
2103+
// from the one this command is going to.
2104+
const auto &OpenCommandList =
2105+
CurQueue->eventOpenCommandList(CurQueue->LastCommandEvent);
2106+
if (OpenCommandList != CurQueue->CommandListMap.end() &&
2107+
OpenCommandList->second.isCopy(CurQueue) != UseCopyEngine) {
2108+
20762109
if (auto Res = CurQueue->executeOpenCommandList(
2077-
CurQueue->LastCommandList->second.isCopy(CurQueue)))
2110+
OpenCommandList->second.isCopy(CurQueue)))
20782111
return Res;
20792112
}
20802113
}
20812114
}
20822115

2116+
bool IncludeLastCommandEvent =
2117+
CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr;
2118+
2119+
// If the last command event is not nullptr we still don't need to include
2120+
// last command event in the wait list in the two cases: If the last event is
2121+
// discarded then we already have a barrier waiting for that event. If the
2122+
// last command list is closed then we are going to insert a barrier at the
2123+
// beginning of the next command list.
2124+
if (ReuseDiscardedEvents && CurQueue->isDiscardEvents() &&
2125+
(CurQueue->LastCommandEvent->IsDiscarded ||
2126+
CurQueue->LastCommandList == CurQueue->CommandListMap.end())) {
2127+
IncludeLastCommandEvent = false;
2128+
}
2129+
20832130
try {
2084-
if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr &&
2085-
!CurQueue->LastCommandEvent->IsDiscarded && CurQueue->LastCommandList != CurQueue->CommandListMap.end()) {
2131+
if (IncludeLastCommandEvent) {
20862132
this->ZeEventList = new ze_event_handle_t[EventListLength + 1];
20872133
this->PiEventList = new pi_event[EventListLength + 1];
20882134
} else if (EventListLength > 0) {
@@ -2174,8 +2220,7 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
21742220
// For in-order queues, every command should be executed only after the
21752221
// previous command has finished. The event associated with the last
21762222
// enqueued command is added into the waitlist to ensure in-order semantics.
2177-
if (CurQueue->isInOrderQueue() && CurQueue->LastCommandEvent != nullptr &&
2178-
!CurQueue->LastCommandEvent->IsDiscarded && CurQueue->LastCommandList != CurQueue->CommandListMap.end()) {
2223+
if (IncludeLastCommandEvent) {
21792224
std::shared_lock<pi_shared_mutex> Lock(CurQueue->LastCommandEvent->Mutex);
21802225
this->ZeEventList[TmpListLength] = CurQueue->LastCommandEvent->ZeEvent;
21812226
this->PiEventList[TmpListLength] = CurQueue->LastCommandEvent;
@@ -6538,12 +6583,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
65386583
// We use the same approach if
65396584
// SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a
65406585
// positive value.
6541-
// We also use the same approach if we have in-order queue because inserted
6542-
// barrier will depend on last command event and every next command will
6543-
// depend on event signalled by barrier, so no need to populate ActiveBarriers
6544-
// in this case as well.
6545-
if (NumEventsInWaitList || !UseMultipleCmdlistBarriers ||
6546-
Queue->isInOrderQueue()) {
6586+
if (NumEventsInWaitList || !UseMultipleCmdlistBarriers) {
65476587
// Retain the events as they will be owned by the result event.
65486588
_pi_ze_event_list_t TmpWaitList;
65496589
if (auto Res = TmpWaitList.createAndRetainPiZeEventList(
@@ -6566,10 +6606,8 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
65666606
if (auto Res = Queue->executeCommandList(CmdList, false, OkToBatch))
65676607
return Res;
65686608

6569-
if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) {
6609+
if (UseMultipleCmdlistBarriers) {
65706610
// Retain and save the resulting event for future commands.
6571-
// This is redundant for in-order queues because we separately handle
6572-
// dependency chain between commands in in-order queue.
65736611
(*Event)->RefCount.increment();
65746612
Queue->ActiveBarriers.push_back(*Event);
65756613
}

0 commit comments

Comments
 (0)