@@ -675,8 +675,8 @@ pi_result _pi_queue::setLastDiscardedEvent(pi_event Event) {
675
675
return PI_SUCCESS;
676
676
}
677
677
678
- pi_result
679
- _pi_queue::resetLastDiscardedEvent ( pi_command_list_ptr_t CommandList) {
678
+ pi_result _pi_queue::appendWaitAndResetLastDiscardedEvent (
679
+ pi_command_list_ptr_t CommandList) {
680
680
if (LastCommandEvent && LastCommandEvent->IsDiscarded ) {
681
681
ZE_CALL (zeCommandListAppendBarrier,
682
682
(CommandList->first , nullptr , 1 , &(LastCommandEvent->ZeEvent )));
@@ -686,7 +686,8 @@ _pi_queue::resetLastDiscardedEvent(pi_command_list_ptr_t CommandList) {
686
686
// Put previous discarded event to the cache.
687
687
if (LastDiscardedEvent)
688
688
PI_CALL (addEventToCache (LastDiscardedEvent));
689
- // Update last discarded event. It will be put to the cache after submission of the next command.
689
+ // Update last discarded event. It will be put to the cache after submission
690
+ // of the next command.
690
691
PI_CALL (setLastDiscardedEvent (LastCommandEvent));
691
692
}
692
693
return PI_SUCCESS;
@@ -707,6 +708,7 @@ inline static pi_result createEventAndAssociateQueue(
707
708
pi_queue Queue, pi_event *Event, pi_command_type CommandType,
708
709
pi_command_list_ptr_t CommandList, bool IsInternal = false ,
709
710
bool ForceHostVisible = false ) {
711
+
710
712
if (!ForceHostVisible)
711
713
ForceHostVisible = DeviceEventsSetting == AllHostVisible;
712
714
@@ -752,16 +754,20 @@ inline static pi_result createEventAndAssociateQueue(
752
754
}
753
755
754
756
pi_result _pi_queue::signalEvent (pi_command_list_ptr_t CommandList) {
755
- pi_event SpecialEvent;
756
- PI_CALL (createEventAndAssociateQueue (
757
- this , &SpecialEvent, PI_COMMAND_TYPE_USER, CommandList,
758
- /* IsDiscarded */ false , /* ForceHostVisible */ false ));
757
+ // We signal new event at the end of command list only if we have queue with
758
+ // discard_events property and the last command event is discarded.
759
+ if (!(ReuseDiscardedEvents && isInOrderQueue () && isDiscardEvents () &&
760
+ LastCommandEvent && LastCommandEvent->IsDiscarded ))
761
+ return PI_SUCCESS;
759
762
760
- PI_CALL (piEventRelease (SpecialEvent));
761
- LastCommandEvent = SpecialEvent;
763
+ pi_event Event;
764
+ PI_CALL (createEventAndAssociateQueue (
765
+ this , &Event, PI_COMMAND_TYPE_USER, CommandList,
766
+ /* IsDiscarded */ false , /* ForceHostVisible */ false ))
767
+ PI_CALL (piEventReleaseInternal (Event));
768
+ LastCommandEvent = Event;
762
769
763
- ZE_CALL (zeCommandListAppendSignalEvent,
764
- (CommandList->first , SpecialEvent->ZeEvent ));
770
+ ZE_CALL (zeCommandListAppendSignalEvent, (CommandList->first , Event->ZeEvent ));
765
771
return PI_SUCCESS;
766
772
}
767
773
@@ -1111,6 +1117,16 @@ _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList,
1111
1117
std::move (std::begin (EventList), std::end (EventList),
1112
1118
std::back_inserter (EventListToCleanup));
1113
1119
EventList.clear ();
1120
+ // We may have additional events to cleanup if queue has discarded events.
1121
+ // These events are waited by barrier inserted in the beginning of command
1122
+ // list.
1123
+ auto &StartingBarrierEvents = CommandList->second .StartingBarrierEvents ;
1124
+ if (!StartingBarrierEvents.empty ()) {
1125
+ std::move (std::begin (StartingBarrierEvents),
1126
+ std::end (StartingBarrierEvents),
1127
+ std::back_inserter (EventListToCleanup));
1128
+ StartingBarrierEvents.clear ();
1129
+ }
1114
1130
1115
1131
// Standard commandlists move in and out of the cache as they are recycled.
1116
1132
// Immediate commandlists are always available.
@@ -1389,7 +1405,7 @@ pi_result _pi_context::getAvailableCommandList(
1389
1405
// Immediate commandlists have been pre-allocated and are always available.
1390
1406
if (Queue->Device ->useImmediateCommandLists ()) {
1391
1407
CommandList = Queue->getQueueGroup (UseCopyEngine).getImmCmdList ();
1392
- PI_CALL (Queue->insertLastCommandEventBarrier (CommandList));
1408
+ PI_CALL (Queue->insertStartBarrierWaitingForLastEvent (CommandList));
1393
1409
if (auto Res = Queue->insertActiveBarriers (CommandList, UseCopyEngine))
1394
1410
return Res;
1395
1411
return PI_SUCCESS;
@@ -1405,7 +1421,7 @@ pi_result _pi_context::getAvailableCommandList(
1405
1421
(!ForcedCmdQueue ||
1406
1422
*ForcedCmdQueue == CommandBatch.OpenCommandList ->second .ZeQueue )) {
1407
1423
CommandList = CommandBatch.OpenCommandList ;
1408
- PI_CALL (Queue->insertLastCommandEventBarrier (CommandList));
1424
+ PI_CALL (Queue->insertStartBarrierWaitingForLastEvent (CommandList));
1409
1425
return PI_SUCCESS;
1410
1426
}
1411
1427
// If this command isn't allowed to be batched or doesn't match the forced
@@ -1473,7 +1489,7 @@ pi_result _pi_context::getAvailableCommandList(
1473
1489
.first ;
1474
1490
}
1475
1491
ZeCommandListCache.erase (ZeCommandListIt);
1476
- if (auto Res = Queue->insertLastCommandEventBarrier (CommandList))
1492
+ if (auto Res = Queue->insertStartBarrierWaitingForLastEvent (CommandList))
1477
1493
return Res;
1478
1494
if (auto Res = Queue->insertActiveBarriers (CommandList, UseCopyEngine))
1479
1495
return Res;
@@ -1502,7 +1518,7 @@ pi_result _pi_context::getAvailableCommandList(
1502
1518
true /* QueueLocked */ );
1503
1519
CommandList = it;
1504
1520
CommandList->second .ZeFenceInUse = true ;
1505
- if (auto Res = Queue->insertLastCommandEventBarrier (CommandList))
1521
+ if (auto Res = Queue->insertStartBarrierWaitingForLastEvent (CommandList))
1506
1522
return Res;
1507
1523
return PI_SUCCESS;
1508
1524
}
@@ -1546,7 +1562,7 @@ _pi_queue::createCommandList(bool UseCopyEngine,
1546
1562
std::pair<ze_command_list_handle_t , pi_command_list_info_t >(
1547
1563
ZeCommandList, {ZeFence, false , ZeCommandQueue, QueueGroupOrdinal}));
1548
1564
1549
- PI_CALL (insertLastCommandEventBarrier (CommandList));
1565
+ PI_CALL (insertStartBarrierWaitingForLastEvent (CommandList));
1550
1566
PI_CALL (insertActiveBarriers (CommandList, UseCopyEngine));
1551
1567
return PI_SUCCESS;
1552
1568
}
@@ -1652,8 +1668,8 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1652
1668
if (!CommandList->second .EventList .empty () &&
1653
1669
this ->LastCommandEvent != CommandList->second .EventList .back ()) {
1654
1670
this ->LastCommandEvent = CommandList->second .EventList .back ();
1655
- if (this -> LastCommandEvent -> IsDiscarded ) {
1656
- PI_CALL (resetLastDiscardedEvent (CommandList));
1671
+ if (ReuseDiscardedEvents && isInOrderQueue () && isDiscardEvents () ) {
1672
+ PI_CALL (appendWaitAndResetLastDiscardedEvent (CommandList));
1657
1673
}
1658
1674
}
1659
1675
@@ -1686,8 +1702,8 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1686
1702
}
1687
1703
1688
1704
adjustBatchSizeForFullBatch (UseCopyEngine);
1705
+ CommandBatch.OpenCommandList = CommandListMap.end ();
1689
1706
}
1690
- CommandBatch.OpenCommandList = CommandListMap.end ();
1691
1707
}
1692
1708
1693
1709
auto &ZeCommandQueue = CommandList->second .ZeQueue ;
@@ -1763,31 +1779,37 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1763
1779
// each event in the EventList.
1764
1780
PI_CALL (piEventReleaseInternal (HostVisibleEvent));
1765
1781
1766
- if (isInOrderQueue () && isDiscardEvents ()) {
1767
- // If we have in-order queue with discarded events then we want to treat this event as regular event and use it as a dependency for the next command.
1782
+ if (ReuseDiscardedEvents && isInOrderQueue () && isDiscardEvents ()) {
1783
+ // If we have in-order queue with discarded events then we want to
1784
+ // treat this event as regular event. We insert a barrier in the next
1785
+ // command list to wait for this event.
1768
1786
LastCommandEvent = HostVisibleEvent;
1769
1787
} else {
1770
- // For all other queues treat this as a special event and indicate no cleanup is needed.
1788
+ // For all other queues treat this as a special event and indicate no
1789
+ // cleanup is needed.
1771
1790
PI_CALL (piEventReleaseInternal (HostVisibleEvent));
1772
1791
HostVisibleEvent->CleanedUp = true ;
1773
1792
}
1774
1793
1775
1794
// Finally set to signal the host-visible event at the end of the
1776
1795
// command-list after a barrier that waits for all commands
1777
1796
// completion.
1778
- if (LastCommandEvent && LastCommandEvent->IsDiscarded ) {
1779
- // If we the last event is discarded then we already have a barrier inserted, so just signal event.
1797
+ if (ReuseDiscardedEvents && isInOrderQueue () && isDiscardEvents () &&
1798
+ LastCommandEvent && LastCommandEvent->IsDiscarded ) {
1799
+ // If we the last event is discarded then we already have a barrier
1800
+ // inserted, so just signal the event.
1780
1801
ZE_CALL (zeCommandListAppendSignalEvent,
1781
1802
(CommandList->first , HostVisibleEvent->ZeEvent ));
1782
1803
} else {
1783
1804
ZE_CALL (zeCommandListAppendBarrier,
1784
1805
(CommandList->first , HostVisibleEvent->ZeEvent , 0 , nullptr ));
1785
1806
}
1786
- } else if ( this -> LastCommandEvent &&
1787
- this -> LastCommandEvent -> IsDiscarded ) {
1807
+ } else {
1808
+ // If we don't have host visible proxy then signal event if needed.
1788
1809
this ->signalEvent (CommandList);
1789
1810
}
1790
- } else if (this ->LastCommandEvent && this ->LastCommandEvent ->IsDiscarded ) {
1811
+ } else {
1812
+ // If we don't have host visible proxy then signal event if needed.
1791
1813
this ->signalEvent (CommandList);
1792
1814
}
1793
1815
@@ -1982,11 +2004,18 @@ pi_command_list_ptr_t _pi_queue::eventOpenCommandList(pi_event Event) {
1982
2004
return CommandListMap.end ();
1983
2005
}
1984
2006
1985
- pi_result _pi_queue::insertLastCommandEventBarrier (pi_command_list_ptr_t &CmdList) {
1986
- if (CmdList != LastCommandList && LastCommandEvent) {
1987
- CmdList->second .append (LastCommandEvent);
2007
+ pi_result _pi_queue::insertStartBarrierWaitingForLastEvent (
2008
+ pi_command_list_ptr_t &CmdList) {
2009
+ // If current command list is different from the last command list then insert
2010
+ // a barrier waiting for the last command event.
2011
+ if (ReuseDiscardedEvents && isInOrderQueue () && isDiscardEvents () &&
2012
+ CmdList != LastCommandList && LastCommandEvent) {
2013
+ // We want this event to live long enough so increment its reference count.
2014
+ // It will be decremented when command list is reset.
1988
2015
LastCommandEvent->RefCount .increment ();
1989
- ZE_CALL (zeCommandListAppendBarrier, (CmdList->first , nullptr , 1 , &(LastCommandEvent->ZeEvent )));
2016
+ CmdList->second .StartingBarrierEvents .push_back (LastCommandEvent);
2017
+ ZE_CALL (zeCommandListAppendBarrier,
2018
+ (CmdList->first , nullptr , 1 , &(LastCommandEvent->ZeEvent )));
1990
2019
LastCommandEvent = nullptr ;
1991
2020
}
1992
2021
return PI_SUCCESS;
@@ -2015,11 +2044,10 @@ pi_result _pi_queue::insertActiveBarriers(pi_command_list_ptr_t &CmdList,
2015
2044
2016
2045
// If there are more active barriers, insert a barrier on the command-list. We
2017
2046
// do not need an event for finishing so we pass nullptr.
2018
- if (ActiveBarriersWaitList. Length )
2047
+ if (!ActiveBarriers. empty () )
2019
2048
ZE_CALL (zeCommandListAppendBarrier,
2020
2049
(CmdList->first , nullptr , ActiveBarriersWaitList.Length ,
2021
2050
ActiveBarriersWaitList.ZeEventList ));
2022
-
2023
2051
return PI_SUCCESS;
2024
2052
}
2025
2053
@@ -2050,39 +2078,57 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
2050
2078
this ->ZeEventList = nullptr ;
2051
2079
this ->PiEventList = nullptr ;
2052
2080
2053
- if (CurQueue->isInOrderQueue ()) {
2081
+ if (CurQueue->isInOrderQueue () && CurQueue-> LastCommandEvent != nullptr ) {
2054
2082
if (CurQueue->Device ->useImmediateCommandLists ()) {
2055
- if (CurQueue->isDiscardEvents ()) {
2056
- // If we have an in-order queue where some events are discarded and if
2057
- // new command list is different from the last used then signal new
2058
- // event from the last immediate command list. It is going to be waited
2059
- // in the new immediate command list.
2083
+ if (ReuseDiscardedEvents && CurQueue->isDiscardEvents ()) {
2084
+ // If queue is in-order with discarded events and if
2085
+ // new command list is different from the last used command list then
2086
+ // signal new event from the last immediate command list. We are going
2087
+ // to insert a barrier in the new command list waiting for that event .
2060
2088
auto QueueGroup = CurQueue->getQueueGroup (UseCopyEngine);
2061
2089
auto NextImmCmdList = QueueGroup.ImmCmdLists [QueueGroup.NextIndex ];
2062
- if (CurQueue->LastCommandEvent != nullptr &&
2063
- CurQueue->LastCommandEvent ->IsDiscarded &&
2064
- CurQueue->LastCommandList != CurQueue->CommandListMap .end () &&
2090
+ if (CurQueue->LastCommandList != CurQueue->CommandListMap .end () &&
2065
2091
CurQueue->LastCommandList != NextImmCmdList) {
2066
2092
CurQueue->signalEvent (CurQueue->LastCommandList );
2093
+ // Mark the last command list as "closed" event though we don't really
2094
+ // close immediate command list. It just indicates that we are
2095
+ // switching command lists. This will be taken into account below - we
2096
+ // don't need to add the last command event into the wait list in this
2097
+ // case.
2067
2098
CurQueue->LastCommandList = CurQueue->CommandListMap .end ();
2068
2099
}
2069
2100
}
2070
2101
} else {
2071
- // Close open command list if command is going to be submitted to a
2072
- // different command list.
2073
- if (CurQueue->LastCommandEvent != nullptr &&
2074
- CurQueue->LastCommandList != CurQueue->CommandListMap .end () &&
2075
- CurQueue->LastCommandList ->second .isCopy (CurQueue) != UseCopyEngine) {
2102
+ // Ensure LastCommandEvent's batch is submitted if it is differrent
2103
+ // from the one this command is going to.
2104
+ const auto &OpenCommandList =
2105
+ CurQueue->eventOpenCommandList (CurQueue->LastCommandEvent );
2106
+ if (OpenCommandList != CurQueue->CommandListMap .end () &&
2107
+ OpenCommandList->second .isCopy (CurQueue) != UseCopyEngine) {
2108
+
2076
2109
if (auto Res = CurQueue->executeOpenCommandList (
2077
- CurQueue-> LastCommandList ->second .isCopy (CurQueue)))
2110
+ OpenCommandList ->second .isCopy (CurQueue)))
2078
2111
return Res;
2079
2112
}
2080
2113
}
2081
2114
}
2082
2115
2116
+ bool IncludeLastCommandEvent =
2117
+ CurQueue->isInOrderQueue () && CurQueue->LastCommandEvent != nullptr ;
2118
+
2119
+ // If the last command event is not nullptr we still don't need to include
2120
+ // last command event in the wait list in the two cases: If the last event is
2121
+ // discarded then we already have a barrier waiting for that event. If the
2122
+ // last command list is closed then we are going to insert a barrier at the
2123
+ // beginning of the next command list.
2124
+ if (ReuseDiscardedEvents && CurQueue->isDiscardEvents () &&
2125
+ (CurQueue->LastCommandEvent ->IsDiscarded ||
2126
+ CurQueue->LastCommandList == CurQueue->CommandListMap .end ())) {
2127
+ IncludeLastCommandEvent = false ;
2128
+ }
2129
+
2083
2130
try {
2084
- if (CurQueue->isInOrderQueue () && CurQueue->LastCommandEvent != nullptr &&
2085
- !CurQueue->LastCommandEvent ->IsDiscarded && CurQueue->LastCommandList != CurQueue->CommandListMap .end ()) {
2131
+ if (IncludeLastCommandEvent) {
2086
2132
this ->ZeEventList = new ze_event_handle_t [EventListLength + 1 ];
2087
2133
this ->PiEventList = new pi_event[EventListLength + 1 ];
2088
2134
} else if (EventListLength > 0 ) {
@@ -2174,8 +2220,7 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
2174
2220
// For in-order queues, every command should be executed only after the
2175
2221
// previous command has finished. The event associated with the last
2176
2222
// enqueued command is added into the waitlist to ensure in-order semantics.
2177
- if (CurQueue->isInOrderQueue () && CurQueue->LastCommandEvent != nullptr &&
2178
- !CurQueue->LastCommandEvent ->IsDiscarded && CurQueue->LastCommandList != CurQueue->CommandListMap .end ()) {
2223
+ if (IncludeLastCommandEvent) {
2179
2224
std::shared_lock<pi_shared_mutex> Lock (CurQueue->LastCommandEvent ->Mutex );
2180
2225
this ->ZeEventList [TmpListLength] = CurQueue->LastCommandEvent ->ZeEvent ;
2181
2226
this ->PiEventList [TmpListLength] = CurQueue->LastCommandEvent ;
@@ -6538,12 +6583,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
6538
6583
// We use the same approach if
6539
6584
// SYCL_PI_LEVEL_ZERO_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a
6540
6585
// positive value.
6541
- // We also use the same approach if we have in-order queue because inserted
6542
- // barrier will depend on last command event and every next command will
6543
- // depend on event signalled by barrier, so no need to populate ActiveBarriers
6544
- // in this case as well.
6545
- if (NumEventsInWaitList || !UseMultipleCmdlistBarriers ||
6546
- Queue->isInOrderQueue ()) {
6586
+ if (NumEventsInWaitList || !UseMultipleCmdlistBarriers) {
6547
6587
// Retain the events as they will be owned by the result event.
6548
6588
_pi_ze_event_list_t TmpWaitList;
6549
6589
if (auto Res = TmpWaitList.createAndRetainPiZeEventList (
@@ -6566,10 +6606,8 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
6566
6606
if (auto Res = Queue->executeCommandList (CmdList, false , OkToBatch))
6567
6607
return Res;
6568
6608
6569
- if (UseMultipleCmdlistBarriers && !Queue-> isInOrderQueue () ) {
6609
+ if (UseMultipleCmdlistBarriers) {
6570
6610
// Retain and save the resulting event for future commands.
6571
- // This is redundant for in-order queues because we separately handle
6572
- // dependency chain between commands in in-order queue.
6573
6611
(*Event)->RefCount .increment ();
6574
6612
Queue->ActiveBarriers .push_back (*Event);
6575
6613
}
0 commit comments