@@ -67,19 +67,40 @@ inline void assign_result(pi_result *ptr, pi_result value) noexcept {
67
67
// Invokes the callback once per stream, on the newest event (highest event id)
68
68
// of that stream in the wait list. The callback must take a single pi_event
// argument and return a pi_result.
69
69
template <typename Func>
70
- pi_result forEachEvent (const pi_event *event_wait_list,
71
- std::size_t num_events_in_wait_list, Func &&f) {
70
+ pi_result forLatestEvents (const pi_event *event_wait_list,
71
+ std::size_t num_events_in_wait_list, Func &&f) {
72
72
73
73
if (event_wait_list == nullptr || num_events_in_wait_list == 0 ) {
74
74
return PI_INVALID_EVENT_WAIT_LIST;
75
75
}
76
76
77
- for (size_t i = 0 ; i < num_events_in_wait_list; i++) {
78
- auto event = event_wait_list[i];
79
- if (event == nullptr ) {
80
- return PI_INVALID_EVENT_WAIT_LIST;
77
+ // Fast path if we only have a single event
78
+ if (num_events_in_wait_list == 1 ) {
79
+ return f (event_wait_list[0 ]);
80
+ }
81
+
82
+ std::vector<pi_event> events{event_wait_list,
83
+ event_wait_list + num_events_in_wait_list};
84
+ std::sort (events.begin (), events.end (), [](pi_event e0 , pi_event e1 ) {
85
+ // Tiered sort creating sublists of streams (smallest value first) in which
86
+ // the corresponding events are sorted into a sequence of newest first.
87
+ return e0 ->get_queue ()->stream_ < e1 ->get_queue ()->stream_ ||
88
+ (e0 ->get_queue ()->stream_ == e1 ->get_queue ()->stream_ &&
89
+ e0 ->get_event_id () > e1 ->get_event_id ());
90
+ });
91
+
92
+ bool first = true ;
93
+ CUstream lastSeenStream = 0 ;
94
+ for (pi_event event : events) {
95
+ CUstream stream = event->get_queue ()->stream_ ;
96
+
97
+ if (!event || (!first && stream == lastSeenStream)) {
98
+ continue ;
81
99
}
82
100
101
+ first = false ;
102
+ lastSeenStream = stream;
103
+
83
104
auto result = f (event);
84
105
if (result != PI_SUCCESS) {
85
106
return result;
@@ -354,6 +375,11 @@ pi_result _pi_event::record() {
354
375
CUstream cuStream = queue_->get ();
355
376
356
377
try {
378
+ eventId_ = queue_->get_next_event_id ();
379
+ if (eventId_ == 0 ) {
380
+ cl::sycl::detail::pi::die (
381
+ " Unrecoverable program state reached in event identifier overflow" );
382
+ }
357
383
result = PI_CHECK_ERROR (cuEventRecord (evEnd_, cuStream));
358
384
} catch (pi_result error) {
359
385
result = error;
@@ -1958,8 +1984,8 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
1958
1984
pi_result cuda_piEventsWait (pi_uint32 num_events, const pi_event *event_list) {
1959
1985
1960
1986
try {
1961
- pi_result err = PI_SUCCESS ;
1962
-
1987
+ assert (num_events != 0 ) ;
1988
+ assert (event_list);
1963
1989
if (num_events == 0 ) {
1964
1990
return PI_INVALID_VALUE;
1965
1991
}
@@ -1971,11 +1997,7 @@ pi_result cuda_piEventsWait(pi_uint32 num_events, const pi_event *event_list) {
1971
1997
auto context = event_list[0 ]->get_context ();
1972
1998
ScopedContext active (context);
1973
1999
1974
- for (pi_uint32 count = 0 ; count < num_events && (err == PI_SUCCESS);
1975
- count++) {
1976
-
1977
- auto event = event_list[count];
1978
-
2000
+ auto waitFunc = [context](pi_event event) -> pi_result {
1979
2001
if (!event) {
1980
2002
return PI_INVALID_EVENT;
1981
2003
}
@@ -1984,9 +2006,9 @@ pi_result cuda_piEventsWait(pi_uint32 num_events, const pi_event *event_list) {
1984
2006
return PI_INVALID_CONTEXT;
1985
2007
}
1986
2008
1987
- err = event->wait ();
1988
- }
1989
- return err ;
2009
+ return event->wait ();
2010
+ };
2011
+ return forLatestEvents (event_list, num_events, waitFunc) ;
1990
2012
} catch (pi_result err) {
1991
2013
return err;
1992
2014
} catch (...) {
@@ -2760,10 +2782,10 @@ pi_result cuda_piEnqueueEventsWait(pi_queue command_queue,
2760
2782
2761
2783
if (event_wait_list) {
2762
2784
auto result =
2763
- forEachEvent (event_wait_list, num_events_in_wait_list,
2764
- [command_queue](pi_event event) -> pi_result {
2765
- return enqueueEventWait (command_queue, event);
2766
- });
2785
+ forLatestEvents (event_wait_list, num_events_in_wait_list,
2786
+ [command_queue](pi_event event) -> pi_result {
2787
+ return enqueueEventWait (command_queue, event);
2788
+ });
2767
2789
2768
2790
if (result != PI_SUCCESS) {
2769
2791
return result;
0 commit comments