Skip to content

Commit b478d2f

Browse files
[SYCL] Fix resource leak related to SYCL_FALLBACK_ASSERT (#12532)
#6837 enabled asynchronous buffer destruction for buffers constructed without host data. However, initial fallback assert implementation in #3767 predates it and as such had to place the buffer inside `queue_impl` to avoid unintended synchronization point. I don't know if there was the same crash observed on the end-to-end test added as part of this PR prior to #3767, but it doesn't even matter because the "new" implementation is both simpler and doesn't result in a crash. I suspect that without it (with the buffer for fallback assert implementation being a data member of `sycl::queue_impl`) we had a cyclic dependency somewhere leading to resource leak and ultimately to the assert in `DeviceGlobalUSMMem::~DeviceGlobalUSMMem()`.
1 parent 7348207 commit b478d2f

File tree

4 files changed

+49
-4
lines changed

4 files changed

+49
-4
lines changed

sycl/include/sycl/queue.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2965,7 +2965,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
29652965
Rest...);
29662966
}
29672967

2968+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
29682969
buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
2970+
#endif
29692971

29702972
event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
29712973
bool IsDeviceImageScope, size_t NumBytes,
@@ -3019,9 +3021,7 @@ class AssertInfoCopier;
30193021
*/
30203022
event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
30213023
const detail::code_location &CodeLoc) {
3022-
using AHBufT = buffer<detail::AssertHappened, 1>;
3023-
3024-
AHBufT &Buffer = Self.getAssertHappenedBuffer();
3024+
buffer<detail::AssertHappened, 1> Buffer{1};
30253025

30263026
event CopierEv, CheckerEv, PostCheckerEv;
30273027
auto CopierCGF = [&](handler &CGH) {

sycl/source/detail/queue_impl.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ class queue_impl {
108108
const async_handler &AsyncHandler, const property_list &PropList)
109109
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
110110
MPropList(PropList), MHostQueue(MDevice->is_host()),
111+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
111112
MAssertHappenedBuffer(range<1>{1}),
113+
#endif
112114
MIsInorder(has_property<property::queue::in_order>()),
113115
MDiscardEvents(
114116
has_property<ext::oneapi::property::queue::discard_events>()),
@@ -283,7 +285,9 @@ class queue_impl {
283285
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context,
284286
const async_handler &AsyncHandler)
285287
: MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false),
288+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
286289
MAssertHappenedBuffer(range<1>{1}),
290+
#endif
287291
MIsInorder(has_property<property::queue::in_order>()),
288292
MDiscardEvents(
289293
has_property<ext::oneapi::property::queue::discard_events>()),
@@ -305,7 +309,10 @@ class queue_impl {
305309
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context,
306310
const async_handler &AsyncHandler, const property_list &PropList)
307311
: MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList),
308-
MHostQueue(false), MAssertHappenedBuffer(range<1>{1}),
312+
MHostQueue(false),
313+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
314+
MAssertHappenedBuffer(range<1>{1}),
315+
#endif
309316
MIsInorder(has_property<property::queue::in_order>()),
310317
MDiscardEvents(
311318
has_property<ext::oneapi::property::queue::discard_events>()),
@@ -673,9 +680,11 @@ class queue_impl {
673680
/// \return a native handle.
674681
pi_native_handle getNative(int32_t &NativeHandleDesc) const;
675682

683+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
676684
buffer<AssertHappened, 1> &getAssertHappenedBuffer() {
677685
return MAssertHappenedBuffer;
678686
}
687+
#endif
679688

680689
void registerStreamServiceEvent(const EventImplPtr &Event) {
681690
std::lock_guard<std::mutex> Lock(MMutex);
@@ -918,8 +927,10 @@ class queue_impl {
918927
/// need to emulate it with multiple native in-order queues.
919928
bool MEmulateOOO = false;
920929

930+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
921931
// Buffer to store assert failure descriptor
922932
buffer<AssertHappened, 1> MAssertHappenedBuffer;
933+
#endif
923934

924935
// This event is employed for enhanced dependency tracking with in-order queue
925936
// Access to the event should be guarded with MMutex

sycl/source/queue.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,11 @@ pi_native_handle queue::getNative(int32_t &NativeHandleDesc) const {
298298
return impl->getNative(NativeHandleDesc);
299299
}
300300

301+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
301302
buffer<detail::AssertHappened, 1> &queue::getAssertHappenedBuffer() {
302303
return impl->getAssertHappenedBuffer();
303304
}
305+
#endif
304306

305307
event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
306308
bool IsDeviceImageScope, size_t NumBytes,
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// Device globals aren't supported on opencl:gpu yet.
5+
// UNSUPPORTED: opencl && gpu
6+
7+
// TODO: Fails at JIT compilation for some reason.
8+
// UNSUPPORTED: hip
9+
#define SYCL_FALLBACK_ASSERT 1
10+
11+
#include <sycl/sycl.hpp>
12+
13+
// DeviceGlobalUSMMem::~DeviceGlobalUSMMem() has asserts to ensure some
14+
// resources have been cleaned up when it's executed. Those asserts used to fail
15+
// when "AssertHappened" buffer used in fallback implementation of the device
16+
// assert was a data member of the queue_impl.
17+
sycl::ext::oneapi::experimental::device_global<int32_t> dg;
18+
19+
int main() {
20+
sycl::queue q;
21+
q.submit([&](sycl::handler &cgh) {
22+
sycl::range<1> R{16};
23+
cgh.parallel_for(sycl::nd_range<1>{R, R}, [=](sycl::nd_item<1> ndi) {
24+
if (ndi.get_global_linear_id() == 0)
25+
dg.get() = 42;
26+
auto sg = sycl::ext::oneapi::experimental::this_sub_group();
27+
auto active = sycl::ext::oneapi::group_ballot(sg, 1);
28+
});
29+
}).wait();
30+
31+
return 0;
32+
}

0 commit comments

Comments
 (0)