From 75ed1354669ed86a4bb417cb9f9029d2131b047a Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 29 Jan 2024 14:17:27 -0800 Subject: [PATCH 1/4] [SYCL] Fix resource leak related to SYCL_FALLBACK_ASSERT https://github.com/intel/llvm/pull/6837 enabled asynchronous buffer destruction for buffers constructed without host data. However, initial fallback assert implementation in https://github.com/intel/llvm/pull/3767 predates it and as such had to place the buffer inside `queue_impl` to avoid unintended synchronization point. I don't know if there was the same crash observed on the end-to-end test added as part of this PR prior to https://github.com/intel/llvm/pull/3767, but it doesn't even matter because the "new" implementation is both simpler and doesn't result in a crash. I suspect that without it (with the buffer for fallback assert implementation being a data member of `sycl::queue_impl`) we had a cyclic dependency somewhere leading to resource leak and ultimately to the assert in `DeviceGlobalUSMMem::~DeviceGlobalUSMMem()`. --- sycl/include/sycl/queue.hpp | 4 +-- sycl/test-e2e/Assert/check_resource_leak.cpp | 28 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 sycl/test-e2e/Assert/check_resource_leak.cpp diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 23008e75b80fb..2a69a1a9c756b 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -3017,9 +3017,7 @@ class AssertInfoCopier; */ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, const detail::code_location &CodeLoc) { - using AHBufT = buffer; - - AHBufT &Buffer = Self.getAssertHappenedBuffer(); + buffer Buffer{1}; event CopierEv, CheckerEv, PostCheckerEv; auto CopierCGF = [&](handler &CGH) { diff --git a/sycl/test-e2e/Assert/check_resource_leak.cpp b/sycl/test-e2e/Assert/check_resource_leak.cpp new file mode 100644 index 0000000000000..f9a7711eb4fe3 --- /dev/null +++ b/sycl/test-e2e/Assert/check_resource_leak.cpp @@ -0,0 +1,28 @@ +// RUN: %{build} -o %t.out +// RUN: %if level_zero %{ env UR_L0_LEAKS_DEBUG=1 %} %{run} %t.out + +// Device globals aren't supported on opencl:gpu yet. +// UNSUPPORTED: opencl && gpu +#define SYCL_FALLBACK_ASSERT 1 + +#include + +// DeviceGlobalUSMMem::~DeviceGlobalUSMMem() has asserts to ensure some +// resources have been cleaned up when it's executed. Those asserts used to fail +// when "AssertHappened" buffer used in fallback implementation of the device +// assert was a data member of the queue_impl. +sycl::ext::oneapi::experimental::device_global dg; + +int main() { + sycl::queue q; + q.submit([&](sycl::handler& cgh) { + sycl::range<1> R{16}; + cgh.parallel_for(sycl::nd_range<1>{R, R}, [=](sycl::nd_item<1> ndi) { + if (ndi.get_global_linear_id() == 0) dg.get() = 42; + auto sg = sycl::ext::oneapi::experimental::this_sub_group(); + auto active = sycl::ext::oneapi::group_ballot(sg, 1); + }); + }).wait(); + + return 0; +} From 25b294ddeb758e46e69fdd5509593970f1c1d742 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 29 Jan 2024 14:24:17 -0800 Subject: [PATCH 2/4] Remove old AssertHappened buffer in preview mode --- sycl/include/sycl/queue.hpp | 2 ++ sycl/source/detail/queue_impl.hpp | 13 ++++++++++++- sycl/source/queue.cpp | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 2a69a1a9c756b..729bc2b7e76da 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -2963,7 +2963,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { Rest...); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES buffer &getAssertHappenedBuffer(); +#endif event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 0c27a177dbf1a..2bfe4ab75c7e3 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -108,7 +108,9 @@ class queue_impl { const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), MHostQueue(MDevice->is_host()), +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES MAssertHappenedBuffer(range<1>{1}), +#endif MIsInorder(has_property()), MDiscardEvents( has_property()), @@ -283,7 +285,9 @@ class queue_impl { queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES MAssertHappenedBuffer(range<1>{1}), +#endif MIsInorder(has_property()), MDiscardEvents( has_property()), @@ -305,7 +309,10 @@ class queue_impl { queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), - MHostQueue(false), MAssertHappenedBuffer(range<1>{1}), + MHostQueue(false), +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + MAssertHappenedBuffer(range<1>{1}), +#endif MIsInorder(has_property()), MDiscardEvents( has_property()), @@ -670,9 +677,11 @@ class queue_impl { /// \return a native handle. pi_native_handle getNative(int32_t &NativeHandleDesc) const; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES buffer &getAssertHappenedBuffer() { return MAssertHappenedBuffer; } +#endif void registerStreamServiceEvent(const EventImplPtr &Event) { std::lock_guard Lock(MMutex); @@ -888,8 +897,10 @@ class queue_impl { /// need to emulate it with multiple native in-order queues. bool MEmulateOOO = false; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // Buffer to store assert failure descriptor buffer MAssertHappenedBuffer; +#endif // This event is employed for enhanced dependency tracking with in-order queue // Access to the event should be guarded with MLastEventMtx diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 109e6396a0341..6ca29b8505d50 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -268,9 +268,11 @@ pi_native_handle queue::getNative(int32_t &NativeHandleDesc) const { return impl->getNative(NativeHandleDesc); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES buffer &queue::getAssertHappenedBuffer() { return impl->getAssertHappenedBuffer(); } +#endif event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, From 199ee9b980034fae32fe6fb81d1c4b9849fed45e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 29 Jan 2024 14:42:33 -0800 Subject: [PATCH 3/4] clang-format --- sycl/test-e2e/Assert/check_resource_leak.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Assert/check_resource_leak.cpp b/sycl/test-e2e/Assert/check_resource_leak.cpp index f9a7711eb4fe3..8bde2e981ca3b 100644 --- a/sycl/test-e2e/Assert/check_resource_leak.cpp +++ b/sycl/test-e2e/Assert/check_resource_leak.cpp @@ -15,10 +15,11 @@ sycl::ext::oneapi::experimental::device_global dg; int main() { sycl::queue q; - q.submit([&](sycl::handler& cgh) { + q.submit([&](sycl::handler &cgh) { sycl::range<1> R{16}; cgh.parallel_for(sycl::nd_range<1>{R, R}, [=](sycl::nd_item<1> ndi) { - if (ndi.get_global_linear_id() == 0) dg.get() = 42; + if (ndi.get_global_linear_id() == 0) + dg.get() = 42; auto sg = sycl::ext::oneapi::experimental::this_sub_group(); auto active = sycl::ext::oneapi::group_ballot(sg, 1); }); From 9e3377a1412a9be407c1da8d36749733e3e2c45e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 30 Jan 2024 11:00:43 -0800 Subject: [PATCH 4/4] Remove UR_L0_LEAKS_DEBUG from the test and disable on HIP HIP failure is unrelated, so are (likely) extra memory leaks found in CI. The manifestation of the original bug is that almost nothing was freed resulting in an assert in `DeviceGlobalUSMMem::~DeviceGlobalUSMMem()`. That is what this PR addresses and is being verified by the test even without UR_L0_LEAKS_DEBUG. --- sycl/test-e2e/Assert/check_resource_leak.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Assert/check_resource_leak.cpp b/sycl/test-e2e/Assert/check_resource_leak.cpp index 8bde2e981ca3b..252d2ed9e0c49 100644 --- a/sycl/test-e2e/Assert/check_resource_leak.cpp +++ b/sycl/test-e2e/Assert/check_resource_leak.cpp @@ -1,8 +1,11 @@ // RUN: %{build} -o %t.out -// RUN: %if level_zero %{ env UR_L0_LEAKS_DEBUG=1 %} %{run} %t.out +// RUN: %{run} %t.out // Device globals aren't supported on opencl:gpu yet. // UNSUPPORTED: opencl && gpu + +// TODO: Fails at JIT compilation for some reason. +// UNSUPPORTED: hip #define SYCL_FALLBACK_ASSERT 1 #include