From 0601210d5fe5798f45de3182552138a48eb6ad89 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 16 Sep 2022 09:42:45 -0700 Subject: [PATCH 01/62] [SYCL] Mark mem object which may have not blocking dtor according to Spec2020 Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp | 8 ++++++++ sycl/source/detail/scheduler/scheduler.cpp | 7 ++++--- sycl/source/detail/scheduler/scheduler.hpp | 2 +- sycl/source/detail/sycl_mem_obj_t.cpp | 4 ++-- sycl/source/detail/sycl_mem_obj_t.hpp | 5 ++++- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp index 3a4f995a707f7..34b4b992c7554 100644 --- a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp +++ b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp @@ -31,6 +31,7 @@ class SYCLMemObjAllocator { template AllocatorT getAllocator() { return *reinterpret_cast(getAllocatorImpl()); } + virtual bool isDefault() { return false; }; }; template @@ -61,6 +62,8 @@ class SYCLMemObjAllocatorHolder : public SYCLMemObjAllocator { virtual std::size_t getValueSize() const override { return MValueSize; } + bool isDefault() override { return isDefaultImpl(); } + protected: virtual void *getAllocatorImpl() override { return &MAllocator; } @@ -83,6 +86,11 @@ class SYCLMemObjAllocatorHolder : public SYCLMemObjAllocator { MAllocator.setAlignment(std::max(RequiredAlign, 64)); } + constexpr bool isDefaultImpl() { + return std::is_same>::value; + } + AllocatorT MAllocator; std::size_t MValueSize; }; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index b43b53aa72dcf..fc5abfc845e86 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -258,7 +258,9 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { deallocateStreams(StreamsToDeallocate); } -void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { +void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, + bool NotBlockingRelease) { + std::ignore = NotBlockingRelease; // We are going to traverse a graph of finished commands. Gather stream // objects from these commands if any and deallocate buffers for these stream // objects, this is needed to guarantee that streamed data is printed and @@ -442,8 +444,7 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { - if (Cmds.empty()) - { + if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) return; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index b6676fa4b0aa9..9235b09064d7e 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -397,7 +397,7 @@ class Scheduler { /// This member function is used by \ref buffer and \ref image. /// /// \param MemObj is a memory object that points to the buffer being removed. - void removeMemoryObject(detail::SYCLMemObjI *MemObj); + void removeMemoryObject(detail::SYCLMemObjI *MemObj, bool NotBlockingRelease); /// Removes finished non-leaf non-alloca commands from the subgraph (assuming /// that all its commands have been waited for). 
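The isDefaultImpl() helper above boils down to a compile-time type comparison. A minimal standalone sketch of the same idea follows; the stand-in types and the is_default_allocator name are illustrative assumptions, not code from this series:

#include <type_traits>

// Stand-ins (assumed for illustration) for the real SYCL allocator types;
// only the type identity matters for the check.
template <typename T> struct sycl_memory_object_allocator {};
template <typename T> struct usm_allocator {};

// Mirrors the idea of isDefaultImpl(): an allocator counts as "default"
// exactly when it is the library-provided allocator for the value type.
template <typename AllocatorT, typename ValueT>
constexpr bool is_default_allocator =
    std::is_same<AllocatorT, sycl_memory_object_allocator<ValueT>>::value;

static_assert(is_default_allocator<sycl_memory_object_allocator<int>, int>,
              "the default allocator must be detected");
static_assert(!is_default_allocator<usm_allocator<int>, int>,
              "a custom allocator must not be detected as default");

Because the comparison resolves at compile time, a later revision of this series can drop the virtual isDefault() hook entirely and evaluate the same std::is_same test directly in the buffer destructor.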
diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 3636e5d5e0ac6..0bab7d38004c9 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -31,7 +31,7 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MInteropContext(detail::getSyclObjImpl(SyclContext)), MOpenCLInterop(true), MHostPtrReadOnly(false), MNeedWriteBack(true), MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), - MSharedPtrStorage(nullptr) { + MSharedPtrStorage(nullptr), MNotBlockingRelease(false) { if (MInteropContext->is_host()) throw sycl::invalid_parameter_error( "Creation of interoperability memory object using host context is " @@ -91,7 +91,7 @@ void SYCLMemObjT::updateHostMemory() { // If we're attached to a memory record, process the deletion of the memory // record. We may get detached before we do this. if (MRecord) - Scheduler::getInstance().removeMemoryObject(this); + Scheduler::getInstance().removeMemoryObject(this, MNotBlockingRelease); releaseHostMem(MShadowCopy); if (MOpenCLInterop) { diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index 69335f5ab72dc..aa488318f0064 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -57,7 +57,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { MInteropContext(nullptr), MInteropMemObject(nullptr), MOpenCLInterop(false), MHostPtrReadOnly(false), MNeedWriteBack(true), MSizeInBytes(SizeInBytes), MUserPtr(nullptr), MShadowCopy(nullptr), - MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr) {} + MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr), + MNotBlockingRelease(MAllocator->isDefault()) {} SYCLMemObjT(const property_list &Props, std::unique_ptr Allocator) @@ -287,6 +288,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { // Field which holds user's shared_ptr in case of memory object is created // using constructor with shared_ptr. 
std::shared_ptr MSharedPtrStorage; + // Field to identify if dtor is not necessarily blocking + bool MNotBlockingRelease; }; } // namespace detail From aff3be6bf927c2841723f129b16dc28fab5c29b6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 07:36:38 -0700 Subject: [PATCH 02/62] Add draft how to delay buffer_impl release Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/buffer.hpp | 4 +++- sycl/source/buffer.cpp | 2 ++ sycl/source/detail/scheduler/scheduler.cpp | 4 ++++ sycl/source/detail/scheduler/scheduler.hpp | 2 ++ sycl/source/detail/sycl_mem_obj_t.cpp | 7 +++++++ sycl/source/detail/sycl_mem_obj_t.hpp | 2 ++ 6 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 3225f7827737c..423cb7ae4c78a 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -117,6 +117,8 @@ class __SYCL_EXPORT buffer_plain { size_t getSize() const; + void handleRelease() const; + std::shared_ptr impl; }; @@ -457,7 +459,7 @@ class buffer : public detail::buffer_plain { buffer &operator=(buffer &&rhs) = default; - ~buffer() = default; + ~buffer() { buffer_plain::handleRelease(); } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index 823e1a19b338f..a2dae0fd3e76c 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -121,6 +121,8 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } +void buffer_plain::handleRelease() const { impl->detachObjectIfNeeded(impl); } + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index fc5abfc845e86..2e79f515dee31 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -473,6 +473,10 @@ void Scheduler::cleanupCommands(const std::vector &Cmds) { } } +void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { + +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 9235b09064d7e..bdf0d32b174ff 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -443,6 +443,8 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); + void deferMemObjRelease(const std::shared_ptr &MemObj); + Scheduler(); ~Scheduler(); diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 0bab7d38004c9..f77114dc25c2e 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -147,6 +147,13 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, } else HostPtrReadOnly = false; } + +void SYCLMemObjT::detachObjectIfNeeded( + const std::shared_ptr &self) const { + if (self.use_count() == 1 && MNotBlockingRelease) + Scheduler::getInstance().deferMemObjRelease(self); +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index aa488318f0064..48ab0487ba591 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -253,6 +253,8 @@ class __SYCL_EXPORT SYCLMemObjT : public 
SYCLMemObjI { bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } + void detachObjectIfNeeded(const std::shared_ptr &self) const; + protected: // An allocateMem helper that determines which host ptr to use void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData, From 1195b593e92d7d2191cb83e62086cdfc2a38ce8d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 08:19:21 -0700 Subject: [PATCH 03/62] Update symbols for non-breaking change Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_linux.dump | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index d7a6f7d91e2a0..4dee0a4c3f5c1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4037,6 +4037,7 @@ _ZNK4sycl3_V15queue9getNativeEv _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE +_ZNK4sycl3_V16detail11SYCLMemObjT20detachObjectIfNeededERKSt10shared_ptrIS2_E _ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv _ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv _ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE @@ -4105,6 +4106,7 @@ _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv +_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv From 8d058023dad2eb1ca72ad5a51275a0aaff111b33 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 08:42:23 -0700 Subject: [PATCH 04/62] Update abi test vtable.cpp - non-breaking change Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/vtable.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/test/abi/vtable.cpp b/sycl/test/abi/vtable.cpp index 11a4189ebaa00..5470f0c12d1c8 100644 --- a/sycl/test/abi/vtable.cpp +++ b/sycl/test/abi/vtable.cpp @@ -53,6 +53,8 @@ void foo(sycl::detail::SYCLMemObjAllocator &Allocator) { // CHECK-NEXT: 6 | void sycl::detail::SYCLMemObjAllocator::deallocate(void *, std::size_t) [pure] // CHECK-NEXT: 7 | std::size_t sycl::detail::SYCLMemObjAllocator::getValueSize() const [pure] // CHECK-NEXT: 8 | void sycl::detail::SYCLMemObjAllocator::setAlignment(std::size_t) [pure] +// CHECK-NEXT: 9 | bool sycl::detail::SYCLMemObjAllocator::isDefault() + void foo(sycl::device_selector &DeviceSelector) { (void)DeviceSelector.select_device(); From b54b8e48523d5fc00e752173909ba30d1e9e93d3 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 08:47:40 -0700 Subject: [PATCH 05/62] Update SYCL_MINOR_VERSION for non-breaking ABI change Signed-off-by: Tikhomirova, Kseniya --- sycl/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index babdc853af05c..021fc8e2b8624 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -28,7 +28,7 @@ include(SYCLUtils) # The change in SYCL_MAJOR_VERSION must be accompanied with the same update in # llvm/clang/lib/Driver/CMakeLists.txt. 
set(SYCL_MAJOR_VERSION 6) -set(SYCL_MINOR_VERSION 0) +set(SYCL_MINOR_VERSION 1) set(SYCL_PATCH_VERSION 0) set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) From 965a0155e08608f4c2e316fe0674d73d64742d64 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 15:27:23 -0700 Subject: [PATCH 06/62] Remove ABI break Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/buffer.hpp | 8 ++++++-- sycl/source/buffer.cpp | 4 +++- sycl/source/detail/scheduler/scheduler.cpp | 11 +++++++---- sycl/source/detail/scheduler/scheduler.hpp | 7 ++++++- sycl/source/detail/sycl_mem_obj_t.cpp | 10 +++++----- sycl/source/detail/sycl_mem_obj_t.hpp | 14 ++++++++++---- 6 files changed, 37 insertions(+), 17 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 423cb7ae4c78a..572ccaa70cacf 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -117,7 +117,7 @@ class __SYCL_EXPORT buffer_plain { size_t getSize() const; - void handleRelease() const; + void handleRelease(bool DefaultAllocator) const; std::shared_ptr impl; }; @@ -459,7 +459,11 @@ class buffer : public detail::buffer_plain { buffer &operator=(buffer &&rhs) = default; - ~buffer() { buffer_plain::handleRelease(); } + ~buffer() { + buffer_plain::handleRelease( + std::is_same>::value); + } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index a2dae0fd3e76c..b29e15683af52 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -121,7 +121,9 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } -void buffer_plain::handleRelease() const { impl->detachObjectIfNeeded(impl); } +void buffer_plain::handleRelease(bool DefaultAllocator) const { + impl->detachObjectIfNeeded(impl, DefaultAllocator); +} } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 2e79f515dee31..071e287f459cb 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -258,9 +258,7 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { deallocateStreams(StreamsToDeallocate); } -void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, - bool NotBlockingRelease) { - std::ignore = NotBlockingRelease; +void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // We are going to traverse a graph of finished commands. Gather stream // objects from these commands if any and deallocate buffers for these stream // objects, this is needed to guarantee that streamed data is printed and @@ -412,6 +410,7 @@ Scheduler::~Scheduler() { "not all resources were released. Please be sure that all kernels " "have synchronization points.\n\n"); } + cleanupDeferredMemObjects(true); // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, // clean them up now. 
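For context, here is a hedged usage sketch (not part of the patch) of the behavior this change targets: under SYCL 2020, a buffer that owns its storage has no observable write-back, so its destructor may release resources lazily, while a buffer constructed over user memory must block until the data is written back:

#include <sycl/sycl.hpp>
#include <vector>

int main() {
  sycl::queue Q;
  {
    // Owns its storage, default allocator: the destructor may defer release.
    sycl::buffer<int, 1> Owned{sycl::range<1>{1024}};
    Q.submit([&](sycl::handler &CGH) {
      sycl::accessor A{Owned, CGH, sycl::write_only};
      CGH.parallel_for(sycl::range<1>{1024}, [=](sycl::id<1> I) { A[I] = 1; });
    });
  } // No host memory to update, so no need to wait here.

  std::vector<int> Host(1024, 0);
  {
    // Wraps user memory: the destructor must block for the write-back.
    sycl::buffer<int, 1> Shared{Host.data(), sycl::range<1>{1024}};
    Q.submit([&](sycl::handler &CGH) {
      sycl::accessor A{Shared, CGH, sycl::write_only};
      CGH.parallel_for(sycl::range<1>{1024}, [=](sycl::id<1> I) { A[I] = 2; });
    });
  } // Host is guaranteed to hold the results after this point.
  return 0;
}

This is the distinction that handleRelease(DefaultAllocator) encodes: only the first kind of buffer is eligible for deferred release.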
@@ -474,9 +473,13 @@ void Scheduler::cleanupCommands(const std::vector &Cmds) { } void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { - + std::lock_guard Lock{MDeferredMemReleaseMutex}; + MDeferredMemObjRelease.push_back(MemObj); + cleanupDeferredMemObjects(false); } +void Scheduler::cleanupDeferredMemObjects(bool Blocking) {} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index bdf0d32b174ff..60ec201a0135a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -397,7 +397,7 @@ class Scheduler { /// This member function is used by \ref buffer and \ref image. /// /// \param MemObj is a memory object that points to the buffer being removed. - void removeMemoryObject(detail::SYCLMemObjI *MemObj, bool NotBlockingRelease); + void removeMemoryObject(detail::SYCLMemObjI *MemObj); /// Removes finished non-leaf non-alloca commands from the subgraph (assuming /// that all its commands have been waited for). @@ -466,6 +466,8 @@ class Scheduler { static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, std::vector &ToCleanUp); + bool isRecordReadyForRelease(MemObjRecord *Record); + void cleanupDeferredMemObjects(bool Blocking); /// Graph builder class. /// /// The graph builder provides means to change an existing graph (e.g. add @@ -773,6 +775,9 @@ class Scheduler { std::vector MDeferredCleanupCommands; std::mutex MDeferredCleanupMutex; + std::list> MDeferredMemObjRelease; + std::mutex MDeferredMemReleaseMutex; + QueueImplPtr DefaultHostQueue; friend class Command; diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index f77114dc25c2e..e19a46adafe2f 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -31,7 +31,7 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext, MInteropContext(detail::getSyclObjImpl(SyclContext)), MOpenCLInterop(true), MHostPtrReadOnly(false), MNeedWriteBack(true), MUserPtr(nullptr), MShadowCopy(nullptr), MUploadDataFunctor(nullptr), - MSharedPtrStorage(nullptr), MNotBlockingRelease(false) { + MSharedPtrStorage(nullptr), MNoHostPtrProvided(false) { if (MInteropContext->is_host()) throw sycl::invalid_parameter_error( "Creation of interoperability memory object using host context is " @@ -91,7 +91,7 @@ void SYCLMemObjT::updateHostMemory() { // If we're attached to a memory record, process the deletion of the memory // record. We may get detached before we do this. 
   if (MRecord)
-    Scheduler::getInstance().removeMemoryObject(this, MNotBlockingRelease);
+    Scheduler::getInstance().removeMemoryObject(this);
   releaseHostMem(MShadowCopy);
 
   if (MOpenCLInterop) {
@@ -148,9 +148,9 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context,
     HostPtrReadOnly = false;
 }
 
-void SYCLMemObjT::detachObjectIfNeeded(
-    const std::shared_ptr<SYCLMemObjT> &self) const {
-  if (self.use_count() == 1 && MNotBlockingRelease)
+void SYCLMemObjT::detachObjectIfNeeded(const std::shared_ptr<SYCLMemObjT> &self,
+                                       bool DefaultAllocator) const {
+  if (self.use_count() == 1 && MNoHostPtrProvided && DefaultAllocator)
     Scheduler::getInstance().deferMemObjRelease(self);
 }
 
diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp
index 48ab0487ba591..c2f61e5ba0a7e 100644
--- a/sycl/source/detail/sycl_mem_obj_t.hpp
+++ b/sycl/source/detail/sycl_mem_obj_t.hpp
@@ -58,7 +58,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
       MOpenCLInterop(false), MHostPtrReadOnly(false), MNeedWriteBack(true),
       MSizeInBytes(SizeInBytes), MUserPtr(nullptr), MShadowCopy(nullptr),
       MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr),
-      MNotBlockingRelease(MAllocator->isDefault()) {}
+      MNoHostPtrProvided(true) {}
 
   SYCLMemObjT(const property_list &Props,
               std::unique_ptr<SYCLMemObjAllocator> Allocator)
@@ -170,6 +170,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
   }
 
   void handleHostData(void *HostPtr, const size_t RequiredAlign) {
+    MNoHostPtrProvided = false;
     if (!MHostPtrReadOnly && HostPtr) {
       set_final_data([HostPtr](const std::function<void(void *const Ptr)> &F) {
         F(HostPtr);
@@ -193,6 +194,7 @@ void handleHostData(const std::shared_ptr<void> &HostPtr,
                       const size_t RequiredAlign, bool IsConstPtr) {
+    MNoHostPtrProvided = false;
     MSharedPtrStorage = HostPtr;
     MHostPtrReadOnly = IsConstPtr;
     if (HostPtr) {
@@ -253,7 +255,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
 
   bool isHostPointerReadOnly() const { return MHostPtrReadOnly; }
 
-  void detachObjectIfNeeded(const std::shared_ptr<SYCLMemObjT> &self) const;
+  void detachObjectIfNeeded(const std::shared_ptr<SYCLMemObjT> &self,
+                            bool DefaultAllocator) const;
 
 protected:
   // An allocateMem helper that determines which host ptr to use
   void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData,
@@ -290,8 +293,11 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
   // Field which holds user's shared_ptr in case of memory object is created
   // using constructor with shared_ptr.
   std::shared_ptr<const void> MSharedPtrStorage;
-  // Field to identify if dtor is not necessarily blocking
-  bool MNotBlockingRelease;
+  // Field to identify whether the dtor is not necessarily blocking.
+  // Checking MUploadDataFunctor alone is not enough: when a read-only
+  // HostPtr is provided, MUploadDataFunctor is empty, yet delayed release
+  // must still be disallowed.
+ bool MNoHostPtrProvided; }; } // namespace detail From 27ccbff729167a2ef1a8a8418b01af60a785ca3d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 15:34:20 -0700 Subject: [PATCH 07/62] Update symbols to new version Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_linux.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4dee0a4c3f5c1..6d0509c87947f 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4037,7 +4037,7 @@ _ZNK4sycl3_V15queue9getNativeEv _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT20detachObjectIfNeededERKSt10shared_ptrIS2_E +_ZNK4sycl3_V16detail11SYCLMemObjT20detachObjectIfNeededERKSt10shared_ptrIS2_Eb _ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv _ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv _ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE @@ -4106,7 +4106,7 @@ _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv -_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv +_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEb _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv From 9540fe04afd7e45afcbd243110ecf5766d883a5a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 15:40:51 -0700 Subject: [PATCH 08/62] Tiny rename Signed-off-by: Tikhomirova, Kseniya --- sycl/source/buffer.cpp | 2 +- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- sycl/source/detail/sycl_mem_obj_t.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index b29e15683af52..246fb9bb21594 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -122,7 +122,7 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } void buffer_plain::handleRelease(bool DefaultAllocator) const { - impl->detachObjectIfNeeded(impl, DefaultAllocator); + impl->detachMemoryObject(impl, DefaultAllocator); } } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index e19a46adafe2f..e9d61856d908d 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -148,7 +148,7 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, HostPtrReadOnly = false; } -void SYCLMemObjT::detachObjectIfNeeded(const std::shared_ptr &self, +void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &self, bool DefaultAllocator) const { if (self.use_count() == 1 && MNoHostPtrProvided && DefaultAllocator) Scheduler::getInstance().deferMemObjRelease(self); diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index c2f61e5ba0a7e..437b164a7915b 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -255,7 +255,7 @@ class __SYCL_EXPORT SYCLMemObjT 
: public SYCLMemObjI { bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } - void detachObjectIfNeeded(const std::shared_ptr &self, + void detachMemoryObject(const std::shared_ptr &self, bool DefaultAllocator) const; protected: From c00c7cb8ebdd6449de20d35d1a32f72b075314c0 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 15:41:32 -0700 Subject: [PATCH 09/62] Revert "Update abi test vtable.cpp - non-breaking change" This reverts commit 8d058023dad2eb1ca72ad5a51275a0aaff111b33. --- sycl/test/abi/vtable.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/test/abi/vtable.cpp b/sycl/test/abi/vtable.cpp index 5470f0c12d1c8..11a4189ebaa00 100644 --- a/sycl/test/abi/vtable.cpp +++ b/sycl/test/abi/vtable.cpp @@ -53,8 +53,6 @@ void foo(sycl::detail::SYCLMemObjAllocator &Allocator) { // CHECK-NEXT: 6 | void sycl::detail::SYCLMemObjAllocator::deallocate(void *, std::size_t) [pure] // CHECK-NEXT: 7 | std::size_t sycl::detail::SYCLMemObjAllocator::getValueSize() const [pure] // CHECK-NEXT: 8 | void sycl::detail::SYCLMemObjAllocator::setAlignment(std::size_t) [pure] -// CHECK-NEXT: 9 | bool sycl::detail::SYCLMemObjAllocator::isDefault() - void foo(sycl::device_selector &DeviceSelector) { (void)DeviceSelector.select_device(); From 661dace32174f247ad0b26d7d825d02c77b4e8fb Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 15:44:54 -0700 Subject: [PATCH 10/62] Remove isDefault method, reimplemented Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp index 34b4b992c7554..2cc38daedc827 100644 --- a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp +++ b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp @@ -31,7 +31,6 @@ class SYCLMemObjAllocator { template AllocatorT getAllocator() { return *reinterpret_cast(getAllocatorImpl()); } - virtual bool isDefault() { return false; }; }; template @@ -61,9 +60,6 @@ class SYCLMemObjAllocatorHolder : public SYCLMemObjAllocator { } virtual std::size_t getValueSize() const override { return MValueSize; } - - bool isDefault() override { return isDefaultImpl(); } - protected: virtual void *getAllocatorImpl() override { return &MAllocator; } @@ -86,11 +82,6 @@ class SYCLMemObjAllocatorHolder : public SYCLMemObjAllocator { MAllocator.setAlignment(std::max(RequiredAlign, 64)); } - constexpr bool isDefaultImpl() { - return std::is_same>::value; - } - AllocatorT MAllocator; std::size_t MValueSize; }; From 6615db300b058ebaea5e8ee364376b90d6008433 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 21 Sep 2022 16:25:37 -0700 Subject: [PATCH 11/62] Fix symbols again Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_linux.dump | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6d0509c87947f..b611c4a03f93c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4037,7 +4037,7 @@ _ZNK4sycl3_V15queue9getNativeEv _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT20detachObjectIfNeededERKSt10shared_ptrIS2_Eb +_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_Eb 
_ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv _ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv _ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE From 8174dc33ac285fa6e1a8f6173920b1807f7293ae Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 22 Sep 2022 06:28:58 -0700 Subject: [PATCH 12/62] Add handling of deferred mem objects release Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 5 + sycl/source/detail/event_impl.hpp | 2 + sycl/source/detail/scheduler/scheduler.cpp | 138 +++++++++++++++++++-- sycl/source/detail/scheduler/scheduler.hpp | 9 +- 4 files changed, 144 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 7d88e52baf27d..efc31fa817779 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -443,6 +443,11 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } +bool event_impl::isCompleted() { + return get_info() == + info::event_command_status::complete; +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index d33da055a4f8e..723d124f6278c 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -234,6 +234,8 @@ class event_impl { /// state. bool isInitialized() const noexcept { return MIsInitialized; } + bool isCompleted(); + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 071e287f459cb..adbd5b744863f 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -258,6 +258,18 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { deallocateStreams(StreamsToDeallocate); } +inline void Scheduler::releaseMemObjRecord( + detail::SYCLMemObjI *MemObj, + std::vector> &StreamsToDeallocate, + std::vector> &AuxResourcesToDeallocate) { + MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); + assert(Record); + MGraphBuilder.decrementLeafCountersForRecord(Record); + MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, + AuxResourcesToDeallocate); + MGraphBuilder.removeRecordForMemObj(MemObj); +} + void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // We are going to traverse a graph of finished commands. 
Gather stream // objects from these commands if any and deallocate buffers for these stream @@ -283,16 +295,14 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // No operations were performed on the mem object return; - waitForRecordToFinish(Record, Lock); + checkRecordReadinessForRelease(Record, Lock, true); } { WriteLockT Lock(MGraphLock, std::defer_lock); acquireWriteLock(Lock); - MGraphBuilder.decrementLeafCountersForRecord(Record); - MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, - AuxResourcesToDeallocate); - MGraphBuilder.removeRecordForMemObj(MemObj); + releaseMemObjRecord(MemObj, StreamsToDeallocate, + AuxResourcesToDeallocate); } } deallocateStreams(StreamsToDeallocate); @@ -443,6 +453,7 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { + cleanupDeferredMemObjects(false); if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) @@ -473,12 +484,123 @@ void Scheduler::cleanupCommands(const std::vector &Cmds) { } void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { - std::lock_guard Lock{MDeferredMemReleaseMutex}; - MDeferredMemObjRelease.push_back(MemObj); + { + std::lock_guard Lock{MDeferredMemReleaseMutex}; + MDeferredMemObjRelease.push_back(MemObj); + } cleanupDeferredMemObjects(false); } -void Scheduler::cleanupDeferredMemObjects(bool Blocking) {} +bool Scheduler::checkRecordReadinessForRelease(MemObjRecord *Record, + ReadLockT &GraphReadLock, + bool ForceWait) { + assert(Record); + // walk through LeavesCollection manually since now its iterator is not + // compatible with STL algorithms + std::vector ToCleanUp; + for (Command *Cmd : Record->MReadLeaves) { + if (Cmd->getEvent()->isCompleted()) + continue; + if (ForceWait) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } else + return false; + } + for (Command *Cmd : Record->MWriteLeaves) { + if (Cmd->getEvent()->isCompleted()) + continue; + if (ForceWait) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } else + return false; + } + // all dependencies is completed and we can enqueue all ReleaseCmds first. 
+  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
+    Command *ReleaseCmd = AllocaCmd->getReleaseCmd();
+    EnqueueResultT Res;
+    bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp);
+    if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
+      throw runtime_error("Enqueue process failed.",
+                          PI_ERROR_INVALID_OPERATION);
+  }
+  // Enqueue is fully done, so check whether each ReleaseCmd has completed.
+  for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) {
+    Command *Cmd = AllocaCmd->getReleaseCmd();
+    if (Cmd->getEvent()->isCompleted())
+      continue;
+    if (ForceWait)
+      GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
+    else
+      return false;
+  }
+  return true;
+}
+
+void Scheduler::cleanupDeferredMemObjects(bool ForceWait) {
+  {
+    std::lock_guard<std::mutex> Lock{MDeferredMemReleaseMutex};
+    if (MDeferredMemObjRelease.empty())
+      return;
+  }
+
+  // Aggregate objects ready for release to acquire the write lock only once.
+  std::list<std::shared_ptr<detail::SYCLMemObjI>> ObjsReadyToRelease;
+  {
+    ReadLockT Lock(MGraphLock);
+    {
+      // ForceWait == true is not expected to be used in parallel with
+      // adding a MemObj to the storage; there is no such scenario.
+      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
+      auto MemObjIt = MDeferredMemObjRelease.begin();
+      while (MemObjIt != MDeferredMemObjRelease.end()) {
+        MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get());
+        if (!Record) {
+          // Just trigger deletion since no operations were performed on the
+          // object and there are no commands or anything else to wait for.
+          MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
+          continue;
+        }
+        if (!checkRecordReadinessForRelease(Record, Lock, ForceWait)) {
+          MemObjIt++;
+          continue;
+        }
+        ObjsReadyToRelease.push_back(*MemObjIt);
+        MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
+      }
+    }
+  }
+  if (ObjsReadyToRelease.empty())
+    return;
+
+  std::vector<std::shared_ptr<stream_impl>> StreamsToDeallocate;
+  std::vector<std::shared_ptr<const void>> AuxResourcesToDeallocate;
+  {
+    WriteLockT Lock(MGraphLock, std::try_to_lock);
+    // In order to avoid deadlocks related to blocked commands, defer cleanup
+    // if the lock wasn't acquired.
+    if (Lock.owns_lock()) {
+      for (auto &MemObj : ObjsReadyToRelease)
+        releaseMemObjRecord(MemObj.get(), StreamsToDeallocate,
+                            AuxResourcesToDeallocate);
+    } else {
+      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
+      MDeferredMemObjRelease.splice(MDeferredMemObjRelease.end(),
+                                    ObjsReadyToRelease);
+    }
+  }
+  deallocateStreams(StreamsToDeallocate);
+  // ObjsReadyToRelease goes out of scope here and its contents are deleted.
+}
 
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index 60ec201a0135a..ffb097c769268 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -466,8 +466,13 @@ class Scheduler {
   static void enqueueLeavesOfReqUnlocked(const Requirement *const Req,
                                          std::vector<Command *> &ToCleanUp);
 
-  bool isRecordReadyForRelease(MemObjRecord *Record);
-  void cleanupDeferredMemObjects(bool Blocking);
+  bool checkRecordReadinessForRelease(MemObjRecord *Record,
+                                      ReadLockT &GraphReadLock, bool ForceWait);
+  void cleanupDeferredMemObjects(bool ForceWait);
+  inline void releaseMemObjRecord(
+      detail::SYCLMemObjI *MemObj,
+      std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
+      std::vector<std::shared_ptr<const void>> &AuxResourcesToDeallocate);
   /// Graph builder class.
   ///
   /// The graph builder provides means to change an existing graph (e.g.
add From d55405efbbb21ba34b5580eb427863acf8db3162 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 22 Sep 2022 06:55:30 -0700 Subject: [PATCH 13/62] Remove unused function and restore XPTI traces collection Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 143 ++++++++------------- sycl/source/detail/scheduler/scheduler.hpp | 19 +-- 2 files changed, 65 insertions(+), 97 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index adbd5b744863f..b3a8851be5006 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -26,51 +26,71 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -void Scheduler::waitForRecordToFinish(MemObjRecord *Record, - ReadLockT &GraphReadLock) { -#ifdef XPTI_ENABLE_INSTRUMENTATION - // Will contain the list of dependencies for the Release Command - std::set DepCommands; -#endif +bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, + ReadLockT &GraphReadLock, + bool ForceWait) { + assert(Record); std::vector ToCleanUp; for (Command *Cmd : Record->MReadLeaves) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); -#ifdef XPTI_ENABLE_INSTRUMENTATION - // Capture the dependencies - DepCommands.insert(Cmd); -#endif - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + if (Cmd->getEvent()->isCompleted()) + continue; + if (ForceWait) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } else + return false; } for (Command *Cmd : Record->MWriteLeaves) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); -#ifdef XPTI_ENABLE_INSTRUMENTATION - DepCommands.insert(Cmd); -#endif - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + if (Cmd->getEvent()->isCompleted()) + continue; + if (ForceWait) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } else + return false; } + // all dependencies is completed and we can enqueue all ReleaseCmds in advance for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); + if (ReleaseCmd->isSuccessfullyEnqueued()) + continue; #ifdef XPTI_ENABLE_INSTRUMENTATION + // Will contain the list of dependencies for the Release Command + std::set DepCommands; + // Capture the read dependencies + for (Command *Cmd : 
Record->MWriteLeaves) + DepCommands.insert(Cmd); + for (Command *Cmd : Record->MReadLeaves) + DepCommands.insert(Cmd); // Report these dependencies to the Command so these dependencies can be // reported as edges ReleaseCmd->resolveReleaseDependencies(DepCommands); #endif - GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock, - ToCleanUp); + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); + } + // enqueue is fully done and we can check if ReleaseCmd is completed + for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { + Command *Cmd = AllocaCmd->getReleaseCmd(); + if (Cmd->getEvent()->isCompleted()) + continue; + if (ForceWait) + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + else + return false; } + return true; } EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, @@ -295,7 +315,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // No operations were performed on the mem object return; - checkRecordReadinessForRelease(Record, Lock, true); + waitForRecordToFinish(Record, Lock, true); } { @@ -491,61 +511,6 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { cleanupDeferredMemObjects(false); } -bool Scheduler::checkRecordReadinessForRelease(MemObjRecord *Record, - ReadLockT &GraphReadLock, - bool ForceWait) { - assert(Record); - // walk through LeavesCollection manually since now its iterator is not - // compatible with STL algorithms - std::vector ToCleanUp; - for (Command *Cmd : Record->MReadLeaves) { - if (Cmd->getEvent()->isCompleted()) - continue; - if (ForceWait) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - } else - return false; - } - for (Command *Cmd : Record->MWriteLeaves) { - if (Cmd->getEvent()->isCompleted()) - continue; - if (ForceWait) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - } else - return false; - } - // all dependencies is completed and we can enqueue all ReleaseCmds first. 
- for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { - Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); - } - // enqueue is fully done and we can check if ReleaseCmd is completed - for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { - Command *Cmd = AllocaCmd->getReleaseCmd(); - if (Cmd->getEvent()->isCompleted()) - continue; - if (ForceWait) - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - else - return false; - } - return true; -} - void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { { std::lock_guard Lock{MDeferredMemReleaseMutex}; @@ -570,7 +535,7 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { MemObjIt = MDeferredMemObjRelease.erase(MemObjIt); continue; } - if (!checkRecordReadinessForRelease(Record, Lock, ForceWait)) { + if (!waitForRecordToFinish(Record, Lock, ForceWait)) { MemObjIt++; continue; } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index ffb097c769268..23f2a47acd4e4 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -466,8 +466,6 @@ class Scheduler { static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, std::vector &ToCleanUp); - bool checkRecordReadinessForRelease(MemObjRecord *Record, - ReadLockT &GraphReadLock, bool ForceWait); void cleanupDeferredMemObjects(bool ForceWait); inline void releaseMemObjRecord( detail::SYCLMemObjI *MemObj, @@ -764,15 +762,20 @@ class Scheduler { BlockingT Blocking = NON_BLOCKING); }; - /// This function waits on all of the graph leaves which somehow use the - /// memory object which is represented by \c Record. The function is called - /// upon destruction of memory buffer. - /// \param Record memory record to await graph leaves of to finish - /// \param GraphReadLock locked graph read lock + /// This function conditionally waits on all of the graph leaves which somehow + /// use the memory object which is represented by \c Record. The function is + /// called upon destruction of memory buffer. \param Record memory record to + /// await graph leaves of to finish \param GraphReadLock locked graph read + /// lock \param ForceWait flag to identify if we need to wait for all + /// dependencies /// /// GraphReadLock will be unlocked/locked as needed. Upon return from the /// function, GraphReadLock will be left in locked state. - void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock); + /// \return true if all record dependencies and release commands are + /// completed, otherwise - false. Must always return true if ForceWait == + /// true. 
+ bool waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock, + bool ForceWait); GraphBuilder MGraphBuilder; RWLockT MGraphLock; From bb2c4fbf2a3e7a480cb6c0139dd0141966ded710 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 22 Sep 2022 10:19:10 -0700 Subject: [PATCH 14/62] Add skeleton for unit test Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 8 + sycl/source/detail/global_handler.hpp | 3 + sycl/source/detail/scheduler/scheduler.hpp | 7 +- .../buffer/BufferDestructionCheck.cpp | 145 ++++++++++++++++++ sycl/unittests/buffer/CMakeLists.txt | 1 + 5 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 sycl/unittests/buffer/BufferDestructionCheck.cpp diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 4735640c5b733..41aa04bf660da 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -47,6 +47,14 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { return *IWL.Inst; } +bool GlobalHandler::attachScheduler(Scheduler *scheduler) { + const LockGuard Lock{MScheduler.Lock}; + if (MScheduler.Inst) + return false; + MScheduler.Inst.reset(scheduler); + return true; +} + Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } ProgramManager &GlobalHandler::getProgramManager() { diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 9e9016ab218e6..3d20dba152ed6 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -73,6 +73,9 @@ class GlobalHandler { void unloadPlugins(); + // For testing purposes only + bool attachScheduler(Scheduler *scheduler); + private: friend void releaseDefaultContexts(); friend void shutdown(); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 23f2a47acd4e4..f13ef367f64eb 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -442,11 +442,12 @@ class Scheduler { QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; } static MemObjRecord *getMemObjRecord(const Requirement *const Req); - - void deferMemObjRelease(const std::shared_ptr &MemObj); + // Virtual for testing purposes only + virtual void + deferMemObjRelease(const std::shared_ptr &MemObj); Scheduler(); - ~Scheduler(); + virtual ~Scheduler(); protected: // TODO: after switching to C++17, change std::shared_timed_mutex to diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp new file mode 100644 index 0000000000000..99a26ff77f38a --- /dev/null +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -0,0 +1,145 @@ +//==- BufferDestructionCheck.cpp --- check delayed destruction of buffer --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +class FairMockScheduler : public sycl::detail::Scheduler { +public: + // FairMockScheduler() : Scheduler() + // { + // // ON_CALL(*this, deferMemObjRelease(_)). 
+ // // WillByDefault(Invoke([&](qcsinternal::Duration timeout) { + // // return qcsinternal::PidNamedEvent::TryLockFor(timeout); + // // })); + // } + MOCK_METHOD1(deferMemObjRelease, + void(const std::shared_ptr &)); +}; + +class BufferDestructionCheck : public ::testing::Test { +public: + BufferDestructionCheck() : Mock{}, Plt{Mock.getPlatform()} {} + +protected: + void SetUp() override { + MockSchedulerPtr = new FairMockScheduler(); + ASSERT_TRUE(sycl::detail::GlobalHandler::instance().attachScheduler( + dynamic_cast(MockSchedulerPtr))); + // Mock.redefine( + // redefinedMemBufferCreate); + // Mock.redefine( + // redefinedDeviceGetInfo); + } + +protected: + sycl::unittest::PiMock Mock; + sycl::platform Plt; + FairMockScheduler *MockSchedulerPtr; +}; + +// Test that buffer_location was passed correctly +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { + sycl::context Context{Plt}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + sycl::queue Queue{Context, sycl::default_selector{}}; + + sycl::buffer Buf(3); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + ASSERT_NE(BufImpl, nullptr); + // EXPECT_TRUE(BufImpl->get_allocator_internal()->isDefault()); + // EXPECT_TRUE(BufImpl->isNotBlockingRelease()); +} + +// TEST_F(BufferAllocatorCheck, BufferWithSizeOnlyUSM) { +// sycl::context Context{Plt}; +// if (Plt.is_host()) { +// std::cout << "Not run due to host-only environment\n"; +// return; +// } +// sycl::queue Queue{Context, sycl::default_selector{}}; +// using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Queue); +// sycl::buffer Buf(3, allocator); +// std::shared_ptr BufImpl = +// sycl::detail::getSyclObjImpl(Buf); +// ASSERT_NE(BufImpl, nullptr); +// EXPECT_FALSE(BufImpl->get_allocator_internal()->isDefault()); +// EXPECT_FALSE(BufImpl->isNotBlockingRelease()); +// } + +// double timer() { +// using namespace std::chrono; +// auto tp = high_resolution_clock::now(); +// auto tp_duration = tp.time_since_epoch(); +// duration sec = tp.time_since_epoch(); +// return sec.count() * 1000; +// } + +// TEST_F(BufferAllocatorCheck, BufferDestructionDelayed) { +// sycl::context Context{Plt}; +// if (Plt.is_host()) { +// std::cout << "Not run due to host-only environment\n"; +// return; +// } + +// double start, t1, t2; + +// sycl::queue Queue{Context, sycl::default_selector{}}; +// using buffer_u8_t = cl::sycl::buffer; +// const size_t array_size = 1<<24; +// { +// buffer_u8_t BufferA((cl::sycl::range<1>(array_size))); + +// std::shared_ptr BufImpl = +// sycl::detail::getSyclObjImpl(Buf); +// ASSERT_NE(BufImpl, nullptr); +// EXPECT_FALSE(BufImpl->get_allocator_internal()->isDefault()); +// EXPECT_FALSE(BufImpl->isNotBlockingRelease()); + +// Q.submit([&](cl::sycl::handler &cgh) { +// auto accA = BufferA.template +// get_access(cgh); +// cgh.parallel_for(cl::sycl::range<1>{array_size}, +// [=](cl::sycl::id<1> id) +// { +// accA[id] = id % 2; +// }); +// }); +// start = timer(); +// Q.submit([&](cl::sycl::handler &cgh) { +// auto accA = BufferA.template +// get_access(cgh); +// cgh.parallel_for(cl::sycl::range<1>{array_size}, +// [=](cl::sycl::id<1> id) +// { +// accA[id] = id % 2; +// }); +// }); + +// t1 = timer() - start; // before buffer destroy +// } +// t2 = timer() - start; // after buffer destroy + +// std::cout << "time before buffer destroy: " << t1 << " ms\n"; +// std::cout << "time after buffer destroy: " << t2 << " ms\n"; +// } diff --git a/sycl/unittests/buffer/CMakeLists.txt 
b/sycl/unittests/buffer/CMakeLists.txt index f5dabae23f6df..137efe06cc555 100644 --- a/sycl/unittests/buffer/CMakeLists.txt +++ b/sycl/unittests/buffer/CMakeLists.txt @@ -1,4 +1,5 @@ add_sycl_unittest(BufferTests OBJECT BufferLocation.cpp Image.cpp + BufferDestructionCheck.cpp ) From 5db9e85c864ddaf075d8ffa27282ca72191a30a6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 23 Sep 2022 06:34:51 -0700 Subject: [PATCH 15/62] Fix shared_ptr use_count check Signed-off-by: Tikhomirova, Kseniya --- sycl/source/buffer.cpp | 3 ++- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index 246fb9bb21594..29ce3280f7079 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -122,7 +122,8 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } void buffer_plain::handleRelease(bool DefaultAllocator) const { - impl->detachMemoryObject(impl, DefaultAllocator); + if (impl.use_count() == 1) + impl->detachMemoryObject(impl, DefaultAllocator); } } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index e9d61856d908d..8f9cfa82486b5 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -150,7 +150,7 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &self, bool DefaultAllocator) const { - if (self.use_count() == 1 && MNoHostPtrProvided && DefaultAllocator) + if (MNoHostPtrProvided && DefaultAllocator) Scheduler::getInstance().deferMemObjRelease(self); } From 53a1892e737850a8d0fc8fbaca693472d8c279db Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 23 Sep 2022 06:35:37 -0700 Subject: [PATCH 16/62] Test draft Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 19 ++- sycl/source/detail/scheduler/scheduler.cpp | 9 ++ sycl/source/detail/scheduler/scheduler.hpp | 1 + .../buffer/BufferDestructionCheck.cpp | 118 ++++++------------ sycl/unittests/buffer/CMakeLists.txt | 1 + 5 files changed, 65 insertions(+), 83 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 41aa04bf660da..b3a30ebbb3108 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -48,11 +48,19 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { } bool GlobalHandler::attachScheduler(Scheduler *scheduler) { - const LockGuard Lock{MScheduler.Lock}; - if (MScheduler.Inst) - return false; - MScheduler.Inst.reset(scheduler); - return true; + Scheduler* old; + bool result = true; + { + const LockGuard Lock{MScheduler.Lock}; + // Used for notification that scheduler attached later than expected that may cause test issues. + if (MScheduler.Inst) + result = false; + old = MScheduler.Inst.release(); + MScheduler.Inst.reset(scheduler); + } + if (old) + delete old; + return result; } Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } @@ -157,6 +165,7 @@ void shutdown() { // First, release resources, that may access plugins. 
  GlobalHandler::instance().MPlatformCache.Inst.reset(nullptr);
+  GlobalHandler::instance().MScheduler.Inst->releaseResources();
   GlobalHandler::instance().MScheduler.Inst.reset(nullptr);
   GlobalHandler::instance().MProgramManager.Inst.reset(nullptr);
 
diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp
index b3a8851be5006..7192fb2fdc6fc 100644
--- a/sycl/source/detail/scheduler/scheduler.cpp
+++ b/sycl/source/detail/scheduler/scheduler.cpp
@@ -425,6 +425,15 @@ Scheduler::Scheduler() {
 }
 
 Scheduler::~Scheduler() {
+  // Be aware that releaseResources() should be called before the Scheduler is
+  // deleted: objects it keeps as fields may need the Scheduler for their own
+  // release and would recreate it via GlobalHandler::getScheduler. Still keep
+  // the call here, but it should do almost nothing if already released.
+  releaseResources();
+}
+
+void Scheduler::releaseResources()
+{
   // By specification there are several possible sync points: buffer
   // destruction, wait() method of a queue or event. Stream doesn't introduce
   // any synchronization point. It is guaranteed that stream is flushed and
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index f13ef367f64eb..b139238f5227b 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -448,6 +448,7 @@ class Scheduler {
 
   Scheduler();
   virtual ~Scheduler();
+  void releaseResources();
 
 protected:
   // TODO: after switching to C++17, change std::shared_timed_mutex to
diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp
index 99a26ff77f38a..11e5248451de0 100644
--- a/sycl/unittests/buffer/BufferDestructionCheck.cpp
+++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp
@@ -46,6 +46,9 @@ class BufferDestructionCheck : public ::testing::Test {
     // Mock.redefine(
    //     redefinedDeviceGetInfo);
   }
+  void TearDown() override {
+    sycl::detail::GlobalHandler::instance().attachScheduler(NULL);
+  }
 
 protected:
   sycl::unittest::PiMock Mock;
   sycl::platform Plt;
   FairMockScheduler *MockSchedulerPtr;
 };
@@ -62,84 +65,43 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) {
   }
   sycl::queue Queue{Context, sycl::default_selector{}};
 
-  sycl::buffer Buf(3);
-  std::shared_ptr<sycl::detail::buffer_impl> BufImpl =
-      sycl::detail::getSyclObjImpl(Buf);
-  ASSERT_NE(BufImpl, nullptr);
-  // EXPECT_TRUE(BufImpl->get_allocator_internal()->isDefault());
-  // EXPECT_TRUE(BufImpl->isNotBlockingRelease());
+  {
+    sycl::buffer Buf(3);
+    std::shared_ptr<sycl::detail::buffer_impl> BufImpl =
+        sycl::detail::getSyclObjImpl(Buf);
+    ASSERT_NE(BufImpl, nullptr);
+    std::function<bool(const std::shared_ptr<sycl::detail::SYCLMemObjI> &)> checker =
+        [&BufImpl](const std::shared_ptr<sycl::detail::SYCLMemObjI> &memObj)
+    {
+      return BufImpl.get() == memObj.get();
+    };
+    testing::Sequence S;
+    EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checker))).Times(1).InSequence(S).RetiresOnSaturation();
+    EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::_)).Times(testing::AnyNumber()).InSequence(S);
+  }
 }
 
-// TEST_F(BufferAllocatorCheck, BufferWithSizeOnlyUSM) {
-//   sycl::context Context{Plt};
-//   if (Plt.is_host()) {
-//     std::cout << "Not run due to host-only environment\n";
-//     return;
-//   }
-//   sycl::queue Queue{Context, sycl::default_selector{}};
-//   using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Queue);
-//   sycl::buffer Buf(3, allocator);
-//   std::shared_ptr BufImpl =
-//       sycl::detail::getSyclObjImpl(Buf);
-//   ASSERT_NE(BufImpl, nullptr);
-//
EXPECT_FALSE(BufImpl->get_allocator_internal()->isDefault());
-//   EXPECT_FALSE(BufImpl->isNotBlockingRelease());
-// }
-
-// double timer() {
-//   using namespace std::chrono;
-//   auto tp = high_resolution_clock::now();
-//   auto tp_duration = tp.time_since_epoch();
-//   duration sec = tp.time_since_epoch();
-//   return sec.count() * 1000;
-// }
-
-// TEST_F(BufferAllocatorCheck, BufferDestructionDelayed) {
-//   sycl::context Context{Plt};
-//   if (Plt.is_host()) {
-//     std::cout << "Not run due to host-only environment\n";
-//     return;
-//   }
-
-//   double start, t1, t2;
-
-//   sycl::queue Queue{Context, sycl::default_selector{}};
-//   using buffer_u8_t = cl::sycl::buffer;
-//   const size_t array_size = 1<<24;
-//   {
-//     buffer_u8_t BufferA((cl::sycl::range<1>(array_size)));
-
-//     std::shared_ptr BufImpl =
-//         sycl::detail::getSyclObjImpl(Buf);
-//     ASSERT_NE(BufImpl, nullptr);
-//     EXPECT_FALSE(BufImpl->get_allocator_internal()->isDefault());
-//     EXPECT_FALSE(BufImpl->isNotBlockingRelease());
-
-//     Q.submit([&](cl::sycl::handler &cgh) {
-//       auto accA = BufferA.template
-//       get_access(cgh);
-//       cgh.parallel_for(cl::sycl::range<1>{array_size},
-//                       [=](cl::sycl::id<1> id)
-//                       {
-//                         accA[id] = id % 2;
-//                       });
-//     });
-//     start = timer();
-//     Q.submit([&](cl::sycl::handler &cgh) {
-//       auto accA = BufferA.template
-//       get_access(cgh);
-//       cgh.parallel_for(cl::sycl::range<1>{array_size},
-//                       [=](cl::sycl::id<1> id)
-//                       {
-//                         accA[id] = id % 2;
-//                       });
-//     });
-
-//     t1 = timer() - start; // before buffer destroy
-//   }
-//   t2 = timer() - start; // after buffer destroy
-
-//   std::cout << "time before buffer destroy: " << t1 << " ms\n";
-//   std::cout << "time after buffer destroy: " << t2 << " ms\n";
-// }
+TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) {
+  sycl::context Context{Plt};
+  if (Plt.is_host()) {
+    std::cout << "Not run due to host-only environment\n";
+    return;
+  }
+  sycl::queue Queue{Context, sycl::default_selector{}};
+
+  {
+    using AllocatorTypeTest = sycl::usm_allocator;
+    AllocatorTypeTest allocator(Queue);
+    sycl::buffer Buf(3, allocator);
+    std::shared_ptr BufImpl =
+        sycl::detail::getSyclObjImpl(Buf);
+    ASSERT_NE(BufImpl, nullptr);
+    std::function&)> checker =
+        [&BufImpl](const std::shared_ptr& memObj)
+        {
+          return BufImpl.get() == memObj.get();
+        };
+    testing::Sequence S;
+    EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checker)));
+  }
+}
diff --git a/sycl/unittests/buffer/CMakeLists.txt b/sycl/unittests/buffer/CMakeLists.txt
index 137efe06cc555..1542ecc57bbe3 100644
--- a/sycl/unittests/buffer/CMakeLists.txt
+++ b/sycl/unittests/buffer/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_definitions(-gdwarf-4 -O0)
 add_sycl_unittest(BufferTests OBJECT
   BufferLocation.cpp
   Image.cpp

From 4b0a3faea42fb5bfa5af9c27c8b6def9ce9c3325 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Fri, 23 Sep 2022 06:37:42 -0700
Subject: [PATCH 17/62] [SYCL] Align usm_allocator ctor and operators with SYCL2020

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/include/sycl/usm/usm_allocator.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sycl/include/sycl/usm/usm_allocator.hpp b/sycl/include/sycl/usm/usm_allocator.hpp
index 233c92ab62f01..bd2a09a18f8ab 100644
--- a/sycl/include/sycl/usm/usm_allocator.hpp
+++ b/sycl/include/sycl/usm/usm_allocator.hpp
@@ -38,14 +38,14 @@ class usm_allocator {
       AllocKind != usm::alloc::device,
       "usm_allocator does not support AllocKind == usm::alloc::device");

-  usm_allocator() noexcept = delete;
+  usm_allocator() = delete;
   usm_allocator(const
context &Ctxt, const device &Dev, - const property_list &PropList = {}) noexcept + const property_list &PropList = {}) : MContext(Ctxt), MDevice(Dev), MPropList(PropList) {} - usm_allocator(const queue &Q, const property_list &PropList = {}) noexcept + usm_allocator(const queue &Q, const property_list &PropList = {}) : MContext(Q.get_context()), MDevice(Q.get_device()), MPropList(PropList) {} - usm_allocator(const usm_allocator &) noexcept = default; + usm_allocator(const usm_allocator &) = default; usm_allocator(usm_allocator &&) noexcept = default; usm_allocator &operator=(const usm_allocator &Other) { MContext = Other.MContext; From 8daea20effdb519c42060b8617ef785facd3e61f Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 26 Sep 2022 11:22:41 -0700 Subject: [PATCH 18/62] Update attach scheduler logic Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 21 +++----- sycl/source/detail/global_handler.hpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 1 + .../buffer/BufferDestructionCheck.cpp | 51 ++++++++++--------- 4 files changed, 35 insertions(+), 40 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index b3a30ebbb3108..173a24a65090f 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -47,20 +47,13 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { return *IWL.Inst; } -bool GlobalHandler::attachScheduler(Scheduler *scheduler) { - Scheduler* old; - bool result = true; - { - const LockGuard Lock{MScheduler.Lock}; - // Used for notification that scheduler attached later than expected that may cause test issues. - if (MScheduler.Inst) - result = false; - old = MScheduler.Inst.release(); - MScheduler.Inst.reset(scheduler); - } - if (old) - delete old; - return result; +void GlobalHandler::attachScheduler(Scheduler *scheduler) { + // Test method, do not protect with lock since releaseResources will cause dead lock due to host queue release + // const LockGuard Lock{MScheduler.Lock}; + if (MScheduler.Inst) + MScheduler.Inst->releaseResources(); + + MScheduler.Inst.reset(scheduler); } Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 3d20dba152ed6..32402e363f803 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -74,7 +74,7 @@ class GlobalHandler { void unloadPlugins(); // For testing purposes only - bool attachScheduler(Scheduler *scheduler); + void attachScheduler(Scheduler *scheduler); private: friend void releaseDefaultContexts(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7192fb2fdc6fc..2a578c718bafa 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -454,6 +454,7 @@ void Scheduler::releaseResources() // haven't been freed because of the graph mutex being locked at the time, // clean them up now. 
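// [annotation] Why releaseResources() also drops the default host queue (the
// added line just below): queue_impl owns a size-only buffer used for assert
// handling, and releasing a buffer goes through GlobalHandler::getScheduler().
// If that queue instead died inside ~Scheduler, getScheduler() could construct
// a brand-new Scheduler during shutdown. Patch 19 builds on this with a
// cleanup loop for exactly this kind of late-arriving deferred object.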
cleanupCommands({}); + DefaultHostQueue.reset(); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index 11e5248451de0..b69d2b32a004b 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -39,8 +39,8 @@ class BufferDestructionCheck : public ::testing::Test { protected: void SetUp() override { MockSchedulerPtr = new FairMockScheduler(); - ASSERT_TRUE(sycl::detail::GlobalHandler::instance().attachScheduler( - dynamic_cast(MockSchedulerPtr))); + sycl::detail::GlobalHandler::instance().attachScheduler( + dynamic_cast(MockSchedulerPtr)); // Mock.redefine( // redefinedMemBufferCreate); // Mock.redefine( @@ -56,6 +56,8 @@ class BufferDestructionCheck : public ::testing::Test { FairMockScheduler *MockSchedulerPtr; }; +//inline void CheckBufferDestruction() + // Test that buffer_location was passed correctly TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::context Context{Plt}; @@ -81,27 +83,26 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { } } -TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) { - sycl::context Context{Plt}; - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return; - } - sycl::queue Queue{Context, sycl::default_selector{}}; +// TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) { +// sycl::context Context{Plt}; +// if (Plt.is_host()) { +// std::cout << "Not run due to host-only environment\n"; +// return; +// } +// sycl::queue Queue{Context, sycl::default_selector{}}; - { - using AllocatorTypeTest = sycl::usm_allocator; - AllocatorTypeTest allocator(Queue); - sycl::buffer Buf(3, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - ASSERT_NE(BufImpl, nullptr); - std::function&)> checker = - [&BufImpl](const std::shared_ptr& memObj) - { - return BufImpl.get() == memObj.get(); - }; - testing::Sequence S; - EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checker))); - } -} +// { +// using AllocatorTypeTest = sycl::usm_allocator; +// AllocatorTypeTest allocator(Queue); +// sycl::buffer Buf(3, allocator); +// std::shared_ptr BufImpl = +// sycl::detail::getSyclObjImpl(Buf); +// ASSERT_NE(BufImpl, nullptr); +// std::function&)> checkerNotEqual = +// [&BufImpl](const std::shared_ptr& memObj) +// { +// return BufImpl.get() != memObj.get(); +// }; +// EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checkerNotEqual))).Times(testing::AnyNumber()); +// } +// } From c855f139f53bdea9952c1544c4bc151935e568b6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 28 Sep 2022 01:43:09 -0700 Subject: [PATCH 19/62] Make cleanup iterative Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 36 +++++++++++++++------- sycl/source/detail/scheduler/scheduler.hpp | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 2a578c718bafa..53faf73288264 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -449,12 +449,18 @@ void Scheduler::releaseResources() "not all resources were released. 
Please be sure that all kernels " "have synchronization points.\n\n"); } - cleanupDeferredMemObjects(true); // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, // clean them up now. cleanupCommands({}); DefaultHostQueue.reset(); + + // We need loop since sometimes we may need new objects to be added to deferred mem objects storage during cleanup. + // Known example is: we cleanup existing deferred mem objects under write lock, during this process we cleanup commands related to this record, + // command may have last reference to queue_impl, ~queue_impl is called and buffer for assert (which is created with size only so all confitions for deferred release are satisfied) + // is added to deferred mem obj storage. So we may end up with leak. + while(!isNoDeferredMemObjects()) + cleanupDeferredMemObjects(true); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { @@ -521,17 +527,24 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { cleanupDeferredMemObjects(false); } +inline bool Scheduler::isNoDeferredMemObjects() +{ + std::lock_guard Lock{MDeferredMemReleaseMutex}; + return MDeferredMemObjRelease.empty(); +} + void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { - { - std::lock_guard Lock{MDeferredMemReleaseMutex}; - if (MDeferredMemObjRelease.empty()) - return; - } + if (isNoDeferredMemObjects()) + return; // Need to aggregate ready to release object to acquire write lock once. std::list> ObjsReadyToRelease; - { - ReadLockT Lock(MGraphLock); + { + ReadLockT Lock(MGraphLock, std::try_to_lock); + // if we need blocking mode - force lock waiting + if (!Lock.owns_lock() && ForceWait) + Lock.lock(); + if (Lock.owns_lock()) { { // Not expected that ForceWait == true with be used in parallel with // adding MemObj to storage, no such scenario. @@ -561,8 +574,9 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { std::vector> AuxResourcesToDeallocate; { WriteLockT Lock(MGraphLock, std::try_to_lock); - // In order to avoid deadlocks related to blocked commands, defer cleanup if - // the lock wasn't acquired. 
+ // if we need blocking mode - force lock waiting + if (!Lock.owns_lock() && ForceWait) + acquireWriteLock(Lock); if (Lock.owns_lock()) { for (auto &MemObj : ObjsReadyToRelease) releaseMemObjRecord(MemObj.get(), StreamsToDeallocate, @@ -575,8 +589,8 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { } deallocateStreams(StreamsToDeallocate); // ObjsReadyToRelease leaving scope and being deleted + } } - } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index b139238f5227b..37454ea5b957a 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -449,6 +449,7 @@ class Scheduler { Scheduler(); virtual ~Scheduler(); void releaseResources(); + inline bool isNoDeferredMemObjects(); protected: // TODO: after switching to C++17, change std::shared_timed_mutex to From 8dbcd1caa71e538e07caf353c69676d5d2eb80dc Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 28 Sep 2022 09:28:30 -0700 Subject: [PATCH 20/62] Fix test utils impl error Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 3 +- .../buffer/BufferDestructionCheck.cpp | 44 +++++++++---------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 173a24a65090f..fbf0f1e66ae6a 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -158,7 +158,8 @@ void shutdown() { // First, release resources, that may access plugins. GlobalHandler::instance().MPlatformCache.Inst.reset(nullptr); - GlobalHandler::instance().MScheduler.Inst->releaseResources(); + if (GlobalHandler::instance().MScheduler.Inst) + GlobalHandler::instance().MScheduler.Inst->releaseResources(); GlobalHandler::instance().MScheduler.Inst.reset(nullptr); GlobalHandler::instance().MProgramManager.Inst.reset(nullptr); diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index b69d2b32a004b..f5029ab9c0e88 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -83,26 +83,26 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { } } -// TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) { -// sycl::context Context{Plt}; -// if (Plt.is_host()) { -// std::cout << "Not run due to host-only environment\n"; -// return; -// } -// sycl::queue Queue{Context, sycl::default_selector{}}; +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) { + sycl::context Context{Plt}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + sycl::queue Queue{Context, sycl::default_selector{}}; -// { -// using AllocatorTypeTest = sycl::usm_allocator; -// AllocatorTypeTest allocator(Queue); -// sycl::buffer Buf(3, allocator); -// std::shared_ptr BufImpl = -// sycl::detail::getSyclObjImpl(Buf); -// ASSERT_NE(BufImpl, nullptr); -// std::function&)> checkerNotEqual = -// [&BufImpl](const std::shared_ptr& memObj) -// { -// return BufImpl.get() != memObj.get(); -// }; -// EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checkerNotEqual))).Times(testing::AnyNumber()); -// } -// } + { + using AllocatorTypeTest = sycl::usm_allocator; + AllocatorTypeTest allocator(Queue); + sycl::buffer Buf(3, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + 
ASSERT_NE(BufImpl, nullptr);
+    std::function&)> checkerNotEqual =
+        [&BufImpl](const std::shared_ptr& memObj)
+        {
+          return BufImpl.get() != memObj.get();
+        };
+    EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checkerNotEqual))).Times(testing::AnyNumber());
+  }
+}

From 0f61c64d1819d74346317c3382c28b00482e0e34 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Wed, 28 Sep 2022 11:32:07 -0700
Subject: [PATCH 21/62] Add other tests for buffer constructors

Signed-off-by: Tikhomirova, Kseniya
---
 .../buffer/BufferDestructionCheck.cpp         | 259 +++++++++++++++---
 1 file changed, 228 insertions(+), 31 deletions(-)

diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp
index f5029ab9c0e88..96d5600365acc 100644
--- a/sycl/unittests/buffer/BufferDestructionCheck.cpp
+++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp
@@ -38,6 +38,10 @@ class BufferDestructionCheck : public ::testing::Test {

 protected:
   void SetUp() override {
+    if (Plt.is_host()) {
+      std::cout << "Not run due to host-only environment\n";
+      GTEST_SKIP();
+    }
     MockSchedulerPtr = new FairMockScheduler();
     sycl::detail::GlobalHandler::instance().attachScheduler(
         dynamic_cast(MockSchedulerPtr));
@@ -50,59 +54,252 @@ class BufferDestructionCheck : public ::testing::Test {
     sycl::detail::GlobalHandler::instance().attachScheduler(NULL);
   }

+  inline void
+  CheckBufferDestruction(std::shared_ptr BufImpl,
+                         bool ShouldBeDeferred) {
+    ASSERT_NE(BufImpl, nullptr);
+    const std::function &)>
+        checkerNotEqual =
+            [&BufImpl](
+                const std::shared_ptr &memObj) {
+              return BufImpl.get() != memObj.get();
+            };
+    const std::function &)>
+        checkerEqual =
+            [&BufImpl](
+                const std::shared_ptr &memObj) {
+              return BufImpl.get() == memObj.get();
+            };
+    if (ShouldBeDeferred) {
+      testing::Sequence S;
+      // First check that the explicitly created buffer is deferred.
+      EXPECT_CALL(*MockSchedulerPtr,
+                  deferMemObjRelease(testing::Truly(checkerEqual)))
+          .Times(1)
+          .InSequence(S)
+          .RetiresOnSaturation();
+      // We have two queues - a non-host queue and a host queue. Currently a
+      // queue contains its own buffer as a class member; that buffer is used
+      // for assert handling. Those buffers are also created with size only,
+      // so they are deferred on deletion as well.
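// [annotation] The two expectations that follow are a standard gmock idiom:
// pin the specific matcher first and retire it after one match, so the
// trailing wildcard absorbs the queues' own assert buffers. The property the
// whole helper encodes can be summarized in a one-line sketch (names as used
// in this series, not an actual API):
//
//   bool Deferred = !HostPtrProvided && DefaultAllocator;
//
// i.e. only a size-only buffer with the default allocator may be handed to
// deferMemObjRelease(); anything user-observable must be released, and
// written back, synchronously.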
+ EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::_)) + .Times(/*testing::AnyNumber()*/ 2) + .InSequence(S); + } else { + // buffer created above should not be deferred on deletion because has non + // default allocator + EXPECT_CALL(*MockSchedulerPtr, + deferMemObjRelease(testing::Truly(checkerNotEqual))) + .Times(testing::AnyNumber()); + } + } + protected: sycl::unittest::PiMock Mock; sycl::platform Plt; FairMockScheduler *MockSchedulerPtr; }; -//inline void CheckBufferDestruction() - // Test that buffer_location was passed correctly TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::context Context{Plt}; - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + sycl::buffer Buf(1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, true); } - sycl::queue Queue{Context, sycl::default_selector{}}; +} +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; { - sycl::buffer Buf(3); + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); - ASSERT_NE(BufImpl, nullptr); - std::function&)> checker = - [&BufImpl](const std::shared_ptr& memObj) - { - return BufImpl.get() == memObj.get(); - }; - testing::Sequence S; - EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checker))).Times(1).InSequence(S).RetiresOnSaturation(); - EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::_)).Times(testing::AnyNumber()).InSequence(S); + CheckBufferDestruction(BufImpl, false); } } -TEST_F(BufferDestructionCheck, BufferWithSizeOnlyUSM) { +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultAllocator) { sycl::context Context{Plt}; - if (Plt.is_host()) { - std::cout << "Not run due to host-only environment\n"; - return; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + using AllocatorTypeTest = sycl::buffer_allocator; + AllocatorTypeTest allocator; + sycl::buffer Buf(1, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, true); } - sycl::queue Queue{Context, sycl::default_selector{}}; +} +TEST_F(BufferDestructionCheck, BufferWithRawHostPtr) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; { - using AllocatorTypeTest = sycl::usm_allocator; - AllocatorTypeTest allocator(Queue); - sycl::buffer Buf(3, allocator); + int InitialVal = 8; + sycl::buffer Buf(&InitialVal, 1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); - ASSERT_NE(BufImpl, nullptr); - std::function&)> checkerNotEqual = - [&BufImpl](const std::shared_ptr& memObj) - { - return BufImpl.get() != memObj.get(); - }; - EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::Truly(checkerNotEqual))).Times(testing::AnyNumber()); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithRawHostPtrWithNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + int InitialVal = 8; + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(&InitialVal, 1, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + 
CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithConstRawHostPtr) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + const int InitialVal = 8; + sycl::buffer Buf(&InitialVal, 1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, + BufferWithConstRawHostPtrWithNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + const int InitialVal = 8; + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(&InitialVal, 1, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithContainer) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::vector data{3, 4}; + sycl::buffer Buf(data); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); } } + +TEST_F(BufferDestructionCheck, BufferWithContainerWithNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::vector data{3, 4}; + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(data, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithSharedPtr) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::shared_ptr InitialVal(new int(5)); + sycl::buffer Buf(InitialVal, 1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithSharedPtrWithNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::shared_ptr InitialVal(new int(5)); + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(InitialVal, 1, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithSharedPtrArray) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::shared_ptr InitialVal(new int[2]); + sycl::buffer Buf(InitialVal, 1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, + BufferWithSharedPtrArrayWithNonDefaultAllocator) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::shared_ptr InitialVal(new int[2]); + using AllocatorTypeTest = + sycl::usm_allocator; + AllocatorTypeTest allocator(Q); + sycl::buffer Buf(InitialVal, 2, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + +TEST_F(BufferDestructionCheck, BufferWithIterators) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + std::vector data{3, 4}; + sycl::buffer Buf(data.begin(), data.end()); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + 
CheckBufferDestruction(BufImpl, true); + } +} + +// TEST_F(BufferDestructionCheck, BufferWithIteratorsWithNonDefaultAllocator) { +// sycl::context Context{Plt}; +// sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; +// { +// std::vector data{3, 4}; +// using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); +// sycl::buffer Buf(data.begin(), data.end(), +// allocator); std::shared_ptr BufImpl = +// sycl::detail::getSyclObjImpl(Buf); +// CheckBufferDestruction(BufImpl, false); +// } +// } \ No newline at end of file From ddf215b1f3fb134d5b202e9357f6f451d8fd58dd Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 29 Sep 2022 06:18:09 -0700 Subject: [PATCH 22/62] Other tests for high level buffer destruction deferring logic Signed-off-by: Tikhomirova, Kseniya --- .../buffer/BufferDestructionCheck.cpp | 79 ++++++++++++++++--- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index 96d5600365acc..f0533a291e010 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -21,13 +21,8 @@ class FairMockScheduler : public sycl::detail::Scheduler { public: - // FairMockScheduler() : Scheduler() - // { - // // ON_CALL(*this, deferMemObjRelease(_)). - // // WillByDefault(Invoke([&](qcsinternal::Duration timeout) { - // // return qcsinternal::PidNamedEvent::TryLockFor(timeout); - // // })); - // } + using sycl::detail::Scheduler::MDeferredMemObjRelease; + using sycl::detail::Scheduler::MGraphLock; MOCK_METHOD1(deferMemObjRelease, void(const std::shared_ptr &)); }; @@ -45,10 +40,6 @@ class BufferDestructionCheck : public ::testing::Test { MockSchedulerPtr = new FairMockScheduler(); sycl::detail::GlobalHandler::instance().attachScheduler( dynamic_cast(MockSchedulerPtr)); - // Mock.redefine( - // redefinedMemBufferCreate); - // Mock.redefine( - // redefinedDeviceGetInfo); } void TearDown() override { sycl::detail::GlobalHandler::instance().attachScheduler(NULL); @@ -102,7 +93,6 @@ class BufferDestructionCheck : public ::testing::Test { FairMockScheduler *MockSchedulerPtr; }; -// Test that buffer_location was passed correctly TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -302,4 +292,67 @@ TEST_F(BufferDestructionCheck, BufferWithIterators) { // sycl::detail::getSyclObjImpl(Buf); // CheckBufferDestruction(BufImpl, false); // } -// } \ No newline at end of file +// } + +TEST_F(BufferDestructionCheck, BufferDeferringCheckWriteLock) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + testing::Sequence S; + sycl::detail::buffer_impl *unsafePtr = nullptr; + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + std::unique_lock Lock(MockSchedulerPtr->MGraphLock, + std::defer_lock); + { + sycl::buffer Buf(1); + { + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + unsafePtr = BufImpl.get(); + } + Lock.lock(); + // gmock warning will be generated - simply tell gtest that now we do not + // want to mock the function + ON_CALL(*MockSchedulerPtr, deferMemObjRelease) + .WillByDefault( + [this](const std::shared_ptr &MemObj) { + return MockSchedulerPtr + ->sycl::detail::Scheduler::deferMemObjRelease(MemObj); + }); + } + // Record is empty but lock should prevent from being deleted + 
ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.front().get(), + unsafePtr); + Lock.unlock(); + MockSchedulerPtr->releaseResources(); + + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + } +} + +TEST_F(BufferDestructionCheck, BufferDeferringCheckReadLock) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + testing::Sequence S; + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + std::shared_lock Lock(MockSchedulerPtr->MGraphLock, + std::defer_lock); + { + sycl::buffer Buf(1); + Lock.lock(); + // gmock warning will be generated - simply tell gtest that now we do not + // want to mock the function + ON_CALL(*MockSchedulerPtr, deferMemObjRelease) + .WillByDefault( + [this](const std::shared_ptr &MemObj) { + return MockSchedulerPtr + ->sycl::detail::Scheduler::deferMemObjRelease(MemObj); + }); + } + // Record is empty and read lock do not prevent from being deleted + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + Lock.unlock(); + } +} From 23bea82d8628eaaaca2d2372ac439c3743ebc5dc Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 3 Oct 2022 13:06:11 -0700 Subject: [PATCH 23/62] Add unittest for waitForRecordToFinish Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 55 +++++++------- .../buffer/BufferDestructionCheck.cpp | 76 ++++++++++++++++++- .../scheduler/SchedulerTestUtils.hpp | 1 + 3 files changed, 104 insertions(+), 28 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 53faf73288264..73acd6b601498 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -34,12 +34,13 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, for (Command *Cmd : Record->MReadLeaves) { if (Cmd->getEvent()->isCompleted()) continue; + + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); if (ForceWait) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } else return false; @@ -47,12 +48,13 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, for (Command *Cmd : Record->MWriteLeaves) { if (Cmd->getEvent()->isCompleted()) continue; + + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); if (ForceWait) { - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } else return false; @@ -425,15 +427,16 @@ Scheduler::Scheduler() { } Scheduler::~Scheduler() { - // Please be aware that releaseResources should be called before deletion of Scheduler. 
-  // Otherwise there can be the case when objects Scheduler keeps as fields may need Scheduler
-  // for their release and they work with Scheduler via GlobalHandler::getScheduler that will create new Scheduler object.
-  // Still keep it here but it should no almost nothing if releaseResources called before.
+  // Please be aware that releaseResources should be called before deletion of
+  // Scheduler. Otherwise there can be the case when objects Scheduler keeps as
+  // fields may need Scheduler for their release and they work with Scheduler
+  // via GlobalHandler::getScheduler that will create new Scheduler object.
+  // Still keep it here but it should no almost nothing if releaseResources
+  // called before.
   releaseResources();
 }

-void Scheduler::releaseResources()
-{
+void Scheduler::releaseResources() {
   // By specification there are several possible sync points: buffer
   // destruction, wait() method of a queue or event. Stream doesn't introduce
   // any synchronization point. It is guaranteed that stream is flushed and
@@ -455,11 +458,14 @@ void Scheduler::releaseResources()
   cleanupCommands({});
   DefaultHostQueue.reset();

-  // We need loop since sometimes we may need new objects to be added to deferred mem objects storage during cleanup. Known example is: we cleanup existing deferred mem objects under write lock, during this process we cleanup commands related to this record, command may have last reference to queue_impl, ~queue_impl is called and buffer for assert (which is created with size only so all conditions for deferred release are satisfied) is added to deferred mem obj storage. So we may end up with a leak.
-  while(!isNoDeferredMemObjects())
+  // We need loop since sometimes we may need new objects to be added to
+  // deferred mem objects storage during cleanup. Known example is: we cleanup
+  // existing deferred mem objects under write lock, during this process we
+  // cleanup commands related to this record, command may have last reference to
+  // queue_impl, ~queue_impl is called and buffer for assert (which is created
+  // with size only so all conditions for deferred release are satisfied) is
+  // added to deferred mem obj storage. So we may end up with a leak.
+  while (!isNoDeferredMemObjects())
     cleanupDeferredMemObjects(true);
 }

@@ -527,8 +533,7 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) {
   cleanupDeferredMemObjects(false);
 }

-inline bool Scheduler::isNoDeferredMemObjects()
-{
+inline bool Scheduler::isNoDeferredMemObjects() {
   std::lock_guard Lock{MDeferredMemReleaseMutex};
   return MDeferredMemObjRelease.empty();
 }

@@ -539,13 +544,12 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) {
   // Need to aggregate ready to release object to acquire write lock once.
   std::list> ObjsReadyToRelease;
-  {
+  {
     ReadLockT Lock(MGraphLock, std::try_to_lock);
     // if we need blocking mode - force lock waiting
     if (!Lock.owns_lock() && ForceWait)
       Lock.lock();
     if (Lock.owns_lock()) {
-    {
       // Not expected that ForceWait == true with be used in parallel with
       // adding MemObj to storage, no such scenario.
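// [annotation] The waitForRecordToFinish() change earlier in this patch is
// what makes the non-blocking pass useful: leaf commands are now enqueued
// unconditionally, and only the wait is gated on the flag. Per leaf command,
// in sketch form:
//
//   EnqueueResultT Res;
//   bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp);
//   if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
//     throw runtime_error("Enqueue process failed.",
//                         PI_ERROR_INVALID_OPERATION);
//   if (ForceWait)
//     GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
//   else
//     return false; // not finished yet; retry on a later cleanup pass
//
// The BufferDeferringCheck tests added in patch 22 rely on the surrounding
// locking: a held write lock keeps an object parked in MDeferredMemObjRelease,
// while a held read lock does not block the cleanup's read try-lock.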
std::lock_guard LockDef{MDeferredMemReleaseMutex}; @@ -589,7 +593,6 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { } deallocateStreams(StreamsToDeallocate); // ObjsReadyToRelease leaving scope and being deleted - } } } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index f0533a291e010..f2548b2a625ff 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -19,10 +19,14 @@ #include #include +#include "../scheduler/SchedulerTestUtils.hpp" + class FairMockScheduler : public sycl::detail::Scheduler { public: using sycl::detail::Scheduler::MDeferredMemObjRelease; + using sycl::detail::Scheduler::MGraphBuilder; using sycl::detail::Scheduler::MGraphLock; + using sycl::detail::Scheduler::waitForRecordToFinish; MOCK_METHOD1(deferMemObjRelease, void(const std::shared_ptr &)); }; @@ -37,7 +41,7 @@ class BufferDestructionCheck : public ::testing::Test { std::cout << "Not run due to host-only environment\n"; GTEST_SKIP(); } - MockSchedulerPtr = new FairMockScheduler(); + MockSchedulerPtr = new testing::NiceMock(); sycl::detail::GlobalHandler::instance().attachScheduler( dynamic_cast(MockSchedulerPtr)); } @@ -90,7 +94,7 @@ class BufferDestructionCheck : public ::testing::Test { protected: sycl::unittest::PiMock Mock; sycl::platform Plt; - FairMockScheduler *MockSchedulerPtr; + testing::NiceMock *MockSchedulerPtr; }; TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { @@ -356,3 +360,71 @@ TEST_F(BufferDestructionCheck, BufferDeferringCheckReadLock) { Lock.unlock(); } } + +std::map ExpectedEventStatus; +pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, + void *PV, size_t *PVSizeRet) { + EXPECT_EQ(PName, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) + << "Unknown param name"; + // could not use assert here + EXPECT_EQ(PVSize, 4u); + auto it = ExpectedEventStatus.find(Event); + if (it != ExpectedEventStatus.end()) { + *(static_cast(PV)) = it->second; + return PI_SUCCESS; + } else + return PI_ERROR_INVALID_OPERATION; +} + +TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + sycl::buffer Buf(1); + sycl::detail::Requirement MockReq = getMockRequirement(Buf); + std::vector AuxCmds; + sycl::detail::MemObjRecord *Rec = + MockSchedulerPtr->MGraphBuilder.getOrInsertMemObjRecord( + sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds); + MockCommand *ReadCmd = nullptr; + MockCommand *WriteCmd = nullptr; + ReadCmd = new MockCommand(sycl::detail::getSyclObjImpl(Q), MockReq); + ReadCmd->getEvent()->getHandleRef() = reinterpret_cast( + 0x01); // just assign to be able to use mock + WriteCmd = new MockCommand(sycl::detail::getSyclObjImpl(Q), MockReq); + WriteCmd->getEvent()->getHandleRef() = + reinterpret_cast(0x02); + + std::vector ToCleanUp; + std::vector ToEnqueue; + MockSchedulerPtr->MGraphBuilder.addNodeToLeaves( + Rec, ReadCmd, sycl::access::mode::read, ToEnqueue); + MockSchedulerPtr->MGraphBuilder.addNodeToLeaves( + Rec, WriteCmd, sycl::access::mode::write, ToEnqueue); + + Mock.redefine(getEventInfoFunc); + std::shared_lock Lock(MockSchedulerPtr->MGraphLock); + testing::InSequence S; + + ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_SUBMITTED; + ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = + PI_EVENT_SUBMITTED; + + EXPECT_CALL(*ReadCmd, 
enqueue).Times(1).RetiresOnSaturation();
+  EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false));
+  EXPECT_CALL(*ReadCmd, enqueue).Times(0);
+
+  ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;
+  ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] =
+      PI_EVENT_SUBMITTED;
+
+  EXPECT_CALL(*WriteCmd, enqueue).Times(1).RetiresOnSaturation();
+  EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false));
+  EXPECT_CALL(*WriteCmd, enqueue).Times(0);
+
+  ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;
+  ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;
+  EXPECT_TRUE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, true));
+  // previous expect_call is still valid and will generate a failure if we
+  // receive a call here, no need for extra limitation
+}
\ No newline at end of file
diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
index 02511f92eca69..ba6eb72674fe9 100644
--- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp
+++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
@@ -103,6 +103,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   using sycl::detail::Scheduler::addCG;
   using sycl::detail::Scheduler::addCopyBack;
   using sycl::detail::Scheduler::cleanupCommands;
+  using sycl::detail::Scheduler::waitForRecordToFinish;

   sycl::detail::MemObjRecord *
   getOrInsertMemObjRecord(const sycl::detail::QueueImplPtr &Queue,

From aa41d76a6d8edade6c510fac436e5f64939ee73c Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Mon, 3 Oct 2022 13:09:02 -0700
Subject: [PATCH 24/62] Remove debug flags uploaded by mistake

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/unittests/buffer/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sycl/unittests/buffer/CMakeLists.txt b/sycl/unittests/buffer/CMakeLists.txt
index 1542ecc57bbe3..137efe06cc555 100644
--- a/sycl/unittests/buffer/CMakeLists.txt
+++ b/sycl/unittests/buffer/CMakeLists.txt
@@ -1,4 +1,3 @@
-add_definitions(-gdwarf-4 -O0)
 add_sycl_unittest(BufferTests OBJECT
   BufferLocation.cpp
   Image.cpp

From 179c47232b4ee8eee5fddf791930818c029682d4 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Mon, 3 Oct 2022 13:17:51 -0700
Subject: [PATCH 25/62] Fix clang-format

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp | 1 +
 sycl/source/detail/global_handler.cpp               | 4 ++--
 sycl/source/detail/sycl_mem_obj_t.cpp               | 2 +-
 sycl/source/detail/sycl_mem_obj_t.hpp               | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp
index 2cc38daedc827..3a4f995a707f7 100644
--- a/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp
+++ b/sycl/include/sycl/detail/sycl_mem_obj_allocator.hpp
@@ -60,6 +60,7 @@ class SYCLMemObjAllocatorHolder : public SYCLMemObjAllocator {
   }

   virtual std::size_t getValueSize() const override { return MValueSize; }
+
 protected:
   virtual void *getAllocatorImpl() override { return &MAllocator; }

diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp
index 0f1e09be93e48..9499fe31cfc2a 100644
--- a/sycl/source/detail/global_handler.cpp
+++ b/sycl/source/detail/global_handler.cpp
@@ -48,8 +48,8 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types...
Args) { } void GlobalHandler::attachScheduler(Scheduler *scheduler) { - // Test method, do not protect with lock since releaseResources will cause dead lock due to host queue release - // const LockGuard Lock{MScheduler.Lock}; + // Test method, do not protect with lock since releaseResources will cause + // dead lock due to host queue release const LockGuard Lock{MScheduler.Lock}; if (MScheduler.Inst) MScheduler.Inst->releaseResources(); diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 8f9cfa82486b5..c3be8aea52b65 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -149,7 +149,7 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, } void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &self, - bool DefaultAllocator) const { + bool DefaultAllocator) const { if (MNoHostPtrProvided && DefaultAllocator) Scheduler::getInstance().deferMemObjRelease(self); } diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index 437b164a7915b..d684fccb7dacc 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -256,7 +256,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } void detachMemoryObject(const std::shared_ptr &self, - bool DefaultAllocator) const; + bool DefaultAllocator) const; protected: // An allocateMem helper that determines which host ptr to use From 2076c7c0b2dec698f5a1ff96e49175703307f742 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 4 Oct 2022 06:33:17 -0700 Subject: [PATCH 26/62] Update test to not keep ill-formed objects Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/scheduler/LeafLimit.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 94e35bcdebd6a..4fc077bb1dad2 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -9,6 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include #include #include #include @@ -87,4 +88,8 @@ TEST_F(SchedulerTest, LeafLimit) { EXPECT_TRUE(std::any_of( NewestLeaf->MDeps.begin(), NewestLeaf->MDeps.end(), [&](const detail::DepDesc &DD) { return DD.MDepCommand == OldestLeaf; })); + MS.cleanupCommandsForRecord(Rec); + auto MemObj = static_cast( + detail::getSyclObjImpl(Buf).get()); + MS.removeRecordForMemObj(MemObj); } From e911d0a03c4bc6664d9ddbbd192709217b069710 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 4 Oct 2022 07:22:37 -0700 Subject: [PATCH 27/62] Check command destruction Signed-off-by: Tikhomirova, Kseniya --- .../buffer/BufferDestructionCheck.cpp | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index f2548b2a625ff..e5bfe1efbc960 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -376,6 +376,24 @@ pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, return PI_ERROR_INVALID_OPERATION; } +class MockCmdWithRelTracking : public MockCommand +{ + public: + MockCmdWithRelTracking( + sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req, + sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG) + : MockCommand(Queue, Req, Type) {}; + 
MockCmdWithRelTracking(
+      sycl::detail::QueueImplPtr Queue,
+      sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
+      : MockCommand(Queue, Type) {};
+  ~MockCmdWithRelTracking()
+  {
+    Release();
+  }
+  MOCK_METHOD0(Release, void());
+};
+
 TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   sycl::context Context{Plt};
   sycl::queue Q = sycl::queue{Context, sycl::default_selector{}};
@@ -386,12 +404,12 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
       sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds);
-  MockCommand *ReadCmd = nullptr;
-  MockCommand *WriteCmd = nullptr;
-  ReadCmd = new MockCommand(sycl::detail::getSyclObjImpl(Q), MockReq);
+  MockCmdWithRelTracking *ReadCmd = nullptr;
+  MockCmdWithRelTracking *WriteCmd = nullptr;
+  ReadCmd = new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
   ReadCmd->getEvent()->getHandleRef() = reinterpret_cast(
       0x01); // just assign to be able to use mock
-  WriteCmd = new MockCommand(sycl::detail::getSyclObjImpl(Q), MockReq);
+  WriteCmd = new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
   WriteCmd->getEvent()->getHandleRef() =
       reinterpret_cast(0x02);
@@ -427,4 +445,6 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   EXPECT_TRUE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, true));
   // previous expect_call is still valid and will generate a failure if we
   // receive a call here, no need for extra limitation
+  EXPECT_CALL(*ReadCmd, Release).Times(1);
+  EXPECT_CALL(*WriteCmd, Release).Times(1);
 }
\ No newline at end of file

From 81c2b09b2d64d822366b49c956d5dcefc7168b1d Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Tue, 4 Oct 2022 07:28:07 -0700
Subject: [PATCH 28/62] Fix clang-format

Signed-off-by: Tikhomirova, Kseniya
---
 .../buffer/BufferDestructionCheck.cpp         | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp
index e5bfe1efbc960..e715c34e7fc5b 100644
--- a/sycl/unittests/buffer/BufferDestructionCheck.cpp
+++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp
@@ -376,21 +376,17 @@ pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize,
   return PI_ERROR_INVALID_OPERATION;
 }

-class MockCmdWithRelTracking : public MockCommand
-{
-  public:
+class MockCmdWithRelTracking : public MockCommand {
+public:
   MockCmdWithRelTracking(
-      sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req,
-      sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
-      : MockCommand(Queue, Req, Type) {};
+      sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req,
+      sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
+      : MockCommand(Queue, Req, Type){};
   MockCmdWithRelTracking(
-      sycl::detail::QueueImplPtr Queue,
-      sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
-      : MockCommand(Queue, Type) {};
-  ~MockCmdWithRelTracking()
-  {
-    Release();
-  }
+      sycl::detail::QueueImplPtr Queue,
+      sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
+      : MockCommand(Queue, Type){};
+  ~MockCmdWithRelTracking() { Release(); }
   MOCK_METHOD0(Release, void());
 };

@@ -406,10 +402,12 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   MockCmdWithRelTracking *ReadCmd = nullptr;
   MockCmdWithRelTracking *WriteCmd = nullptr;
-  ReadCmd = new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
+  ReadCmd =
+      new
MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq); + ReadCmd = + new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq); ReadCmd->getEvent()->getHandleRef() = reinterpret_cast( 0x01); // just assign to be able to use mock - WriteCmd = new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq); + WriteCmd = + new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq); WriteCmd->getEvent()->getHandleRef() = reinterpret_cast(0x02); From edcfcfcfa10b71e66d76f920379cb5268ca19efa Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 4 Oct 2022 07:52:40 -0700 Subject: [PATCH 29/62] Handle set_final_data usage Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/sycl_mem_obj_t.hpp | 7 ++++++- sycl/unittests/buffer/BufferDestructionCheck.cpp | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index d684fccb7dacc..6f1614d938c66 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -126,7 +126,10 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { void set_write_back(bool NeedWriteBack) { MNeedWriteBack = NeedWriteBack; } - void set_final_data(std::nullptr_t) { MUploadDataFunctor = nullptr; } + void set_final_data(std::nullptr_t) { + MUploadDataFunctor = nullptr; + MNoHostPtrProvided &= true; + } void set_final_data_from_storage() { MUploadDataFunctor = [this]() { @@ -135,6 +138,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { updateHostMemory(FinalData); } }; + MNoHostPtrProvided &= false; } void set_final_data( @@ -145,6 +149,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { MUploadDataFunctor = [FinalDataFunc, UpdateFunc]() { FinalDataFunc(UpdateFunc); }; + MNoHostPtrProvided &= false; } protected: diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index e715c34e7fc5b..bd5779ed95f11 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -108,6 +108,19 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { } } +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultSetFinalData) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + int FinalData = 0; + sycl::buffer Buf(1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + Buf.set_final_data(&FinalData); + CheckBufferDestruction(BufImpl, false); + } +} + TEST_F(BufferDestructionCheck, BufferWithSizeOnlyNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; From b5e85de484fb026152cfcaa8d3dd8bbc79c158d3 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Oct 2022 05:34:59 -0700 Subject: [PATCH 30/62] Fix code-review comments (round 1) Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/buffer.hpp | 8 ++++--- sycl/source/buffer.cpp | 2 ++ sycl/source/detail/scheduler/scheduler.cpp | 26 ++++++++++------------ sycl/source/detail/scheduler/scheduler.hpp | 2 +- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 572ccaa70cacf..9bd2c4d37ed89 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -460,9 +460,11 @@ class buffer : public detail::buffer_plain { buffer &operator=(buffer &&rhs) = default; ~buffer() { - 
buffer_plain::handleRelease( - std::is_same>::value); + buffer_plain:: + handleRelease(/*DefaultAllocator = */ + std::is_same< + AllocatorT, + detail::sycl_memory_object_allocator>::value); } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index 29ce3280f7079..d41c2a8829506 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -122,6 +122,8 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } void buffer_plain::handleRelease(bool DefaultAllocator) const { + // Try to detach memory object only if impl is going to be released. + // Buffer copy will have pointer to the same impl. if (impl.use_count() == 1) impl->detachMemoryObject(impl, DefaultAllocator); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 73acd6b601498..272eea599c6a0 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -27,8 +27,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, - ReadLockT &GraphReadLock, - bool ForceWait) { + ReadLockT &GraphReadLock, bool Blocking) { assert(Record); std::vector ToCleanUp; for (Command *Cmd : Record->MReadLeaves) { @@ -40,7 +39,7 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); - if (ForceWait) { + if (Blocking) { GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } else return false; @@ -54,7 +53,7 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); - if (ForceWait) { + if (Blocking) { GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } else return false; @@ -87,7 +86,7 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, Command *Cmd = AllocaCmd->getReleaseCmd(); if (Cmd->getEvent()->isCompleted()) continue; - if (ForceWait) + if (Blocking) GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); else return false; @@ -431,7 +430,7 @@ Scheduler::~Scheduler() { // Scheduler. Otherwise there can be the case when objects Scheduler keeps as // fields may need Scheduler for their release and they work with Scheduler // via GlobalHandler::getScheduler that will create new Scheduler object. - // Still keep it here but it should no almost nothing if releaseResources + // Still keep it here but it should do almost nothing if releaseResources // called before. releaseResources(); } @@ -538,19 +537,18 @@ inline bool Scheduler::isNoDeferredMemObjects() { return MDeferredMemObjRelease.empty(); } -void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { +void Scheduler::cleanupDeferredMemObjects(bool Blocking) { if (isNoDeferredMemObjects()) return; // Need to aggregate ready to release object to acquire write lock once. std::list> ObjsReadyToRelease; { - ReadLockT Lock(MGraphLock, std::try_to_lock); // if we need blocking mode - force lock waiting - if (!Lock.owns_lock() && ForceWait) - Lock.lock(); + ReadLockT Lock = Blocking ? 
ReadLockT(MGraphLock) + : ReadLockT(MGraphLock, std::try_to_lock); if (Lock.owns_lock()) { - // Not expected that ForceWait == true with be used in parallel with + // Not expected that Blocking == true with be used in parallel with // adding MemObj to storage, no such scenario. std::lock_guard LockDef{MDeferredMemReleaseMutex}; auto MemObjIt = MDeferredMemObjRelease.begin(); @@ -562,7 +560,7 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { MemObjIt = MDeferredMemObjRelease.erase(MemObjIt); continue; } - if (!waitForRecordToFinish(Record, Lock, ForceWait)) { + if (!waitForRecordToFinish(Record, Lock, Blocking)) { MemObjIt++; continue; } @@ -579,10 +577,10 @@ void Scheduler::cleanupDeferredMemObjects(bool ForceWait) { { WriteLockT Lock(MGraphLock, std::try_to_lock); // if we need blocking mode - force lock waiting - if (!Lock.owns_lock() && ForceWait) + if (!Lock.owns_lock() && Blocking) acquireWriteLock(Lock); if (Lock.owns_lock()) { - for (auto &MemObj : ObjsReadyToRelease) + for (std::shared_ptr &MemObj : ObjsReadyToRelease) releaseMemObjRecord(MemObj.get(), StreamsToDeallocate, AuxResourcesToDeallocate); } else { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 37454ea5b957a..a2b1d75789123 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -469,7 +469,7 @@ class Scheduler { static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, std::vector &ToCleanUp); - void cleanupDeferredMemObjects(bool ForceWait); + void cleanupDeferredMemObjects(bool Blocking); inline void releaseMemObjRecord( detail::SYCLMemObjI *MemObj, std::vector> &StreamsToDeallocate, From a5980a020f67e8fc22872e294e22df30b1052a49 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Oct 2022 05:40:23 -0700 Subject: [PATCH 31/62] Fix missed comments Signed-off-by: Tikhomirova, Kseniya --- sycl/CMakeLists.txt | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 384249f826fc0..19128c9e2833f 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -28,7 +28,7 @@ include(SYCLUtils) # The change in SYCL_MAJOR_VERSION must be accompanied with the same update in # llvm/clang/lib/Driver/CMakeLists.txt. set(SYCL_MAJOR_VERSION 6) -set(SYCL_MINOR_VERSION 1) +set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 272eea599c6a0..3a733127afafd 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -548,7 +548,7 @@ void Scheduler::cleanupDeferredMemObjects(bool Blocking) { ReadLockT Lock = Blocking ? ReadLockT(MGraphLock) : ReadLockT(MGraphLock, std::try_to_lock); if (Lock.owns_lock()) { - // Not expected that Blocking == true with be used in parallel with + // Not expected that Blocking == true will be used in parallel with // adding MemObj to storage, no such scenario. 
      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
       auto MemObjIt = MDeferredMemObjRelease.begin();

From 09b8359b82c7fa89830f77961787bca7e517f71a Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" 
Date: Wed, 5 Oct 2022 10:19:39 -0700
Subject: [PATCH 32/62] Remove negation from variable name and logic

Signed-off-by: Tikhomirova, Kseniya 
---
 sycl/source/detail/sycl_mem_obj_t.cpp |  4 ++--
 sycl/source/detail/sycl_mem_obj_t.hpp | 18 +++++++-----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp
index c3be8aea52b65..a6ad2b1c84ddd 100644
--- a/sycl/source/detail/sycl_mem_obj_t.cpp
+++ b/sycl/source/detail/sycl_mem_obj_t.cpp
@@ -31,7 +31,7 @@ SYCLMemObjT::SYCLMemObjT(pi_native_handle MemObject, const context &SyclContext,
       MInteropContext(detail::getSyclObjImpl(SyclContext)), MOpenCLInterop(true),
       MHostPtrReadOnly(false), MNeedWriteBack(true), MUserPtr(nullptr),
       MShadowCopy(nullptr), MUploadDataFunctor(nullptr),
-      MSharedPtrStorage(nullptr), MNoHostPtrProvided(false) {
+      MSharedPtrStorage(nullptr), MHostPtrProvided(true) {
   if (MInteropContext->is_host())
     throw sycl::invalid_parameter_error(
         "Creation of interoperability memory object using host context is "
@@ -150,7 +150,7 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context,

 void SYCLMemObjT::detachMemoryObject(const std::shared_ptr<SYCLMemObjT> &self,
                                      bool DefaultAllocator) const {
-  if (MNoHostPtrProvided && DefaultAllocator)
+  if (!MHostPtrProvided && DefaultAllocator)
     Scheduler::getInstance().deferMemObjRelease(self);
 }

diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp
index 6f1614d938c66..fa8f79165b827 100644
--- a/sycl/source/detail/sycl_mem_obj_t.hpp
+++ b/sycl/source/detail/sycl_mem_obj_t.hpp
@@ -58,7 +58,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
         MOpenCLInterop(false), MHostPtrReadOnly(false), MNeedWriteBack(true),
         MSizeInBytes(SizeInBytes), MUserPtr(nullptr), MShadowCopy(nullptr),
         MUploadDataFunctor(nullptr), MSharedPtrStorage(nullptr),
-        MNoHostPtrProvided(true) {}
+        MHostPtrProvided(false) {}

   SYCLMemObjT(const property_list &Props,
               std::unique_ptr<SYCLMemObjAllocator> Allocator)
@@ -126,10 +126,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {

   void set_write_back(bool NeedWriteBack) { MNeedWriteBack = NeedWriteBack; }

-  void set_final_data(std::nullptr_t) {
-    MUploadDataFunctor = nullptr;
-    MNoHostPtrProvided &= true;
-  }
+  void set_final_data(std::nullptr_t) { MUploadDataFunctor = nullptr; }

   void set_final_data_from_storage() {
     MUploadDataFunctor = [this]() {
@@ -138,7 +135,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
         updateHostMemory(FinalData);
       }
     };
-    MNoHostPtrProvided &= false;
+    MHostPtrProvided = true;
   }

   void set_final_data(
@@ -149,7 +146,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
     MUploadDataFunctor = [FinalDataFunc, UpdateFunc]() {
       FinalDataFunc(UpdateFunc);
     };
-    MNoHostPtrProvided &= false;
+    MHostPtrProvided = true;
   }

 protected:
@@ -175,7 +172,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
   }

   void handleHostData(void *HostPtr, const size_t RequiredAlign) {
-    MNoHostPtrProvided = false;
+    MHostPtrProvided = true;
     if (!MHostPtrReadOnly && HostPtr) {
       set_final_data([HostPtr](const std::function<void(void *const Ptr)> &F) {
         F(HostPtr);
@@ -199,7 +196,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {

   void handleHostData(const std::shared_ptr<void> &HostPtr,
                       const size_t RequiredAlign, bool IsConstPtr) {
-    MNoHostPtrProvided = false;
+    MHostPtrProvided = true;
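+    // A user-provided host pointer rules out deferred release later on; see
+    // SYCLMemObjT::detachMemoryObject, which requires !MHostPtrProvided.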
MSharedPtrStorage = HostPtr; MHostPtrReadOnly = IsConstPtr; if (HostPtr) { @@ -302,9 +299,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { // check for MUploadDataFunctor is not enough to define it since for case when // we have read only HostPtr - MUploadDataFunctor is empty but delayed release // must be not allowed. - bool MNoHostPtrProvided; + bool MHostPtrProvided; }; - } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl From 1e75448f8dcc41d6c2fe51fbfe32fa93066562f0 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 5 Oct 2022 15:12:00 -0700 Subject: [PATCH 33/62] Simplify deferred mem objects release - do not aggregate to capture lock Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 123 ++++++------------ sycl/source/detail/scheduler/scheduler.hpp | 7 +- sycl/source/detail/sycl_mem_obj_t.cpp | 6 +- .../buffer/BufferDestructionCheck.cpp | 87 +++++++------ 4 files changed, 101 insertions(+), 122 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 3a733127afafd..f3012d6bbf1c5 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -26,49 +26,42 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, - ReadLockT &GraphReadLock, bool Blocking) { - assert(Record); +inline bool Scheduler::checkLeavesCompletion(LeavesCollection &Leaves, + ReadLockT &GraphReadLock, + bool Blocking) { std::vector ToCleanUp; - for (Command *Cmd : Record->MReadLeaves) { + for (Command *Cmd : Leaves) { + // Expect that mem object release is not responsible for dependency commands + // enqueue if (Cmd->getEvent()->isCompleted()) continue; - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); - if (Blocking) { + if (Blocking) GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - } else + else return false; } - for (Command *Cmd : Record->MWriteLeaves) { - if (Cmd->getEvent()->isCompleted()) - continue; + return true; +} - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", - PI_ERROR_INVALID_OPERATION); - if (Blocking) { - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - } else - return false; - } - // all dependencies is completed and we can enqueue all ReleaseCmds in advance +bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, + ReadLockT &GraphReadLock, bool Blocking) { + assert(Record); + if (!checkLeavesCompletion(Record->MReadLeaves, GraphReadLock, Blocking)) + return false; + if (!checkLeavesCompletion(Record->MWriteLeaves, GraphReadLock, Blocking)) + return false; + + std::vector ToCleanUp; for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); - if (ReleaseCmd->isSuccessfullyEnqueued()) - continue; #ifdef XPTI_ENABLE_INSTRUMENTATION // Will contain the list of dependencies for the Release Command std::set DepCommands; // Capture the read dependencies for (Command *Cmd : Record->MWriteLeaves) DepCommands.insert(Cmd); + // Capture the write dependencies for (Command *Cmd : 
Record->MReadLeaves) DepCommands.insert(Cmd); // Report these dependencies to the Command so these dependencies can be @@ -80,17 +73,11 @@ bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); + // Unconditionally wait for completion + GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock, + ToCleanUp); } - // enqueue is fully done and we can check if ReleaseCmd is completed - for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { - Command *Cmd = AllocaCmd->getReleaseCmd(); - if (Cmd->getEvent()->isCompleted()) - continue; - if (Blocking) - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - else - return false; - } + return true; } @@ -304,21 +291,17 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { std::vector> AuxResourcesToDeallocate; { - MemObjRecord *Record = nullptr; + MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); + if (!Record) + // No operations were performed on the mem object + return; { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events ReadLockT Lock(MGraphLock); - - Record = MGraphBuilder.getMemObjRecord(MemObj); - if (!Record) - // No operations were performed on the mem object - return; - waitForRecordToFinish(Record, Lock, true); } - { WriteLockT Lock(MGraphLock, std::defer_lock); acquireWriteLock(Lock); @@ -464,7 +447,7 @@ void Scheduler::releaseResources() { // queue_impl, ~queue_impl is called and buffer for assert (which is created // with size only so all confitions for deferred release are satisfied) is // added to deferred mem obj storage. So we may end up with leak. - while (!isNoDeferredMemObjects()) + while (!isDeferredMemObjectsEmpty()) cleanupDeferredMemObjects(true); } @@ -532,21 +515,28 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { cleanupDeferredMemObjects(false); } -inline bool Scheduler::isNoDeferredMemObjects() { +inline bool Scheduler::isDeferredMemObjectsEmpty() { std::lock_guard Lock{MDeferredMemReleaseMutex}; return MDeferredMemObjRelease.empty(); } void Scheduler::cleanupDeferredMemObjects(bool Blocking) { - if (isNoDeferredMemObjects()) + if (isDeferredMemObjectsEmpty()) return; + if (Blocking) { + std::list> MTempStorage; + { + std::lock_guard LockDef{MDeferredMemReleaseMutex}; + MDeferredMemObjRelease.swap(MTempStorage); + } + // if any objects in MTempStorage exist - it is leaving scope and being + // deleted + } - // Need to aggregate ready to release object to acquire write lock once. std::list> ObjsReadyToRelease; { - // if we need blocking mode - force lock waiting - ReadLockT Lock = Blocking ? ReadLockT(MGraphLock) - : ReadLockT(MGraphLock, std::try_to_lock); + + ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock); if (Lock.owns_lock()) { // Not expected that Blocking == true will be used in parallel with // adding MemObj to storage, no such scenario. 
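The blocking branch added above drains the deferred list with a swap-under-lock: the shared container is exchanged for an empty local one while the mutex is held, and the elements are destroyed only after the lock is dropped. A minimal standalone sketch of that pattern follows; all names in it (Resource, GDeferred, drainBlocking) are illustrative, not part of the SYCL runtime:

    #include <list>
    #include <memory>
    #include <mutex>

    struct Resource {}; // payload whose destructor performs the release

    std::mutex GMutex;
    std::list<std::shared_ptr<Resource>> GDeferred;

    void drainBlocking() {
      std::list<std::shared_ptr<Resource>> Tmp;
      {
        std::lock_guard<std::mutex> Lock{GMutex};
        GDeferred.swap(Tmp); // O(1) pointer exchange while the mutex is held
      }
      // Tmp is destroyed here, after the lock is released, so the element
      // destructors may themselves take GMutex (e.g. to re-defer nested
      // releases) without deadlocking.
    }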
@@ -554,12 +544,6 @@ void Scheduler::cleanupDeferredMemObjects(bool Blocking) {
       auto MemObjIt = MDeferredMemObjRelease.begin();
       while (MemObjIt != MDeferredMemObjRelease.end()) {
         MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get());
-        if (!Record) {
-          // Just trigger delete since no operations on object was perfromed and
-          // no commands and other to wait for
-          MemObjIt = MDeferredMemObjRelease.erase(MemObjIt);
-          continue;
-        }
         if (!waitForRecordToFinish(Record, Lock, Blocking)) {
           MemObjIt++;
           continue;
@@ -569,28 +553,7 @@ void Scheduler::cleanupDeferredMemObjects(bool Blocking) {
       }
     }
   }
-  if (ObjsReadyToRelease.empty())
-    return;
-
-  std::vector<std::shared_ptr<stream_impl>> StreamsToDeallocate;
-  std::vector<std::shared_ptr<const void>> AuxResourcesToDeallocate;
-  {
-    WriteLockT Lock(MGraphLock, std::try_to_lock);
-    // if we need blocking mode - force lock waiting
-    if (!Lock.owns_lock() && Blocking)
-      acquireWriteLock(Lock);
-    if (Lock.owns_lock()) {
-      for (std::shared_ptr<SYCLMemObjT> &MemObj : ObjsReadyToRelease)
-        releaseMemObjRecord(MemObj.get(), StreamsToDeallocate,
-                            AuxResourcesToDeallocate);
-    } else {
-      std::lock_guard<std::mutex> LockDef{MDeferredMemReleaseMutex};
-      MDeferredMemObjRelease.splice(MDeferredMemObjRelease.end(),
-                                    ObjsReadyToRelease);
-    }
-  }
-  deallocateStreams(StreamsToDeallocate);
-  // ObjsReadyToRelease leaving scope and being deleted
+  // Any objects left in ObjsReadyToRelease leave scope here and are deleted
 }
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)

diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index a2b1d75789123..28923fafe362b 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -449,7 +449,7 @@ class Scheduler {
   Scheduler();
   virtual ~Scheduler();
   void releaseResources();
-  inline bool isNoDeferredMemObjects();
+  inline bool isDeferredMemObjectsEmpty();

 protected:
   // TODO: after switching to C++17, change std::shared_timed_mutex to
@@ -469,6 +469,7 @@ class Scheduler {
   static void enqueueLeavesOfReqUnlocked(const Requirement *const Req,
                                          std::vector<Command *> &ToCleanUp);

+  // May lock graph with read and write modes during execution.
   void cleanupDeferredMemObjects(bool Blocking);
   inline void releaseMemObjRecord(
       detail::SYCLMemObjI *MemObj,
       std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
@@ -778,7 +779,9 @@ class Scheduler {
   /// completed, otherwise - false. Must always return true if ForceWait ==
   /// true.
   bool waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock,
-                             bool ForceWait);
+                             bool Blocking);
+  inline bool checkLeavesCompletion(LeavesCollection &Leaves,
+                                    ReadLockT &GraphReadLock, bool Blocking);

   GraphBuilder MGraphBuilder;
   RWLockT MGraphLock;

diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp
index a6ad2b1c84ddd..3e949fee77ddf 100644
--- a/sycl/source/detail/sycl_mem_obj_t.cpp
+++ b/sycl/source/detail/sycl_mem_obj_t.cpp
@@ -150,7 +150,11 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context,

 void SYCLMemObjT::detachMemoryObject(const std::shared_ptr<SYCLMemObjT> &self,
                                      bool DefaultAllocator) const {
-  if (!MHostPtrProvided && DefaultAllocator)
+  // Check MRecord without taking the read lock: no usage was found that could
+  // cause corruption. MRecord is nullptr on buffer creation and is set to a
+  // meaningful value only if an operation on the buffer is submitted inside an
+  // addCG call. addCG is called from queue::submit, and buffer destruction
+  // cannot overlap with it.
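+  // Deferred release therefore needs all three conditions below: commands
+  // were submitted for the object (MRecord), no user host pointer exists,
+  // and the default allocator is in use.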
+ if (MRecord && !MHostPtrProvided && DefaultAllocator) Scheduler::getInstance().deferMemObjRelease(self); } diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index bd5779ed95f11..4e542a1413c43 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -80,7 +80,7 @@ class BufferDestructionCheck : public ::testing::Test { // those buffers also created with size only so it also to be deferred on // deletion. EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::_)) - .Times(/*testing::AnyNumber()*/ 2) + .Times(testing::AnyNumber()) .InSequence(S); } else { // buffer created above should not be deferred on deletion because has non @@ -91,12 +91,32 @@ class BufferDestructionCheck : public ::testing::Test { } } + template + void SubmitWorkload(sycl::queue &Queue, Buffer *Buf) { + Queue.submit([&](sycl::handler &CGH) { + // Just need to imitate task dependency on buffer + auto acc = Buf->get_access(CGH, sycl::read_only); + CGH.host_task([] {}); + }); + } + protected: sycl::unittest::PiMock Mock; sycl::platform Plt; testing::NiceMock *MockSchedulerPtr; }; +TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultNoRecord) { + sycl::context Context{Plt}; + sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + { + sycl::buffer Buf(1); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + CheckBufferDestruction(BufImpl, false); + } +} + TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -104,6 +124,7 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::buffer Buf(1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, true); } } @@ -117,6 +138,7 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultSetFinalData) { std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); Buf.set_final_data(&FinalData); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -131,6 +153,7 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyNonDefaultAllocator) { sycl::buffer Buf(1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -144,6 +167,7 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultAllocator) { sycl::buffer Buf(1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, true); } } @@ -156,6 +180,7 @@ TEST_F(BufferDestructionCheck, BufferWithRawHostPtr) { sycl::buffer Buf(&InitialVal, 1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -171,6 +196,7 @@ TEST_F(BufferDestructionCheck, BufferWithRawHostPtrWithNonDefaultAllocator) { sycl::buffer Buf(&InitialVal, 1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -183,6 +209,7 @@ TEST_F(BufferDestructionCheck, BufferWithConstRawHostPtr) { sycl::buffer Buf(&InitialVal, 1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -199,6 +226,7 @@ TEST_F(BufferDestructionCheck, sycl::buffer Buf(&InitialVal, 1, allocator); std::shared_ptr BufImpl = 
sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -211,6 +239,7 @@ TEST_F(BufferDestructionCheck, BufferWithContainer) { sycl::buffer Buf(data); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -226,6 +255,7 @@ TEST_F(BufferDestructionCheck, BufferWithContainerWithNonDefaultAllocator) { sycl::buffer Buf(data, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -238,6 +268,7 @@ TEST_F(BufferDestructionCheck, BufferWithSharedPtr) { sycl::buffer Buf(InitialVal, 1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -253,6 +284,7 @@ TEST_F(BufferDestructionCheck, BufferWithSharedPtrWithNonDefaultAllocator) { sycl::buffer Buf(InitialVal, 1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -265,6 +297,7 @@ TEST_F(BufferDestructionCheck, BufferWithSharedPtrArray) { sycl::buffer Buf(InitialVal, 1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -281,6 +314,7 @@ TEST_F(BufferDestructionCheck, sycl::buffer Buf(InitialVal, 2, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, false); } } @@ -293,6 +327,7 @@ TEST_F(BufferDestructionCheck, BufferWithIterators) { sycl::buffer Buf(data.begin(), data.end()); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); + SubmitWorkload(Q, &Buf); CheckBufferDestruction(BufImpl, true); } } @@ -307,6 +342,7 @@ TEST_F(BufferDestructionCheck, BufferWithIterators) { // sycl::buffer Buf(data.begin(), data.end(), // allocator); std::shared_ptr BufImpl = // sycl::detail::getSyclObjImpl(Buf); +// //needs workload // CheckBufferDestruction(BufImpl, false); // } // } @@ -322,6 +358,7 @@ TEST_F(BufferDestructionCheck, BufferDeferringCheckWriteLock) { std::defer_lock); { sycl::buffer Buf(1); + SubmitWorkload(Q, &Buf); { std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); @@ -348,32 +385,6 @@ TEST_F(BufferDestructionCheck, BufferDeferringCheckWriteLock) { } } -TEST_F(BufferDestructionCheck, BufferDeferringCheckReadLock) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - { - testing::Sequence S; - EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); - std::shared_lock Lock(MockSchedulerPtr->MGraphLock, - std::defer_lock); - { - sycl::buffer Buf(1); - Lock.lock(); - // gmock warning will be generated - simply tell gtest that now we do not - // want to mock the function - ON_CALL(*MockSchedulerPtr, deferMemObjRelease) - .WillByDefault( - [this](const std::shared_ptr &MemObj) { - return MockSchedulerPtr - ->sycl::detail::Scheduler::deferMemObjRelease(MemObj); - }); - } - // Record is empty and read lock do not prevent from being deleted - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); - Lock.unlock(); - } -} - std::map ExpectedEventStatus; pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, void *PV, size_t *PVSizeRet) { @@ -389,17 +400,17 @@ pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, return PI_ERROR_INVALID_OPERATION; } -class 
MockCmdWithRelTracking : public MockCommand {
+class MockCmdWithReleaseTracking : public MockCommand {
 public:
-  MockCmdWithRelTracking(
+  MockCmdWithReleaseTracking(
       sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req,
       sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
       : MockCommand(Queue, Req, Type){};
-  MockCmdWithRelTracking(
+  MockCmdWithReleaseTracking(
       sycl::detail::QueueImplPtr Queue,
       sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
       : MockCommand(Queue, Type){};
-  ~MockCmdWithRelTracking() { Release(); }
+  ~MockCmdWithReleaseTracking() { Release(); }

   MOCK_METHOD0(Release, void());
 };
@@ -413,16 +424,18 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   sycl::detail::MemObjRecord *Rec =
       MockSchedulerPtr->MGraphBuilder.getOrInsertMemObjRecord(
           sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds);
-  MockCmdWithRelTracking *ReadCmd = nullptr;
-  MockCmdWithRelTracking *WriteCmd = nullptr;
+  MockCmdWithReleaseTracking *ReadCmd = nullptr;
+  MockCmdWithReleaseTracking *WriteCmd = nullptr;
   ReadCmd =
-      new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
+      new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
   ReadCmd->getEvent()->getHandleRef() = reinterpret_cast<pi_event>(
       0x01); // just assign to be able to use mock
   WriteCmd =
-      new MockCmdWithRelTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
+      new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
   WriteCmd->getEvent()->getHandleRef() =
       reinterpret_cast<pi_event>(0x02);
+  ReadCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess;
+  WriteCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess;

   std::vector<sycl::detail::Command *> ToCleanUp;
   std::vector<sycl::detail::Command *> ToEnqueue;
@@ -439,17 +452,13 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] =
       PI_EVENT_SUBMITTED;

-  EXPECT_CALL(*ReadCmd, enqueue).Times(1).RetiresOnSaturation();
   EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false));
-  EXPECT_CALL(*ReadCmd, enqueue).Times(0);

   ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;
   ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] =
       PI_EVENT_SUBMITTED;

-  EXPECT_CALL(*WriteCmd, enqueue).Times(1).RetiresOnSaturation();
   EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false));
-  EXPECT_CALL(*WriteCmd, enqueue).Times(0);

   ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;
   ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE;

From 484b1cf9108ff8b3cf1b2872e0a6e73175b6c3fc Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" 
Date: Wed, 5 Oct 2022 15:20:20 -0700
Subject: [PATCH 34/62] Return trace of stream buffer emptiness to scheduler
 destructor

Signed-off-by: Tikhomirova, Kseniya 
---
 sycl/source/detail/scheduler/scheduler.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp
index f3012d6bbf1c5..b878eb3286178 100644
--- a/sycl/source/detail/scheduler/scheduler.cpp
+++ b/sycl/source/detail/scheduler/scheduler.cpp
@@ -409,16 +409,6 @@ Scheduler::Scheduler() {
 }

 Scheduler::~Scheduler() {
-  // Please be aware that releaseResources should be called before deletion of
-  // Scheduler. 
Otherwise there can be the case when objects Scheduler keeps as - // fields may need Scheduler for their release and they work with Scheduler - // via GlobalHandler::getScheduler that will create new Scheduler object. - // Still keep it here but it should do almost nothing if releaseResources - // called before. - releaseResources(); -} - -void Scheduler::releaseResources() { // By specification there are several possible sync points: buffer // destruction, wait() method of a queue or event. Stream doesn't introduce // any synchronization point. It is guaranteed that stream is flushed and @@ -434,6 +424,16 @@ void Scheduler::releaseResources() { "not all resources were released. Please be sure that all kernels " "have synchronization points.\n\n"); } + // Please be aware that releaseResources should be called before deletion of + // Scheduler. Otherwise there can be the case when objects Scheduler keeps as + // fields may need Scheduler for their release and they work with Scheduler + // via GlobalHandler::getScheduler that will create new Scheduler object. + // Still keep it here but it should do almost nothing if releaseResources + // called before. + releaseResources(); +} + +void Scheduler::releaseResources() { // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, // clean them up now. From 106132246e56ca1834ad8607ffbf15b937954d7b Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Oct 2022 01:52:43 -0700 Subject: [PATCH 35/62] Fix comments (round 2) Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 4 +- sycl/source/detail/global_handler.hpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 86 ++++++++++--------- sycl/source/detail/scheduler/scheduler.hpp | 24 ++---- .../buffer/BufferDestructionCheck.cpp | 8 +- 5 files changed, 61 insertions(+), 63 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 9499fe31cfc2a..c510ccc3f55bf 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -47,13 +47,13 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... 
Args) { return *IWL.Inst; } -void GlobalHandler::attachScheduler(Scheduler *scheduler) { +void GlobalHandler::attachScheduler(Scheduler *Scheduler) { // Test method, do not protect with lock since releaseResources will cause // dead lock due to host queue release const LockGuard Lock{MScheduler.Lock}; if (MScheduler.Inst) MScheduler.Inst->releaseResources(); - MScheduler.Inst.reset(scheduler); + MScheduler.Inst.reset(Scheduler); } Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 32402e363f803..8e01845d0a277 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -74,7 +74,7 @@ class GlobalHandler { void unloadPlugins(); // For testing purposes only - void attachScheduler(Scheduler *scheduler); + void attachScheduler(Scheduler *Scheduler); private: friend void releaseDefaultContexts(); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index b878eb3286178..1f039524c8177 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -26,59 +26,63 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -inline bool Scheduler::checkLeavesCompletion(LeavesCollection &Leaves, - ReadLockT &GraphReadLock, - bool Blocking) { - std::vector ToCleanUp; - for (Command *Cmd : Leaves) { - // Expect that mem object release is not responsible for dependency commands - // enqueue - if (Cmd->getEvent()->isCompleted()) - continue; - - if (Blocking) - GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); - else +bool Scheduler::checkLeavesCompletion(MemObjRecord *Record) { + for (Command *Cmd : Record->MReadLeaves) { + if (!Cmd->getEvent()->isCompleted()) + return false; + } + for (Command *Cmd : Record->MWriteLeaves) { + if (!Cmd->getEvent()->isCompleted()) return false; } return true; } -bool Scheduler::waitForRecordToFinish(MemObjRecord *Record, - ReadLockT &GraphReadLock, bool Blocking) { - assert(Record); - if (!checkLeavesCompletion(Record->MReadLeaves, GraphReadLock, Blocking)) - return false; - if (!checkLeavesCompletion(Record->MWriteLeaves, GraphReadLock, Blocking)) - return false; - +void Scheduler::waitForRecordToFinish(MemObjRecord *Record, + ReadLockT &GraphReadLock) { +#ifdef XPTI_ENABLE_INSTRUMENTATION + // Will contain the list of dependencies for the Release Command + std::set DepCommands; +#endif std::vector ToCleanUp; - for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { - Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); + for (Command *Cmd : Record->MReadLeaves) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION - // Will contain the list of dependencies for the Release Command - std::set DepCommands; - // Capture the read dependencies - for (Command *Cmd : Record->MWriteLeaves) - DepCommands.insert(Cmd); - // Capture the write dependencies - for (Command *Cmd : Record->MReadLeaves) - DepCommands.insert(Cmd); - // Report these dependencies to the Command so these dependencies can be - // reported as edges - ReleaseCmd->resolveReleaseDependencies(DepCommands); + // Capture the dependencies + DepCommands.insert(Cmd); #endif + 
GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } + for (Command *Cmd : Record->MWriteLeaves) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ToCleanUp); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", + PI_ERROR_INVALID_OPERATION); +#ifdef XPTI_ENABLE_INSTRUMENTATION + DepCommands.insert(Cmd); +#endif + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); + } + for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { + Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, ToCleanUp); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); - // Unconditionally wait for completion +#ifdef XPTI_ENABLE_INSTRUMENTATION + // Report these dependencies to the Command so these dependencies can be + // reported as edges + ReleaseCmd->resolveReleaseDependencies(DepCommands); +#endif GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock, ToCleanUp); } - - return true; } EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, @@ -266,7 +270,7 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { deallocateStreams(StreamsToDeallocate); } -inline void Scheduler::releaseMemObjRecord( +void Scheduler::releaseMemObjRecord( detail::SYCLMemObjI *MemObj, std::vector> &StreamsToDeallocate, std::vector> &AuxResourcesToDeallocate) { @@ -300,7 +304,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events ReadLockT Lock(MGraphLock); - waitForRecordToFinish(Record, Lock, true); + waitForRecordToFinish(Record, Lock); } { WriteLockT Lock(MGraphLock, std::defer_lock); @@ -524,7 +528,7 @@ void Scheduler::cleanupDeferredMemObjects(bool Blocking) { if (isDeferredMemObjectsEmpty()) return; if (Blocking) { - std::list> MTempStorage; + std::vector> MTempStorage; { std::lock_guard LockDef{MDeferredMemReleaseMutex}; MDeferredMemObjRelease.swap(MTempStorage); @@ -544,7 +548,7 @@ void Scheduler::cleanupDeferredMemObjects(bool Blocking) { auto MemObjIt = MDeferredMemObjRelease.begin(); while (MemObjIt != MDeferredMemObjRelease.end()) { MemObjRecord *Record = MGraphBuilder.getMemObjRecord((*MemObjIt).get()); - if (!waitForRecordToFinish(Record, Lock, Blocking)) { + if (!checkLeavesCompletion(Record)) { MemObjIt++; continue; } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 28923fafe362b..a6b5049d60d05 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -471,7 +471,7 @@ class Scheduler { // May lock graph with read and write modes during execution. void cleanupDeferredMemObjects(bool Blocking); - inline void releaseMemObjRecord( + void releaseMemObjRecord( detail::SYCLMemObjI *MemObj, std::vector> &StreamsToDeallocate, std::vector> &AuxResourcesToDeallocate); @@ -766,22 +766,16 @@ class Scheduler { BlockingT Blocking = NON_BLOCKING); }; - /// This function conditionally waits on all of the graph leaves which somehow - /// use the memory object which is represented by \c Record. The function is - /// called upon destruction of memory buffer. 
\param Record memory record to - /// await graph leaves of to finish \param GraphReadLock locked graph read - /// lock \param ForceWait flag to identify if we need to wait for all - /// dependencies + /// This function waits on all of the graph leaves which somehow use the + /// memory object which is represented by \c Record. The function is called + /// upon destruction of memory buffer. + /// \param Record memory record to await graph leaves of to finish + /// \param GraphReadLock locked graph read lock /// /// GraphReadLock will be unlocked/locked as needed. Upon return from the /// function, GraphReadLock will be left in locked state. - /// \return true if all record dependencies and release commands are - /// completed, otherwise - false. Must always return true if ForceWait == - /// true. - bool waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock, - bool Blocking); - inline bool checkLeavesCompletion(LeavesCollection &Leaves, - ReadLockT &GraphReadLock, bool Blocking); + void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock); + bool checkLeavesCompletion(MemObjRecord *Record); GraphBuilder MGraphBuilder; RWLockT MGraphLock; @@ -789,7 +783,7 @@ class Scheduler { std::vector MDeferredCleanupCommands; std::mutex MDeferredCleanupMutex; - std::list> MDeferredMemObjRelease; + std::vector> MDeferredMemObjRelease; std::mutex MDeferredMemReleaseMutex; QueueImplPtr DefaultHostQueue; diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index 4e542a1413c43..f66d71f23debc 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -23,10 +23,10 @@ class FairMockScheduler : public sycl::detail::Scheduler { public: + using sycl::detail::Scheduler::checkLeavesCompletion; using sycl::detail::Scheduler::MDeferredMemObjRelease; using sycl::detail::Scheduler::MGraphBuilder; using sycl::detail::Scheduler::MGraphLock; - using sycl::detail::Scheduler::waitForRecordToFinish; MOCK_METHOD1(deferMemObjRelease, void(const std::shared_ptr &)); }; @@ -452,17 +452,17 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_SUBMITTED; - EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false)); + EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_SUBMITTED; - EXPECT_FALSE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, false)); + EXPECT_FALSE(MockSchedulerPtr->checkLeavesCompletion(Rec)); ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; ExpectedEventStatus[WriteCmd->getEvent()->getHandleRef()] = PI_EVENT_COMPLETE; - EXPECT_TRUE(MockSchedulerPtr->waitForRecordToFinish(Rec, Lock, true)); + EXPECT_TRUE(MockSchedulerPtr->checkLeavesCompletion(Rec)); // previous expect_call is still valid and will generate failure if we recieve // call here, no need for extra limitation EXPECT_CALL(*ReadCmd, Release).Times(1); From d4537a3c85672ebd6fe7ccee110b98c5d3f46b5b Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Oct 2022 04:58:06 -0700 Subject: [PATCH 36/62] Fix comments (round 3) Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 12 ++++++------ sycl/source/detail/scheduler/scheduler.cpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index c510ccc3f55bf..f476f68f64618 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -46,15 +46,15 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { return *IWL.Inst; } +if (MScheduler.Inst) -void GlobalHandler::attachScheduler(Scheduler *Scheduler) { - // Test method, do not protect with lock since releaseResources will cause - // dead lock due to host queue release const LockGuard Lock{MScheduler.Lock}; - if (MScheduler.Inst) + void GlobalHandler::attachScheduler(Scheduler *Scheduler) { + // The method is for testing purposes. Do not protect with lock since + // releaseResources will cause dead lock due to host queue release MScheduler.Inst->releaseResources(); - MScheduler.Inst.reset(Scheduler); -} + MScheduler.Inst.reset(Scheduler); + } Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 1f039524c8177..c4d27066a869a 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -452,7 +452,7 @@ void Scheduler::releaseResources() { // with size only so all confitions for deferred release are satisfied) is // added to deferred mem obj storage. So we may end up with leak. while (!isDeferredMemObjectsEmpty()) - cleanupDeferredMemObjects(true); + cleanupDeferredMemObjects(/*Blocking*/ true); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { @@ -481,7 +481,7 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { - cleanupDeferredMemObjects(false); + cleanupDeferredMemObjects(/*Blocking*/ false); if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) @@ -516,7 +516,7 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { std::lock_guard Lock{MDeferredMemReleaseMutex}; MDeferredMemObjRelease.push_back(MemObj); } - cleanupDeferredMemObjects(false); + cleanupDeferredMemObjects(/*Blocking*/ false); } inline bool Scheduler::isDeferredMemObjectsEmpty() { From 342ff91747bd658429c1f87c688b17a214461a24 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Oct 2022 05:20:50 -0700 Subject: [PATCH 37/62] Fix build Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index f476f68f64618..575e15dfc7167 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -46,15 +46,14 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { return *IWL.Inst; } -if (MScheduler.Inst) - void GlobalHandler::attachScheduler(Scheduler *Scheduler) { - // The method is for testing purposes. Do not protect with lock since - // releaseResources will cause dead lock due to host queue release +void GlobalHandler::attachScheduler(Scheduler *Scheduler) { + // The method is for testing purposes. 
Do not protect with lock since + // releaseResources will cause dead lock due to host queue release + if (MScheduler.Inst) MScheduler.Inst->releaseResources(); - - MScheduler.Inst.reset(Scheduler); - } + MScheduler.Inst.reset(Scheduler); +} Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } From fdab0e78a2f82d394d531c1e5ff80fc5e2197848 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Oct 2022 08:10:39 -0700 Subject: [PATCH 38/62] Fix comments & tests (round 4) Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 10 +- sycl/source/detail/scheduler/scheduler.hpp | 7 +- .../buffer/BufferDestructionCheck.cpp | 316 +++++++----------- .../scheduler/SchedulerTestUtils.hpp | 3 +- 4 files changed, 129 insertions(+), 207 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c4d27066a869a..729a2da51a091 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -452,7 +452,7 @@ void Scheduler::releaseResources() { // with size only so all confitions for deferred release are satisfied) is // added to deferred mem obj storage. So we may end up with leak. while (!isDeferredMemObjectsEmpty()) - cleanupDeferredMemObjects(/*Blocking*/ true); + cleanupDeferredMemObjects(BlockingT::BLOCKING); } void Scheduler::acquireWriteLock(WriteLockT &Lock) { @@ -481,7 +481,7 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { - cleanupDeferredMemObjects(/*Blocking*/ false); + cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) @@ -516,7 +516,7 @@ void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { std::lock_guard Lock{MDeferredMemReleaseMutex}; MDeferredMemObjRelease.push_back(MemObj); } - cleanupDeferredMemObjects(/*Blocking*/ false); + cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); } inline bool Scheduler::isDeferredMemObjectsEmpty() { @@ -524,10 +524,10 @@ inline bool Scheduler::isDeferredMemObjectsEmpty() { return MDeferredMemObjRelease.empty(); } -void Scheduler::cleanupDeferredMemObjects(bool Blocking) { +void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { if (isDeferredMemObjectsEmpty()) return; - if (Blocking) { + if (Blocking == BlockingT::BLOCKING) { std::vector> MTempStorage; { std::lock_guard LockDef{MDeferredMemReleaseMutex}; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index a6b5049d60d05..7830a4d889ecd 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -443,11 +443,10 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); // Virtual for testing purposes only - virtual void - deferMemObjRelease(const std::shared_ptr &MemObj); + void deferMemObjRelease(const std::shared_ptr &MemObj); Scheduler(); - virtual ~Scheduler(); + ~Scheduler(); void releaseResources(); inline bool isDeferredMemObjectsEmpty(); @@ -470,7 +469,7 @@ class Scheduler { std::vector &ToCleanUp); // May lock graph with read and write modes during execution. 
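+  // BlockingT::BLOCKING drains the deferred storage before returning;
+  // NON_BLOCKING only releases objects whose commands have already completed.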
- void cleanupDeferredMemObjects(bool Blocking); + void cleanupDeferredMemObjects(BlockingT Blocking); void releaseMemObjRecord( detail::SYCLMemObjI *MemObj, std::vector> &StreamsToDeallocate, diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index f66d71f23debc..0487b13e14f96 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -21,14 +21,18 @@ #include "../scheduler/SchedulerTestUtils.hpp" -class FairMockScheduler : public sycl::detail::Scheduler { +class MockCmdWithReleaseTracking : public MockCommand { public: - using sycl::detail::Scheduler::checkLeavesCompletion; - using sycl::detail::Scheduler::MDeferredMemObjRelease; - using sycl::detail::Scheduler::MGraphBuilder; - using sycl::detail::Scheduler::MGraphLock; - MOCK_METHOD1(deferMemObjRelease, - void(const std::shared_ptr &)); + MockCmdWithReleaseTracking( + sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req, + sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG) + : MockCommand(Queue, Req, Type){}; + MockCmdWithReleaseTracking( + sycl::detail::QueueImplPtr Queue, + sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG) + : MockCommand(Queue, Type){}; + ~MockCmdWithReleaseTracking() { Release(); } + MOCK_METHOD0(Release, void()); }; class BufferDestructionCheck : public ::testing::Test { @@ -41,7 +45,7 @@ class BufferDestructionCheck : public ::testing::Test { std::cout << "Not run due to host-only environment\n"; GTEST_SKIP(); } - MockSchedulerPtr = new testing::NiceMock(); + MockSchedulerPtr = new MockScheduler(); sycl::detail::GlobalHandler::instance().attachScheduler( dynamic_cast(MockSchedulerPtr)); } @@ -49,340 +53,274 @@ class BufferDestructionCheck : public ::testing::Test { sycl::detail::GlobalHandler::instance().attachScheduler(NULL); } - inline void - CheckBufferDestruction(std::shared_ptr BufImpl, - bool ShouldBeDeferred) { - ASSERT_NE(BufImpl, nullptr); - const std::function &)> - checkerNotEqual = - [&BufImpl]( - const std::shared_ptr &memObj) { - return BufImpl.get() != memObj.get(); - }; - const std::function &)> - checkerEqual = - [&BufImpl]( - const std::shared_ptr &memObj) { - return BufImpl.get() == memObj.get(); - }; - if (ShouldBeDeferred) { - testing::Sequence S; - // first is check that explicitly created buffer is deferred - EXPECT_CALL(*MockSchedulerPtr, - deferMemObjRelease(testing::Truly(checkerEqual))) - .Times(1) - .InSequence(S) - .RetiresOnSaturation(); - // we have two queues - non host and host queue. Currently queue contains - // its own buffer as class member, buffer as used for assert handling. - // those buffers also created with size only so it also to be deferred on - // deletion. 
- EXPECT_CALL(*MockSchedulerPtr, deferMemObjRelease(testing::_)) - .Times(testing::AnyNumber()) - .InSequence(S); - } else { - // buffer created above should not be deferred on deletion because has non - // default allocator - EXPECT_CALL(*MockSchedulerPtr, - deferMemObjRelease(testing::Truly(checkerNotEqual))) - .Times(testing::AnyNumber()); - } - } - template - void SubmitWorkload(sycl::queue &Queue, Buffer *Buf) { - Queue.submit([&](sycl::handler &CGH) { - // Just need to imitate task dependency on buffer - auto acc = Buf->get_access(CGH, sycl::read_only); - CGH.host_task([] {}); - }); + MockCmdWithReleaseTracking *addCommandToBuffer(Buffer &Buf, sycl::queue &Q) { + sycl::detail::Requirement MockReq = getMockRequirement(Buf); + std::vector AuxCmds; + sycl::detail::MemObjRecord *Rec = MockSchedulerPtr->getOrInsertMemObjRecord( + sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds); + MockCmdWithReleaseTracking *MockCmd = new MockCmdWithReleaseTracking( + sycl::detail::getSyclObjImpl(Q), MockReq); + std::vector ToEnqueue; + MockSchedulerPtr->addNodeToLeaves(Rec, MockCmd, sycl::access::mode::write, + ToEnqueue); + // we do not want to enqueue commands, just keep not enqueued and not + // completed, otherwise check is not possible + return MockCmd; } protected: sycl::unittest::PiMock Mock; sycl::platform Plt; - testing::NiceMock *MockSchedulerPtr; + MockScheduler *MockSchedulerPtr; }; -TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultNoRecord) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - { - sycl::buffer Buf(1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - CheckBufferDestruction(BufImpl, false); - } -} - TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefault) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; + sycl::detail::buffer_impl *RawBufferImplPtr = NULL; { sycl::buffer Buf(1); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, true); + RawBufferImplPtr = BufImpl.get(); + MockCmd = addCommandToBuffer(Buf, Q); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease[0].get(), + RawBufferImplPtr); + EXPECT_CALL(*MockCmd, Release).Times(1); } TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultSetFinalData) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { int FinalData = 0; sycl::buffer Buf(1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); Buf.set_final_data(&FinalData); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSizeOnlyNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(1, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + 
ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; + sycl::detail::buffer_impl *RawBufferImplPtr = NULL; { using AllocatorTypeTest = sycl::buffer_allocator; AllocatorTypeTest allocator; sycl::buffer Buf(1, allocator); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, true); + RawBufferImplPtr = BufImpl.get(); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease[0].get(), + RawBufferImplPtr); } TEST_F(BufferDestructionCheck, BufferWithRawHostPtr) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { int InitialVal = 8; sycl::buffer Buf(&InitialVal, 1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithRawHostPtrWithNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { int InitialVal = 8; using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(&InitialVal, 1, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithConstRawHostPtr) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { const int InitialVal = 8; sycl::buffer Buf(&InitialVal, 1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithConstRawHostPtrWithNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { const int InitialVal = 8; using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(&InitialVal, 1, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithContainer) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::vector data{3, 4}; sycl::buffer Buf(data); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - 
SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithContainerWithNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::vector data{3, 4}; using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(data, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSharedPtr) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::shared_ptr InitialVal(new int(5)); sycl::buffer Buf(InitialVal, 1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSharedPtrWithNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::shared_ptr InitialVal(new int(5)); using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(InitialVal, 1, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSharedPtrArray) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::shared_ptr InitialVal(new int[2]); sycl::buffer Buf(InitialVal, 1); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithSharedPtrArrayWithNonDefaultAllocator) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; { std::shared_ptr InitialVal(new int[2]); using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(InitialVal, 2, allocator); - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, false); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } TEST_F(BufferDestructionCheck, BufferWithIterators) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; + + MockCmdWithReleaseTracking *MockCmd = NULL; + sycl::detail::buffer_impl 
*RawBufferImplPtr = NULL; { std::vector data{3, 4}; sycl::buffer Buf(data.begin(), data.end()); std::shared_ptr BufImpl = sycl::detail::getSyclObjImpl(Buf); - SubmitWorkload(Q, &Buf); - CheckBufferDestruction(BufImpl, true); - } -} - -// TEST_F(BufferDestructionCheck, BufferWithIteratorsWithNonDefaultAllocator) { -// sycl::context Context{Plt}; -// sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; -// { -// std::vector data{3, 4}; -// using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); -// sycl::buffer Buf(data.begin(), data.end(), -// allocator); std::shared_ptr BufImpl = -// sycl::detail::getSyclObjImpl(Buf); -// //needs workload -// CheckBufferDestruction(BufImpl, false); -// } -// } - -TEST_F(BufferDestructionCheck, BufferDeferringCheckWriteLock) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - { - testing::Sequence S; - sycl::detail::buffer_impl *unsafePtr = nullptr; - EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); - std::unique_lock Lock(MockSchedulerPtr->MGraphLock, - std::defer_lock); - { - sycl::buffer Buf(1); - SubmitWorkload(Q, &Buf); - { - std::shared_ptr BufImpl = - sycl::detail::getSyclObjImpl(Buf); - unsafePtr = BufImpl.get(); - } - Lock.lock(); - // gmock warning will be generated - simply tell gtest that now we do not - // want to mock the function - ON_CALL(*MockSchedulerPtr, deferMemObjRelease) - .WillByDefault( - [this](const std::shared_ptr &MemObj) { - return MockSchedulerPtr - ->sycl::detail::Scheduler::deferMemObjRelease(MemObj); - }); - } - // Record is empty but lock should prevent from being deleted - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); - EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.front().get(), - unsafePtr); - Lock.unlock(); - MockSchedulerPtr->releaseResources(); - - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + RawBufferImplPtr = BufImpl.get(); + MockCmd = addCommandToBuffer(Buf, Q); + EXPECT_CALL(*MockCmd, Release).Times(1); } + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease[0].get(), + RawBufferImplPtr); } std::map ExpectedEventStatus; @@ -400,20 +338,6 @@ pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, return PI_ERROR_INVALID_OPERATION; } -class MockCmdWithReleaseTracking : public MockCommand { -public: - MockCmdWithReleaseTracking( - sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req, - sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG) - : MockCommand(Queue, Req, Type){}; - MockCmdWithReleaseTracking( - sycl::detail::QueueImplPtr Queue, - sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG) - : MockCommand(Queue, Type){}; - ~MockCmdWithReleaseTracking() { Release(); } - MOCK_METHOD0(Release, void()); -}; - TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -421,9 +345,8 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { sycl::buffer Buf(1); sycl::detail::Requirement MockReq = getMockRequirement(Buf); std::vector AuxCmds; - sycl::detail::MemObjRecord *Rec = - MockSchedulerPtr->MGraphBuilder.getOrInsertMemObjRecord( - sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds); + sycl::detail::MemObjRecord *Rec = MockSchedulerPtr->getOrInsertMemObjRecord( + sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds); MockCmdWithReleaseTracking *ReadCmd = 
nullptr; MockCmdWithReleaseTracking *WriteCmd = nullptr; ReadCmd = @@ -439,13 +362,12 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { std::vector ToCleanUp; std::vector ToEnqueue; - MockSchedulerPtr->MGraphBuilder.addNodeToLeaves( - Rec, ReadCmd, sycl::access::mode::read, ToEnqueue); - MockSchedulerPtr->MGraphBuilder.addNodeToLeaves( - Rec, WriteCmd, sycl::access::mode::write, ToEnqueue); + MockSchedulerPtr->addNodeToLeaves(Rec, ReadCmd, sycl::access::mode::read, + ToEnqueue); + MockSchedulerPtr->addNodeToLeaves(Rec, WriteCmd, sycl::access::mode::write, + ToEnqueue); Mock.redefine(getEventInfoFunc); - std::shared_lock Lock(MockSchedulerPtr->MGraphLock); testing::InSequence S; ExpectedEventStatus[ReadCmd->getEvent()->getHandleRef()] = PI_EVENT_SUBMITTED; diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index ba6eb72674fe9..42e52eebd72d1 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -102,8 +102,9 @@ class MockScheduler : public sycl::detail::Scheduler { public: using sycl::detail::Scheduler::addCG; using sycl::detail::Scheduler::addCopyBack; + using sycl::detail::Scheduler::checkLeavesCompletion; using sycl::detail::Scheduler::cleanupCommands; - using sycl::detail::Scheduler::waitForRecordToFinish; + using sycl::detail::Scheduler::MDeferredMemObjRelease; sycl::detail::MemObjRecord * getOrInsertMemObjRecord(const sycl::detail::QueueImplPtr &Queue, From 0872d7c232c4272937db63cc519a5812c2351b03 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 7 Oct 2022 08:20:18 -0700 Subject: [PATCH 39/62] Predict comments: restore removeMemoryObject content Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 18 ++++-------------- sycl/source/detail/scheduler/scheduler.hpp | 5 +---- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 729a2da51a091..1f6f218dc8a97 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -270,18 +270,6 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { deallocateStreams(StreamsToDeallocate); } -void Scheduler::releaseMemObjRecord( - detail::SYCLMemObjI *MemObj, - std::vector> &StreamsToDeallocate, - std::vector> &AuxResourcesToDeallocate) { - MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); - assert(Record); - MGraphBuilder.decrementLeafCountersForRecord(Record); - MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, - AuxResourcesToDeallocate); - MGraphBuilder.removeRecordForMemObj(MemObj); -} - void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { // We are going to traverse a graph of finished commands. 
Gather stream // objects from these commands if any and deallocate buffers for these stream @@ -309,8 +297,10 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { { WriteLockT Lock(MGraphLock, std::defer_lock); acquireWriteLock(Lock); - releaseMemObjRecord(MemObj, StreamsToDeallocate, - AuxResourcesToDeallocate); + MGraphBuilder.decrementLeafCountersForRecord(Record); + MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, + AuxResourcesToDeallocate); + MGraphBuilder.removeRecordForMemObj(MemObj); } } deallocateStreams(StreamsToDeallocate); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 7830a4d889ecd..155217a26a901 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -470,10 +470,7 @@ class Scheduler { // May lock graph with read and write modes during execution. void cleanupDeferredMemObjects(BlockingT Blocking); - void releaseMemObjRecord( - detail::SYCLMemObjI *MemObj, - std::vector> &StreamsToDeallocate, - std::vector> &AuxResourcesToDeallocate); + /// Graph builder class. /// /// The graph builder provides means to change an existing graph (e.g. add From 3a25f1e53fbf7233ddf1daa74d17c8599fe34493 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 11 Oct 2022 11:42:13 -0700 Subject: [PATCH 40/62] Fix root cause of hang when host task is not even started upon release of all resources Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 575e15dfc7167..1770cd69c9b5f 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -145,6 +145,8 @@ void GlobalHandler::unloadPlugins() { } void shutdown() { + if (GlobalHandler::instance().MScheduler.Inst) + GlobalHandler::instance().MScheduler.Inst->releaseResources(); // Ensure neither host task is working so that no default context is accessed // upon its release if (GlobalHandler::instance().MHostTaskThreadPool.Inst) @@ -159,8 +161,6 @@ void shutdown() { // First, release resources that may access plugins.
GlobalHandler::instance().MPlatformCache.Inst.reset(nullptr); - if (GlobalHandler::instance().MScheduler.Inst) - GlobalHandler::instance().MScheduler.Inst->releaseResources(); GlobalHandler::instance().MScheduler.Inst.reset(nullptr); GlobalHandler::instance().MProgramManager.Inst.reset(nullptr); From 28f008d324caa8739383fcf7915407c920f1ea98 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 14 Oct 2022 03:21:25 -0700 Subject: [PATCH 41/62] Fix comments (round n) Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 8 ++++---- sycl/source/detail/sycl_mem_obj_t.cpp | 9 +++++---- sycl/source/detail/sycl_mem_obj_t.hpp | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 1f6f218dc8a97..1d9a001d85b57 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -518,16 +518,16 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { if (isDeferredMemObjectsEmpty()) return; if (Blocking == BlockingT::BLOCKING) { - std::vector> MTempStorage; + std::vector> TempStorage; { std::lock_guard LockDef{MDeferredMemReleaseMutex}; - MDeferredMemObjRelease.swap(MTempStorage); + MDeferredMemObjRelease.swap(TempStorage); } - // if any objects in MTempStorage exist - it is leaving scope and being + // if any objects in TempStorage exist - it is leaving scope and being // deleted } - std::list> ObjsReadyToRelease; + std::vector> ObjsReadyToRelease; { ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock); diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 3e949fee77ddf..41724fd9428a4 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -148,14 +148,15 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, HostPtrReadOnly = false; } -void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &self, +void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &Self, bool DefaultAllocator) const { - // Check MRecord without read lock because not found any usages that may bring - // corruption. MRecord is nullptr on buffer creation and set to meaningful + // Check MRecord without read lock because at this point we expect that no + // commands that operate on the buffer can be created. MRecord is nullptr on + // buffer creation and set to meaningful // value only if any operation on buffer submitted inside addCG call. addCG is // called from queue::submit and buffer destruction could not overlap with it.
if (MRecord && !MHostPtrProvided && DefaultAllocator) - Scheduler::getInstance().deferMemObjRelease(self); + Scheduler::getInstance().deferMemObjRelease(Self); } } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index fa8f79165b827..a8c3f3ffc35fa 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -257,7 +257,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } - void detachMemoryObject(const std::shared_ptr &self, + void detachMemoryObject(const std::shared_ptr &Self, bool DefaultAllocator) const; protected: From 6247f8aa130e84e62465a805dee6d19ed9662a28 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 18 Oct 2022 14:03:39 -0700 Subject: [PATCH 42/62] [ESIMD] Implement piEventGetInfo for event execution status Signed-off-by: Tikhomirova, Kseniya --- .../esimd_emulator/pi_esimd_emulator.cpp | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 00daaddbb37aa..36347ecc46f2d 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1379,8 +1379,39 @@ pi_result piKernelRelease(pi_kernel) { DIE_NO_IMPLEMENTATION; } pi_result piEventCreate(pi_context, pi_event *) { DIE_NO_IMPLEMENTATION; } -pi_result piEventGetInfo(pi_event, pi_event_info, size_t, void *, size_t *) { - DIE_NO_IMPLEMENTATION; +pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + if (ParamName != PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + DIE_NO_IMPLEMENTATION; + } + // Dummy event is already completed once done by CM.
+ auto CheckAndFillStatus = [&](const cm_support::CM_STATUS &State) { + pi_int32 Result = PI_EVENT_RUNNING; + if (State == cm_support::CM_STATUS_FINISHED) + Result = PI_EVENT_COMPLETE; + if (ParamValue) { + if (ParamValueSize < sizeof(Result)) + return PI_ERROR_INVALID_VALUE; + *static_cast(ParamValue) = Result; + } + if (ParamValueSizeRet) { + *ParamValueSizeRet = sizeof(Result); + } + return PI_SUCCESS; + }; + if (Event->IsDummyEvent) + return CheckAndFillStatus(cm_support::CM_STATUS_FINISHED); + + if (Event->CmEventPtr == nullptr) + return PI_ERROR_INVALID_EVENT; + + cm_support::CM_STATUS Status; + int32_t Result = Event->CmEventPtr->GetStatus(Status); + if (Result != cm_support::CM_SUCCESS) + return PI_ERROR_COMMAND_EXECUTION_FAILURE; + + return CheckAndFillStatus(Status); } pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
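The implementation above follows the usual PI getInfo calling convention: a caller may pass ParamValue == nullptr to query only the required size via ParamValueSizeRet, then call again with a buffer of at least that size. A minimal caller-side sketch of that two-call protocol, assuming only the piEventGetInfo signature shown in the hunk above (the queryEventStatus helper is a made-up name for illustration):

```cpp
// Sketch: how a PI client is expected to drive piEventGetInfo.
// Assumes the PI headers are available; queryEventStatus is hypothetical.
pi_int32 queryEventStatus(pi_event Event) {
  size_t Size = 0;
  // First call: no output buffer, only ask how many bytes are needed.
  piEventGetInfo(Event, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS,
                 /*ParamValueSize=*/0, /*ParamValue=*/nullptr, &Size);
  // Second call: provide a buffer of the reported size.
  pi_int32 Status = 0;
  piEventGetInfo(Event, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS,
                 Size, &Status, /*ParamValueSizeRet=*/nullptr);
  return Status; // PI_EVENT_COMPLETE once CM reports CM_STATUS_FINISHED
}
```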
From 41338627107b65fdbcc2d15fb2e92af082ebef72 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 18 Oct 2022 14:11:38 -0700 Subject: [PATCH 43/62] Move comment to the right place Signed-off-by: Tikhomirova, Kseniya --- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 36347ecc46f2d..f06a4ed64b4b6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1385,7 +1385,7 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, if (ParamName != PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) { DIE_NO_IMPLEMENTATION; } - // Dummy event is already completed once done by CM. + auto CheckAndFillStatus = [&](const cm_support::CM_STATUS &State) { pi_int32 Result = PI_EVENT_RUNNING; if (State == cm_support::CM_STATUS_FINISHED) @@ -1400,6 +1400,7 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, } return PI_SUCCESS; }; + // Dummy event is already completed once done by CM. if (Event->IsDummyEvent) return CheckAndFillStatus(cm_support::CM_STATUS_FINISHED); From 79b2125d03b6dff9492cfcbfa2cbdfc2e1dc42b6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 19 Oct 2022 06:24:33 -0700 Subject: [PATCH 44/62] cv.notify_all should not be called under mutex paired with cv Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index efc31fa817779..d262fa1e9e43d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -78,17 +78,19 @@ void event_impl::waitInternal() { void event_impl::setComplete() { if (MHostEvent || !MEvent) { - std::unique_lock lock(MMutex); + { + std::unique_lock lock(MMutex); #ifndef NDEBUG - int Expected = HES_NotComplete; - int Desired = HES_Complete; + int Expected = HES_NotComplete; + int Desired = HES_Complete; - bool Succeeded = MState.compare_exchange_strong(Expected, Desired); + bool Succeeded = MState.compare_exchange_strong(Expected, Desired); - assert(Succeeded && "Unexpected state of event"); + assert(Succeeded && "Unexpected state of event"); #else - MState.store(static_cast(HES_Complete)); + MState.store(static_cast(HES_Complete)); #endif + } cv.notify_all(); return; }
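The change above shrinks the critical section so that cv.notify_all() runs after MMutex is released. Notifying while still holding the mutex is correct but wasteful: a woken waiter immediately blocks trying to acquire the mutex the notifier still owns. A self-contained sketch of the pattern the patch adopts (the names below are illustrative, not the runtime's):

```cpp
#include <condition_variable>
#include <mutex>

std::mutex M;
std::condition_variable CV;
bool Ready = false;

void signalReady() {
  {
    std::lock_guard<std::mutex> Guard(M); // mutate shared state under the lock
    Ready = true;
  }                // lock released here...
  CV.notify_all(); // ...so waiters can take it immediately upon waking
}

void waitReady() {
  std::unique_lock<std::mutex> Lock(M);
  CV.wait(Lock, [] { return Ready; }); // predicate guards against spurious wakeups
}
```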
From 868973c2521deb7273a2d778685360361cdd7e05 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 21 Oct 2022 03:07:14 -0700 Subject: [PATCH 45/62] Remove default allocator check after SYCL2020 update Signed-off-by: Tikhomirova, Kseniya --- sycl/include/sycl/buffer.hpp | 10 ++-------- sycl/source/buffer.cpp | 4 ++-- sycl/source/detail/sycl_mem_obj_t.cpp | 12 ++++++------ sycl/source/detail/sycl_mem_obj_t.hpp | 3 +-- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 9bd2c4d37ed89..423cb7ae4c78a 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -117,7 +117,7 @@ class __SYCL_EXPORT buffer_plain { size_t getSize() const; - void handleRelease(bool DefaultAllocator) const; + void handleRelease() const; std::shared_ptr impl; }; @@ -459,13 +459,7 @@ class buffer : public detail::buffer_plain { buffer &operator=(buffer &&rhs) = default; - ~buffer() { - buffer_plain:: - handleRelease(/*DefaultAllocator = */ - std::is_same< - AllocatorT, - detail::sycl_memory_object_allocator>::value); - } + ~buffer() { buffer_plain::handleRelease(); } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/source/buffer.cpp b/sycl/source/buffer.cpp index d41c2a8829506..6e5e682b9429a 100644 --- a/sycl/source/buffer.cpp +++ b/sycl/source/buffer.cpp @@ -121,11 +121,11 @@ void buffer_plain::addOrReplaceAccessorProperties( size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); } -void buffer_plain::handleRelease(bool DefaultAllocator) const { +void buffer_plain::handleRelease() const { // Try to detach memory object only if impl is going to be released. // Buffer copy will have pointer to the same impl. if (impl.use_count() == 1) - impl->detachMemoryObject(impl, DefaultAllocator); + impl->detachMemoryObject(impl); } } // namespace detail diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 41724fd9428a4..4aa812f3ddcde 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -148,14 +148,14 @@ void SYCLMemObjT::determineHostPtr(const ContextImplPtr &Context, HostPtrReadOnly = false; } -void SYCLMemObjT::detachMemoryObject(const std::shared_ptr &Self, - bool DefaultAllocator) const { - // Check MRecord without read lock because at this point we expect that no - // commands that operate on the buffer can be created. MRecord is nullptr on - // buffer creation and set to meaningful +void SYCLMemObjT::detachMemoryObject( + const std::shared_ptr &Self) const { + // Check MRecord without read lock because at this point we expect that no + // commands that operate on the buffer can be created. MRecord is nullptr on + // buffer creation and set to meaningful // value only if any operation on buffer submitted inside addCG call. addCG is // called from queue::submit and buffer destruction could not overlap with it. - if (MRecord && !MHostPtrProvided && DefaultAllocator) + if (MRecord && !MHostPtrProvided) Scheduler::getInstance().deferMemObjRelease(Self); } diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index a8c3f3ffc35fa..dfd01b88c5a5a 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -257,8 +257,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { bool isHostPointerReadOnly() const { return MHostPtrReadOnly; } - void detachMemoryObject(const std::shared_ptr &Self, - bool DefaultAllocator) const; + void detachMemoryObject(const std::shared_ptr &Self) const; protected: // An allocateMem helper that determines which host ptr to use From 1bc8e571bb364b934c3cf91faf267837d6ef4f19 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 21 Oct 2022 04:26:28 -0700 Subject: [PATCH 46/62] Update unittests due to default allocator check removal Signed-off-by: Tikhomirova, Kseniya --- .../buffer/BufferDestructionCheck.cpp | 78 ++----------------- 1 file changed, 7 insertions(+), 71 deletions(-) diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index 0487b13e14f96..77465fe7ec2c0 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -114,15 +114,21 @@ TEST_F(BufferDestructionCheck, BufferWithSizeOnlyNonDefaultAllocator) { sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; MockCmdWithReleaseTracking *MockCmd = NULL; + sycl::detail::buffer_impl *RawBufferImplPtr = NULL; { using AllocatorTypeTest = sycl::usm_allocator; AllocatorTypeTest allocator(Q); sycl::buffer Buf(1, allocator); + std::shared_ptr BufImpl = + sycl::detail::getSyclObjImpl(Buf); + RawBufferImplPtr = BufImpl.get(); MockCmd = addCommandToBuffer(Buf, Q); EXPECT_CALL(*MockCmd, Release).Times(1); } - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); + ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 1u); + EXPECT_EQ(MockSchedulerPtr->MDeferredMemObjRelease[0].get(), + RawBufferImplPtr); } TEST_F(BufferDestructionCheck, BufferWithSizeOnlyDefaultAllocator) { @@ -191,24 +197,6 @@ TEST_F(BufferDestructionCheck, BufferWithConstRawHostPtr) { ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u);
} -TEST_F(BufferDestructionCheck, - BufferWithConstRawHostPtrWithNonDefaultAllocator) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - - MockCmdWithReleaseTracking *MockCmd = NULL; - { - const int InitialVal = 8; - using AllocatorTypeTest = - sycl::usm_allocator; - AllocatorTypeTest allocator(Q); - sycl::buffer Buf(&InitialVal, 1, allocator); - MockCmd = addCommandToBuffer(Buf, Q); - EXPECT_CALL(*MockCmd, Release).Times(1); - } - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); -} - TEST_F(BufferDestructionCheck, BufferWithContainer) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -223,23 +211,6 @@ TEST_F(BufferDestructionCheck, BufferWithContainer) { ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } -TEST_F(BufferDestructionCheck, BufferWithContainerWithNonDefaultAllocator) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - - MockCmdWithReleaseTracking *MockCmd = NULL; - { - std::vector data{3, 4}; - using AllocatorTypeTest = - sycl::usm_allocator; - AllocatorTypeTest allocator(Q); - sycl::buffer Buf(data, allocator); - MockCmd = addCommandToBuffer(Buf, Q); - EXPECT_CALL(*MockCmd, Release).Times(1); - } - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); -} - TEST_F(BufferDestructionCheck, BufferWithSharedPtr) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -254,23 +225,6 @@ TEST_F(BufferDestructionCheck, BufferWithSharedPtr) { ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } -TEST_F(BufferDestructionCheck, BufferWithSharedPtrWithNonDefaultAllocator) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - - MockCmdWithReleaseTracking *MockCmd = NULL; - { - std::shared_ptr InitialVal(new int(5)); - using AllocatorTypeTest = - sycl::usm_allocator; - AllocatorTypeTest allocator(Q); - sycl::buffer Buf(InitialVal, 1, allocator); - MockCmd = addCommandToBuffer(Buf, Q); - EXPECT_CALL(*MockCmd, Release).Times(1); - } - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); -} - TEST_F(BufferDestructionCheck, BufferWithSharedPtrArray) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; @@ -285,24 +239,6 @@ TEST_F(BufferDestructionCheck, BufferWithSharedPtrArray) { ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); } -TEST_F(BufferDestructionCheck, - BufferWithSharedPtrArrayWithNonDefaultAllocator) { - sycl::context Context{Plt}; - sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; - - MockCmdWithReleaseTracking *MockCmd = NULL; - { - std::shared_ptr InitialVal(new int[2]); - using AllocatorTypeTest = - sycl::usm_allocator; - AllocatorTypeTest allocator(Q); - sycl::buffer Buf(InitialVal, 2, allocator); - MockCmd = addCommandToBuffer(Buf, Q); - EXPECT_CALL(*MockCmd, Release).Times(1); - } - ASSERT_EQ(MockSchedulerPtr->MDeferredMemObjRelease.size(), 0u); -} - TEST_F(BufferDestructionCheck, BufferWithIterators) { sycl::context Context{Plt}; sycl::queue Q = sycl::queue{Context, sycl::default_selector{}}; From 6e0943b5680ad4e04793e96c727ac6dbd8ff29ae Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Fri, 21 Oct 2022 06:20:20 -0700 Subject: [PATCH 47/62] Update symbols after parameter removal Signed-off-by: Tikhomirova, Kseniya --- sycl/test/abi/sycl_symbols_linux.dump | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6332a1ca20d7f..5b4f49e2ecaf5 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4038,7 +4038,7 @@ _ZNK4sycl3_V15queue9getNativeEv _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_Eb +_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_E _ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv _ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv _ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE @@ -4107,7 +4107,7 @@ _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property6noinitEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property7no_initEEEbv _ZNK4sycl3_V16detail12buffer_plain12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv -_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEb +_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv From 1c62d0825991067d4aa45f1d11fe6ea8f1ec5f07 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 26 Oct 2022 11:08:46 -0700 Subject: [PATCH 48/62] Try to align hip context destruction handling with cuda WA Signed-off-by: Tikhomirova, Kseniya --- sycl/plugins/hip/pi_hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 6bbb39f4d05df..a53f8b0f81cc5 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -185,7 +185,7 @@ pi_result forLatestEvents(const pi_event *event_wait_list, /// pi_result check_error(hipError_t result, const char *function, int line, const char *file) { - if (result == hipSuccess) { + if (result == hipSuccess || result == hipErrorContextIsDestroyed) { return PI_SUCCESS; } From 60e3011a48106af39d722418d0f4fffcd795eb24 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 7 Nov 2022 07:08:56 -0800 Subject: [PATCH 49/62] Fix unit test after mock plugin rework Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/buffer/BufferDestructionCheck.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/unittests/buffer/BufferDestructionCheck.cpp b/sycl/unittests/buffer/BufferDestructionCheck.cpp index 77465fe7ec2c0..0abe2918aea36 100644 --- a/sycl/unittests/buffer/BufferDestructionCheck.cpp +++ b/sycl/unittests/buffer/BufferDestructionCheck.cpp @@ -283,16 +283,19 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) { std::vector AuxCmds; sycl::detail::MemObjRecord *Rec = MockSchedulerPtr->getOrInsertMemObjRecord( sycl::detail::getSyclObjImpl(Q), &MockReq, AuxCmds); + + std::shared_ptr CtxImpl = + sycl::detail::getSyclObjImpl(Context); MockCmdWithReleaseTracking *ReadCmd = nullptr; MockCmdWithReleaseTracking *WriteCmd = nullptr; ReadCmd = new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); - ReadCmd->getEvent()->getHandleRef() = reinterpret_cast( - 0x01); // just assign to be able to use mock + ReadCmd->getEvent()->getHandleRef() = + createDummyHandle(); // just assign to be able to use mock WriteCmd = new 
MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq); WriteCmd->getEvent()->getHandleRef() = - reinterpret_cast(0x02); + createDummyHandle(); // just assign to be able to use mock ReadCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; WriteCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess; From 3d5315e163079b343b848c0d85ebc8293e77aa0a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 16 Nov 2022 13:55:23 -0800 Subject: [PATCH 50/62] DRAFT: try to release scheduler resources earlier using thread_local utility class Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index a0a3a8fd4c367..a5a6485f8a119 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -27,6 +27,24 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { + +// just a draft, no unification +template class ObjectUsageCounter { +public: + ObjectUsageCounter(std::unique_ptr &Obj) : MObj(Obj) { + MCounter++; + } + ~ObjectUsageCounter() { + MCounter--; + if (!MCounter) + MObj->releaseResources(); + } + +private: + std::atomic_int MCounter; + std::unique_ptr &MObj; +}; + using LockGuard = std::lock_guard; GlobalHandler::GlobalHandler() = default; @@ -55,7 +73,12 @@ void GlobalHandler::attachScheduler(Scheduler *Scheduler) { MScheduler.Inst.reset(Scheduler); } -Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); } +Scheduler &GlobalHandler::getScheduler() { + // just a draft + getOrCreate(MScheduler); + thread_local ObjectUsageCounter SchedulerCounter(MScheduler.Inst); + return *MScheduler.Inst; +} ProgramManager &GlobalHandler::getProgramManager() { return getOrCreate(MProgramManager); From 467a9ea0ec9e74406b74f0a923a477c3ddfdd85e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 17 Nov 2022 02:40:41 -0800 Subject: [PATCH 51/62] Draft: try to release scheduler resources earlier, fix counter declaration Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index a5a6485f8a119..7a348d9770140 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -41,9 +41,11 @@ template class ObjectUsageCounter { } private: - std::atomic_int MCounter; + static std::atomic_uint MCounter; std::unique_ptr &MObj; }; +template +std::atomic_uint ObjectUsageCounter::MCounter{0}; using LockGuard = std::lock_guard; From 0b9032a223db8a2a4849b743fc97922cdc3cfb55 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 16 Nov 2022 13:55:23 -0800 Subject: [PATCH 52/62] Release scheduler resources earlier --- sycl/source/detail/global_handler.cpp | 42 +++++++++++++++++----- sycl/source/detail/global_handler.hpp | 2 ++ sycl/source/detail/scheduler/scheduler.cpp | 31 +++++++++------- sycl/source/detail/scheduler/scheduler.hpp | 2 +- sycl/source/detail/thread_pool.hpp | 35 +++++++++++++++--- 5 files changed, 86 insertions(+), 26 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 7a348d9770140..5509d4b92b541 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -31,21 +31,35 @@ 
namespace detail { // just a draft, no unification template class ObjectUsageCounter { public: - ObjectUsageCounter(std::unique_ptr &Obj) : MObj(Obj) { - MCounter++; + ObjectUsageCounter(std::unique_ptr &Obj, + bool IncrementCounter) + : MIncrementCounter(IncrementCounter), MObj(Obj) { + if (MIncrementCounter) + MCounter++; } ~ObjectUsageCounter() { - MCounter--; - if (!MCounter) - MObj->releaseResources(); + if (MIncrementCounter) + MCounter--; + if (!MCounter && MObj) { + bool ReleaseCalled = MReleaseCalled.exchange(true); + if (!ReleaseCalled) + MObj->releaseResources(); + } } private: static std::atomic_uint MCounter; + bool MIncrementCounter; + std::unique_ptr &MObj; + + static std::atomic_bool MReleaseCalled; // test + // std::unique_ptr MTrace; }; template std::atomic_uint ObjectUsageCounter::MCounter{0}; +template +std::atomic_bool ObjectUsageCounter::MReleaseCalled{false}; using LockGuard = std::lock_guard; @@ -78,10 +92,15 @@ void GlobalHandler::attachScheduler(Scheduler *Scheduler) { Scheduler &GlobalHandler::getScheduler() { // just a draft getOrCreate(MScheduler); - thread_local ObjectUsageCounter SchedulerCounter(MScheduler.Inst); + registerSchedulerUsage(); return *MScheduler.Inst; } +void GlobalHandler::registerSchedulerUsage(bool IncrementCounter) { + thread_local ObjectUsageCounter SchedulerCounter(MScheduler.Inst, + IncrementCounter); +} + ProgramManager &GlobalHandler::getProgramManager() { return getOrCreate(MProgramManager); } @@ -174,11 +193,18 @@ void GlobalHandler::unloadPlugins() { GlobalHandler::instance().getPlugins().clear(); } +void GlobalHandler::drainThreadPool() { + if (MHostTaskThreadPool.Inst) + MHostTaskThreadPool.Inst->drain(); +} + void shutdown() { - if (GlobalHandler::instance().MScheduler.Inst) - GlobalHandler::instance().MScheduler.Inst->releaseResources(); // Ensure neither host task is working so that no default context is accessed // upon its release + + if (GlobalHandler::instance().MScheduler.Inst) + GlobalHandler::instance().MScheduler.Inst->releaseResources(); + if (GlobalHandler::instance().MHostTaskThreadPool.Inst) GlobalHandler::instance().MHostTaskThreadPool.Inst->finishAndWait(); diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 1de7208d6f8de..035b72395d3d3 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -54,6 +54,7 @@ class GlobalHandler { GlobalHandler(const GlobalHandler &) = delete; GlobalHandler(GlobalHandler &&) = delete; + void registerSchedulerUsage(bool IncrementCounter = true); Scheduler &getScheduler(); ProgramManager &getProgramManager(); Sync &getSync(); @@ -74,6 +75,7 @@ class GlobalHandler { static void registerDefaultContextReleaseHandler(); void unloadPlugins(); + void drainThreadPool(); // For testing purposes only void attachScheduler(Scheduler *Scheduler); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 3f1fce270f908..081cc35a9cbb0 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -414,21 +414,22 @@ Scheduler::~Scheduler() { "not all resources were released. Please be sure that all kernels " "have synchronization points.\n\n"); } - // Please be aware that releaseResources should be called before deletion of - // Scheduler. 
Otherwise there can be the case when objects Scheduler keeps as - fields may need Scheduler for their release and they work with Scheduler - via GlobalHandler::getScheduler that will create new Scheduler object. - Still keep it here but it should do almost nothing if releaseResources - called before. - releaseResources(); + DefaultHostQueue.reset(); } void Scheduler::releaseResources() { - // There might be some commands scheduled for post enqueue cleanup that - // haven't been freed because of the graph mutex being locked at the time, - // clean them up now. + MReleaseStarted = true; + // TraceEvent trace("releaseResources"); + if (DefaultHostQueue) { + DefaultHostQueue->wait(); + // std::cout << "DefaultHostQueue finished" << std::endl; + } + GlobalHandler::instance().drainThreadPool(); + // std::cout << "threads finished" << std::endl; + // There might be some commands scheduled for post enqueue cleanup that + // haven't been freed because of the graph mutex being locked at the time, + // clean them up now. cleanupCommands({}); - DefaultHostQueue.reset(); // We need loop since sometimes we may need new objects to be added to // deferred mem objects storage during cleanup. Known example is: we cleanup @@ -502,7 +503,7 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd) { } void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { - { + if (!MReleaseStarted) { std::lock_guard Lock{MDeferredMemReleaseMutex}; MDeferredMemObjRelease.push_back(MemObj); } @@ -515,6 +516,8 @@ inline bool Scheduler::isDeferredMemObjectsEmpty() { } void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { + // std::cout << "MDeferredMemObjRelease.size = " << + // MDeferredMemObjRelease.size() << std::endl; if (isDeferredMemObjectsEmpty()) return; if (Blocking == BlockingT::BLOCKING) { @@ -547,7 +550,9 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { } } } - // if any ObjsReadyToRelease found - it is leaving scope and being deleted + // std::cout << "MDeferredMemObjRelease after NON_BLOCKING.size = " << + // MDeferredMemObjRelease.size() << std::endl; + // if any ObjsReadyToRelease found - it is leaving scope and being deleted } } // namespace detail diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 01e8b1c0e0035..268275859172d 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -174,7 +174,6 @@ class MockScheduler; namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { - class queue_impl; class event_impl; class context_impl; @@ -453,6 +452,7 @@ class Scheduler { inline bool isDeferredMemObjectsEmpty(); protected: + std::atomic_bool MReleaseStarted{false}; using RWLockT = std::shared_timed_mutex; using ReadLockT = std::shared_lock; using WriteLockT = std::unique_lock; diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 2ffbe8a0bd52a..8a7d522399c13 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -31,16 +31,30 @@ class ThreadPool { std::condition_variable MDoSmthOrStop; std::atomic_bool MStop; + // Drain related stuff + std::map MDrainSchedule; + std::atomic_bool MDrain{false}; + void worker() { + GlobalHandler::instance().registerSchedulerUsage(false); std::unique_lock Lock(MJobQueueMutex); - + std::thread::id ThisThreadId = std::this_thread::get_id(); while (true) { - MDoSmthOrStop.wait( - Lock, [this]() { return !MJobQueue.empty()
|| MStop.load(); }); + MDoSmthOrStop.wait(Lock, [this, &ThisThreadId]() { + return !MJobQueue.empty() || MStop.load() || + (MDrain.load() && MDrainSchedule[ThisThreadId].load()); + }); - if (MStop.load()) + // let's complete enqueued tasks first + if (MStop.load() && MJobQueue.empty()) break; + if (MJobQueue.empty() && MDrain.load() && + MDrainSchedule[ThisThreadId].load()) { + assert(MDrainSchedule[ThisThreadId].exchange(false)); + continue; + } + std::function Job = std::move(MJobQueue.front()); MJobQueue.pop(); Lock.unlock(); @@ -61,6 +75,19 @@ class ThreadPool { } public: + void drain() { + for (const auto &thread : MLaunchedThreads) { + MDrainSchedule[thread.get_id()].store(true); + } + MDrain.store(true); + MDoSmthOrStop.notify_all(); + while (std::any_of(MDrainSchedule.cbegin(), MDrainSchedule.cend(), + [](const auto &state) { return state.second.load(); })) + std::this_thread::yield(); + + MDrain.store(false); + } + ThreadPool(unsigned int ThreadCount = 1) : MThreadCount(ThreadCount) { start(); } From 9d570ceb9df71b34438c18e65ee244790ef37a41 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Mon, 5 Dec 2022 08:38:45 -0800 Subject: [PATCH 53/62] Change location of buffer release attempt Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 40d87d59d355c..f2a596980a741 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -146,8 +146,8 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) } event_impl::event_impl(const QueueImplPtr &Queue) - : MQueue{Queue}, MIsProfilingEnabled{Queue->is_host() || - Queue->MIsProfilingEnabled} { + : MQueue{Queue}, + MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled} { this->setContextImpl(Queue->getContextImplPtr()); if (Queue->is_host()) { @@ -233,6 +233,9 @@ void event_impl::wait(std::shared_ptr Self) { detail::Scheduler::getInstance().waitForEvent(Self); cleanupCommand(std::move(Self)); + detail::Scheduler::getInstance().cleanupDeferredMemObjects( + BlockingT::NON_BLOCKING); + #ifdef XPTI_ENABLE_INSTRUMENTATION instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); #endif
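The ObjectUsageCounter draft from patches 50-52 boils down to one pattern: each user thread owns a thread_local RAII object that increments a shared atomic counter on first use, and the destructor of the last counted instance releases the shared resource. A stripped-down, self-contained sketch of that idea (Resource and its releaseResources are stand-ins, not the runtime's types):

```cpp
#include <atomic>
#include <iostream>
#include <thread>

struct Resource {
  void releaseResources() { std::cout << "released\n"; }
};
Resource TheResource;

class UsageCounter {
public:
  explicit UsageCounter(Resource &R) : MRes(R) { ++MCount; }
  ~UsageCounter() {
    // Atomic pre-decrement returns the new value, so exactly one
    // destructor (the last one) observes zero and performs the release.
    if (--MCount == 0)
      MRes.releaseResources();
  }

private:
  static std::atomic_uint MCount;
  Resource &MRes;
};
std::atomic_uint UsageCounter::MCount{0};

void use() {
  // Constructed on a thread's first call, destroyed at thread exit.
  thread_local UsageCounter Counter(TheResource);
}

int main() {
  std::thread A(use), B(use);
  A.join();
  B.join(); // the second thread to finish triggers releaseResources()
}
```

The draft's extra MReleaseCalled flag guards against a pool thread outliving the counted threads; the cleanup in the next patch keeps that guard and documents the intent.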
From c6d5dc758cfe14ab9e2f5a7b520c4798a6a5c8f6 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 05:38:14 -0800 Subject: [PATCH 54/62] Code cleanup Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 18 ++++++++------- sycl/source/detail/thread_pool.hpp | 32 +++++++-------------------- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 5509d4b92b541..2e89efe3996ed 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -28,7 +28,14 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -// just a draft, no unification +// Utility class to track references on an object. +// Used for Scheduler now and created as a thread_local object. +// The original idea is to track usage of Scheduler from the main and other user +// threads, which increment MCounter, while our thread_pool threads use it +// without adding an extra reference. The MIncrementCounter class member controls this. +// MObj and MReleaseCalled are extra protection needed to handle the case when +// the main thread has finished but the thread_pool is still running; those +// threads are joined in the releaseResources call. template class ObjectUsageCounter { public: ObjectUsageCounter(std::unique_ptr &Obj, bool IncrementCounter) : MIncrementCounter(IncrementCounter), MObj(Obj) { if (MIncrementCounter) @@ -41,8 +48,7 @@ template class ObjectUsageCounter { if (MIncrementCounter) MCounter--; if (!MCounter && MObj) { - bool ReleaseCalled = MReleaseCalled.exchange(true); - if (!ReleaseCalled) + if (!MReleaseCalled.exchange(true)) MObj->releaseResources(); } } @@ -50,11 +56,8 @@ template class ObjectUsageCounter { private: static std::atomic_uint MCounter; bool MIncrementCounter; - std::unique_ptr &MObj; - - static std::atomic_bool MReleaseCalled; // test - // std::unique_ptr MTrace; + std::unique_ptr &MObj; + static std::atomic_bool MReleaseCalled; }; template std::atomic_uint ObjectUsageCounter::MCounter{0}; template std::atomic_bool ObjectUsageCounter::MReleaseCalled{false}; using LockGuard = std::lock_guard; @@ -90,7 +93,6 @@ void GlobalHandler::attachScheduler(Scheduler *Scheduler) { } Scheduler &GlobalHandler::getScheduler() { - // just a draft getOrCreate(MScheduler); registerSchedulerUsage(); return *MScheduler.Inst; } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 8a7d522399c13..e078b4cbb517b 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -30,10 +30,7 @@ class ThreadPool { std::mutex MJobQueueMutex; std::condition_variable MDoSmthOrStop; std::atomic_bool MStop; - - // Drain related stuff - std::map MDrainSchedule; - std::atomic_bool MDrain{false} + std::atomic_uint MJobsInExecution; void worker() { GlobalHandler::instance().registerSchedulerUsage(false); std::unique_lock Lock(MJobQueueMutex); std::thread::id ThisThreadId = std::this_thread::get_id(); while (true) { MDoSmthOrStop.wait(Lock, [this, &ThisThreadId]() { - return !MJobQueue.empty() || MStop.load() || - (MDrain.load() && MDrainSchedule[ThisThreadId].load()); + return !MJobQueue.empty() || MStop.load(); }); - // let's complete enqueued tasks first - if (MStop.load() && MJobQueue.empty()) + if (MStop.load()) break; - if (MJobQueue.empty() && MDrain.load() && - MDrainSchedule[ThisThreadId].load()) { - assert(MDrainSchedule[ThisThreadId].exchange(false)); - continue; - } - std::function Job = std::move(MJobQueue.front()); MJobQueue.pop(); Lock.unlock(); @@ -62,6 +51,8 @@ class ThreadPool { Job(); Lock.lock(); + + MJobsInExecution--; } } @@ -69,6 +60,7 @@ class ThreadPool { MLaunchedThreads.reserve(MThreadCount); MStop.store(false); + MJobsInExecution.store(0); for (size_t Idx = 0; Idx < MThreadCount; ++Idx) MLaunchedThreads.emplace_back([this] { worker(); }); @@ -76,16 +68,8 @@ class ThreadPool { public: void drain() { - for (const auto &thread : MLaunchedThreads) { - MDrainSchedule[thread.get_id()].store(true); - } - MDrain.store(true); - MDoSmthOrStop.notify_all(); - while (std::any_of(MDrainSchedule.cbegin(), MDrainSchedule.cend(), - [](const auto &state) { return state.second.load(); })) + while (MJobsInExecution != 0) std::this_thread::yield(); - - MDrain.store(false); } ThreadPool(unsigned int ThreadCount = 1) : MThreadCount(ThreadCount) { @@ -118,7 +102,7 @@ class ThreadPool { std::lock_guard Lock(MJobQueueMutex); MJobQueue.emplace(Func); } - + MJobsInExecution++; MDoSmthOrStop.notify_one(); } };
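After this cleanup, drain() no longer wakes and hand-shakes every worker; it simply spins until the atomic count of queued-plus-running jobs reaches zero. A compact pool sketch showing just that counting discipline (a toy single-worker pool, not the runtime's ThreadPool):

```cpp
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

class MiniPool {
  std::queue<std::function<void()>> Jobs;
  std::mutex M;
  std::condition_variable CV;
  std::atomic_bool Stop{false};
  std::atomic_uint JobsInPool{0}; // queued + currently executing
  std::thread Worker;

  void run() {
    std::unique_lock<std::mutex> Lock(M);
    while (true) {
      CV.wait(Lock, [this] { return !Jobs.empty() || Stop.load(); });
      if (Stop.load())
        break;
      auto Job = std::move(Jobs.front());
      Jobs.pop();
      Lock.unlock();
      Job();
      Lock.lock();
      --JobsInPool; // only after the job has fully run
    }
  }

public:
  MiniPool() : Worker([this] { run(); }) {}
  ~MiniPool() {
    Stop = true;
    CV.notify_all();
    Worker.join();
  }

  void submit(std::function<void()> F) {
    ++JobsInPool; // count before publishing so drain() never undercounts
    {
      std::lock_guard<std::mutex> Guard(M);
      Jobs.emplace(std::move(F));
    }
    CV.notify_one();
  }

  // Equivalent of the patch's drain(): wait out all in-flight work.
  void drain() {
    while (JobsInPool != 0)
      std::this_thread::yield();
  }
};
```

Incrementing before the job is published is a deliberate choice in this sketch; the patch increments after the queue insertion, which relies on drain() callers not racing with concurrent submitters.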
From a0b37efd951d0bc85c6d16024bdb33086d9323e5 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 06:01:48 -0800 Subject: [PATCH 55/62] Code cleanup Part 2 Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 12 +++--------- sycl/source/detail/scheduler/scheduler.hpp | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 081cc35a9cbb0..78e0807c9abf7 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -418,14 +418,11 @@ Scheduler::~Scheduler() { } void Scheduler::releaseResources() { - MReleaseStarted = true; - // TraceEvent trace("releaseResources"); if (DefaultHostQueue) { DefaultHostQueue->wait(); - // std::cout << "DefaultHostQueue finished" << std::endl; } GlobalHandler::instance().drainThreadPool(); - // std::cout << "threads finished" << std::endl; + // There might be some commands scheduled for post enqueue cleanup that // haven't been freed because of the graph mutex being locked at the time, // clean them up now. cleanupCommands({}); @@ -447,7 +444,6 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { - cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) @@ -503,10 +499,8 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd) { } void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { - if (!MReleaseStarted) { - std::lock_guard Lock{MDeferredMemReleaseMutex}; - MDeferredMemObjRelease.push_back(MemObj); - } + std::lock_guard Lock{MDeferredMemReleaseMutex}; + MDeferredMemObjRelease.push_back(MemObj); cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 268275859172d..fe2b1e873b290 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -452,7 +452,6 @@ class Scheduler { inline bool isDeferredMemObjectsEmpty(); protected: - std::atomic_bool MReleaseStarted{false}; using RWLockT = std::shared_timed_mutex; using ReadLockT = std::shared_lock; using WriteLockT = std::unique_lock; From 619ee4efc4b8c890d6e8ab41c047ec2ec306ebef Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 06:04:55 -0800 Subject: [PATCH 56/62] Revert "Try to align hip context destruction handling with cuda WA" This reverts commit 1c62d0825991067d4aa45f1d11fe6ea8f1ec5f07.
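For context: the work-around being reverted here (introduced in patch 48) made check_error in the HIP plugin treat hipErrorContextIsDestroyed as success, papering over operations that raced with context teardown. With the scheduler now draining outstanding work before contexts die, that suppression amounted to a helper like the hypothetical sketch below, which is no longer needed:

```cpp
// Hypothetical teardown-tolerant check, mirroring the hunk from patch 48;
// unnecessary once shutdown ordering guarantees work completes first.
bool isBenignDuringTeardown(hipError_t Result) {
  return Result == hipSuccess || Result == hipErrorContextIsDestroyed;
}
```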
--- sycl/plugins/hip/pi_hip.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c10feb2b6e007..43f881b393f86 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -185,7 +185,7 @@ pi_result forLatestEvents(const pi_event *event_wait_list, /// pi_result check_error(hipError_t result, const char *function, int line, const char *file) { - if (result == hipSuccess || result == hipErrorContextIsDestroyed) { + if (result == hipSuccess) { return PI_SUCCESS; } From 3187f0aad2dbae62885583e257c2485faa0f021a Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 08:33:49 -0800 Subject: [PATCH 57/62] Return cleanup deferred buffers to cleanupCommands call Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/event_impl.cpp | 3 -- sycl/source/detail/scheduler/scheduler.cpp | 41 +++++++++++++++------- sycl/source/detail/scheduler/scheduler.hpp | 9 ++++- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f2a596980a741..48c235fc9b096 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -233,9 +233,6 @@ void event_impl::wait(std::shared_ptr Self) { detail::Scheduler::getInstance().waitForEvent(Self); cleanupCommand(std::move(Self)); - detail::Scheduler::getInstance().cleanupDeferredMemObjects( - BlockingT::NON_BLOCKING); - #ifdef XPTI_ENABLE_INSTRUMENTATION instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); #endif diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 78e0807c9abf7..bc432ec2e6994 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -268,7 +268,8 @@ void Scheduler::cleanupFinishedCommands(const EventImplPtr &FinishedEvent) { deallocateStreams(StreamsToDeallocate); } -void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { +bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, + bool StrictLock) { // We are going to traverse a graph of finished commands. Gather stream // objects from these commands if any and deallocate buffers for these stream // objects, this is needed to guarantee that streamed data is printed and @@ -284,16 +285,22 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); if (!Record) // No operations were performed on the mem object - return; + return true; { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events - ReadLockT Lock = acquireReadLock(); + ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock) + : ReadLockT(MGraphLock, std::try_to_lock); + if (!Lock.owns_lock()) + return false; waitForRecordToFinish(Record, Lock); } { - WriteLockT Lock = acquireWriteLock(); + WriteLockT Lock = StrictLock ? 
acquireWriteLock() + : WriteLockT(MGraphLock, std::try_to_lock); + if (!Lock.owns_lock()) + return false; MGraphBuilder.decrementLeafCountersForRecord(Record); MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate, AuxResourcesToDeallocate); @@ -301,6 +308,7 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { } } deallocateStreams(StreamsToDeallocate); + return true; } EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { @@ -444,6 +452,8 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { } void Scheduler::cleanupCommands(const std::vector &Cmds) { + cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); + if (Cmds.empty()) { std::lock_guard Lock{MDeferredCleanupMutex}; if (MDeferredCleanupCommands.empty()) @@ -499,8 +509,10 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd, Command *BlockingCmd) { } void Scheduler::deferMemObjRelease(const std::shared_ptr &MemObj) { - std::lock_guard Lock{MDeferredMemReleaseMutex}; - MDeferredMemObjRelease.push_back(MemObj); + { + std::lock_guard Lock{MDeferredMemReleaseMutex}; + MDeferredMemObjRelease.push_back(MemObj); + } cleanupDeferredMemObjects(BlockingT::NON_BLOCKING); } @@ -510,8 +522,6 @@ inline bool Scheduler::isDeferredMemObjectsEmpty() { } void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { - // std::cout << "MDeferredMemObjRelease.size = " << - // MDeferredMemObjRelease.size() << std::endl; if (isDeferredMemObjectsEmpty()) return; if (Blocking == BlockingT::BLOCKING) { @@ -526,7 +536,7 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { std::vector> ObjsReadyToRelease; { - + // Lock is needed for checkLeavesCompletion - it walks through Record leaves ReadLockT Lock = ReadLockT(MGraphLock, std::try_to_lock); if (Lock.owns_lock()) { // Not expected that Blocking == true will be used in parallel with @@ -544,9 +554,16 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { } } } - // std::cout << "MDeferredMemObjRelease after NON_BLOCKING.size = " << - // MDeferredMemObjRelease.size() << std::endl; - // if any ObjsReadyToRelease found - it is leaving scope and being deleted + auto ReleaseCandidateIt = ObjsReadyToRelease.begin(); + while (ReleaseCandidateIt != ObjsReadyToRelease.end()) { + if (!removeMemoryObject(ReleaseCandidateIt->get(), false)) + break; + ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt); + } + MDeferredMemObjRelease.insert( + MDeferredMemObjRelease.end(), + std::make_move_iterator(ObjsReadyToRelease.begin()), + std::make_move_iterator(ObjsReadyToRelease.end())); } } // namespace detail diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index fe2b1e873b290..ec5a8cfd9d001 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -396,7 +396,14 @@ class Scheduler { /// This member function is used by \ref buffer and \ref image. /// /// \param MemObj is a memory object that points to the buffer being removed. - void removeMemoryObject(detail::SYCLMemObjI *MemObj); + /// \param StrictLock WA, a flag that identifies whether strict (blocking) read + /// and write locks are allowed. The default value is always applied in the + /// buffer_impl destructor. StrictLock == false is introduced for + /// cleanupDeferredMemObjects to avoid blocking mem object release that may + /// lead to dead lock.
\return WA, true if all release actions completed and we + /// could delete the memory object, false otherwise; the most likely reason to + /// receive false is a failure to obtain the write lock. + bool removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock = true); /// Removes finished non-leaf non-alloca commands from the subgraph (assuming /// that all its commands have been waited for). diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 4aa812f3ddcde..f6208911ed825 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -91,7 +91,7 @@ void SYCLMemObjT::updateHostMemory() { // If we're attached to a memory record, process the deletion of the memory // record. We may get detached before we do this. if (MRecord) - Scheduler::getInstance().removeMemoryObject(this); + assert(Scheduler::getInstance().removeMemoryObject(this)); releaseHostMem(MShadowCopy); if (MOpenCLInterop) {
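The StrictLock = false path above is the heart of the non-blocking cleanup: deferred objects are released only if both graph locks can be taken with std::try_to_lock, and a failed acquisition simply leaves the object in the deferred list for the next cleanup pass. The shape of that retry-later loop, reduced to a standalone sketch (Obj and tryRelease are illustrative stand-ins):

```cpp
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <vector>

std::shared_timed_mutex GraphLock;

struct Obj {}; // stands in for a memory object

bool tryRelease(Obj &O) {
  // Non-blocking acquisition: never wait on the graph lock here.
  std::unique_lock<std::shared_timed_mutex> Lock(GraphLock, std::try_to_lock);
  if (!Lock.owns_lock())
    return false; // contended; caller keeps the object for a later pass
  // ... perform the actual record cleanup under the lock ...
  (void)O;
  return true;
}

void cleanupPass(std::vector<std::unique_ptr<Obj>> &Deferred) {
  auto It = Deferred.begin();
  while (It != Deferred.end()) {
    if (!tryRelease(**It))
      break; // stop early, exactly like the patch's candidate loop
    It = Deferred.erase(It);
  }
}
```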
From dbe88e2a538af054b6e6a0894f4d3dda9e8141b1 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 08:53:57 -0800 Subject: [PATCH 58/62] Remove unnecessary variable in ObjectRefCounter Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 28 ++++++++++++--------------- sycl/source/detail/global_handler.hpp | 2 +- sycl/source/detail/thread_pool.hpp | 2 +- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2e89efe3996ed..bb7a02f2ab978 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -38,31 +38,27 @@ namespace detail { template class ObjectUsageCounter { public: - ObjectUsageCounter(std::unique_ptr &Obj, - bool IncrementCounter) - : MIncrementCounter(IncrementCounter), MObj(Obj) { - if (MIncrementCounter) + ObjectUsageCounter(std::unique_ptr &Obj, bool ModifyCounter) + : MModifyCounter(ModifyCounter), MObj(Obj) { + if (MModifyCounter) MCounter++; } ~ObjectUsageCounter() { - if (MIncrementCounter) - MCounter--; - if (!MCounter && MObj) { - if (!MReleaseCalled.exchange(true)) - MObj->releaseResources(); - } + if (!MModifyCounter) + return; + + MCounter--; + if (!MCounter && MObj) + MObj->releaseResources(); } private: static std::atomic_uint MCounter; - bool MIncrementCounter; + bool MModifyCounter; std::unique_ptr &MObj; - static std::atomic_bool MReleaseCalled; }; template std::atomic_uint ObjectUsageCounter::MCounter{0}; -template -std::atomic_bool ObjectUsageCounter::MReleaseCalled{false}; using LockGuard = std::lock_guard; @@ -98,9 +94,9 @@ Scheduler &GlobalHandler::getScheduler() { return *MScheduler.Inst; } -void GlobalHandler::registerSchedulerUsage(bool IncrementCounter) { +void GlobalHandler::registerSchedulerUsage(bool ModifyCounter) { thread_local ObjectUsageCounter SchedulerCounter(MScheduler.Inst, - IncrementCounter); + ModifyCounter); } ProgramManager &GlobalHandler::getProgramManager() { diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 035b72395d3d3..cecd3ab09792a 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -54,7 +54,7 @@ class GlobalHandler { GlobalHandler(const GlobalHandler &) = delete; GlobalHandler(GlobalHandler &&) = delete; - void registerSchedulerUsage(bool IncrementCounter = true); + void registerSchedulerUsage(bool ModifyCounter = true); Scheduler &getScheduler(); ProgramManager &getProgramManager(); Sync &getSync(); diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index e078b4cbb517b..20897ece90ff6 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -33,7 +33,7 @@ class ThreadPool { std::atomic_uint MJobsInExecution; void worker() { - GlobalHandler::instance().registerSchedulerUsage(false); + GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false); std::unique_lock Lock(MJobQueueMutex); std::thread::id ThisThreadId = std::this_thread::get_id(); while (true) { From 06e2608dfccb0c349a1e6aa25fad727c69ed7155 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 12:41:38 -0800 Subject: [PATCH 59/62] Fix hang Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/thread_pool.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 20897ece90ff6..aa1801594efe9 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -93,7 +93,7 @@ class ThreadPool { std::lock_guard Lock(MJobQueueMutex); MJobQueue.emplace([F = std::move(Func)]() { F(); }); } - + MJobsInExecution++; MDoSmthOrStop.notify_one(); } From 71e9048af88ddcc853968cf9bbaa050f63ad323e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Tue, 6 Dec 2022 12:58:28 -0800 Subject: [PATCH 60/62] Fix comments Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/global_handler.cpp | 5 +---- sycl/source/detail/scheduler/scheduler.hpp | 4 ++-- sycl/source/detail/sycl_mem_obj_t.cpp | 8 ++++++-- sycl/source/detail/thread_pool.hpp | 18 ++++++++---------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index bb7a02f2ab978..c2b60ef1de043 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -33,9 +33,6 @@ namespace detail { // The original idea is to track usage of Scheduler from the main and other user // threads, which increment MCounter, while our thread_pool threads use it // without adding an extra reference. The MIncrementCounter class member controls this. -// MObj and MReleaseCalled are extra protection needed to handle the case when -// the main thread has finished but the thread_pool is still running; those -// threads are joined in the releaseResources call. template class ObjectUsageCounter { public: ObjectUsageCounter(std::unique_ptr &Obj, bool ModifyCounter) : MModifyCounter(ModifyCounter), MObj(Obj) { if (MModifyCounter) @@ -81,7 +78,7 @@ T &GlobalHandler::getOrCreate(InstWithLock &IWL, Types... Args) { } void GlobalHandler::attachScheduler(Scheduler *Scheduler) { - // The method is for testing purposes. Do not protect with lock since + // The method is used in unit tests only.
From 71e9048af88ddcc853968cf9bbaa050f63ad323e Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Tue, 6 Dec 2022 12:58:28 -0800
Subject: [PATCH 60/62] Fix comments

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/source/detail/global_handler.cpp      |  5 +----
 sycl/source/detail/scheduler/scheduler.hpp |  4 ++--
 sycl/source/detail/sycl_mem_obj_t.cpp      |  8 ++++++--
 sycl/source/detail/thread_pool.hpp         | 18 ++++++++----------
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp
index bb7a02f2ab978..c2b60ef1de043 100644
--- a/sycl/source/detail/global_handler.cpp
+++ b/sycl/source/detail/global_handler.cpp
@@ -33,9 +33,6 @@ namespace detail {
 // The original idea is to track Scheduler usage from the main and other user
 // threads, which increment MCounter, while thread_pool threads use the
 // Scheduler without adding an extra reference; MModifyCounter controls this.
-// MObj and MReleaseCalled is extra protection needed to handle case when main
-// thread finished but thread_pool is still running and we will join that
-// threads in releaseResources call.
 template <typename T> class ObjectUsageCounter {
 public:
   ObjectUsageCounter(std::unique_ptr<T> &Obj, bool ModifyCounter)
@@ -81,7 +78,7 @@ T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types... Args) {
 }

 void GlobalHandler::attachScheduler(Scheduler *Scheduler) {
-  // The method is for testing purposes. Do not protect with lock since
+  // The method is used in unit tests only. Do not protect with lock since
   // releaseResources will cause a deadlock due to host queue release
   if (MScheduler.Inst)
     MScheduler.Inst->releaseResources();
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index ec5a8cfd9d001..338602d4d874b 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -450,13 +450,13 @@ class Scheduler {
   const QueueImplPtr &getDefaultHostQueue() const { return DefaultHostQueue; }

   static MemObjRecord *getMemObjRecord(const Requirement *const Req);
-  // Virtual for testing purposes only
+
   void deferMemObjRelease(const std::shared_ptr<detail::SYCLMemObjI> &MemObj);

   Scheduler();
   ~Scheduler();
   void releaseResources();
-  inline bool isDeferredMemObjectsEmpty();
+  bool isDeferredMemObjectsEmpty();

 protected:
   using RWLockT = std::shared_timed_mutex;
diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp
index f6208911ed825..1a87a38d2a92e 100644
--- a/sycl/source/detail/sycl_mem_obj_t.cpp
+++ b/sycl/source/detail/sycl_mem_obj_t.cpp
@@ -90,8 +90,12 @@ void SYCLMemObjT::updateHostMemory() {

   // If we're attached to a memory record, process the deletion of the memory
   // record. We may get detached before we do this.
-  if (MRecord)
-    assert(Scheduler::getInstance().removeMemoryObject(this));
+  if (MRecord) {
+    bool Result = Scheduler::getInstance().removeMemoryObject(this);
+    assert(
+        Result &&
+        "removeMemoryObject should not return false in mem object destructor");
+  }
   releaseHostMem(MShadowCopy);

   if (MOpenCLInterop) {
diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp
index aa1801594efe9..798573a40eca9 100644
--- a/sycl/source/detail/thread_pool.hpp
+++ b/sycl/source/detail/thread_pool.hpp
@@ -30,16 +30,14 @@ class ThreadPool {
   std::mutex MJobQueueMutex;
   std::condition_variable MDoSmthOrStop;
   std::atomic_bool MStop;
-  std::atomic_uint MJobsInExecution;
+  std::atomic_uint MJobsInPool;

   void worker() {
     GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false);
     std::unique_lock Lock(MJobQueueMutex);
-    std::thread::id ThisThreadId = std::this_thread::get_id();
     while (true) {
-      MDoSmthOrStop.wait(Lock, [this, &ThisThreadId]() {
-        return !MJobQueue.empty() || MStop.load();
-      });
+      MDoSmthOrStop.wait(
+          Lock, [this]() { return !MJobQueue.empty() || MStop.load(); });
       if (MStop.load())
         break;
@@ -52,7 +50,7 @@ class ThreadPool {

       Lock.lock();

-      MJobsInExecution--;
+      MJobsInPool--;
     }
   }
@@ -60,7 +58,7 @@ class ThreadPool {
     MLaunchedThreads.reserve(MThreadCount);

     MStop.store(false);
-    MJobsInExecution.store(0);
+    MJobsInPool.store(0);

     for (size_t Idx = 0; Idx < MThreadCount; ++Idx)
       MLaunchedThreads.emplace_back([this] { worker(); });
@@ -68,7 +66,7 @@ class ThreadPool {

 public:
   void drain() {
-    while (MJobsInExecution != 0)
+    while (MJobsInPool != 0)
       std::this_thread::yield();
   }
@@ -93,7 +91,7 @@ class ThreadPool {
       std::lock_guard Lock(MJobQueueMutex);
       MJobQueue.emplace([F = std::move(Func)]() { F(); });
     }
-    MJobsInExecution++;
+    MJobsInPool++;
     MDoSmthOrStop.notify_one();
   }
@@ -102,7 +100,7 @@ class ThreadPool {
       std::lock_guard Lock(MJobQueueMutex);
       MJobQueue.emplace(Func);
     }
-    MJobsInExecution++;
+    MJobsInPool++;
     MDoSmthOrStop.notify_one();
   }
 };

From ceea7f83ad60f0affaae6ba0573affc0ac109515 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Wed, 7 Dec 2022 02:48:12 -0800
Subject: [PATCH 61/62] Prevent warning-as-error in release build

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/source/detail/sycl_mem_obj_t.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp
index 1a87a38d2a92e..2acd31259a40b 100644
--- a/sycl/source/detail/sycl_mem_obj_t.cpp
+++ b/sycl/source/detail/sycl_mem_obj_t.cpp
@@ -92,6 +92,7 @@ void SYCLMemObjT::updateHostMemory() {
   // record. We may get detached before we do this.
   if (MRecord) {
     bool Result = Scheduler::getInstance().removeMemoryObject(this);
+    std::ignore = Result; // avoid unused-variable warning in no-assert builds
     assert(
         Result &&
         "removeMemoryObject should not return false in mem object destructor");
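Patches 60 and 61 together sidestep a classic C++ pitfall: under NDEBUG the whole argument of assert is compiled out, so assert(f()) silently drops the call to f(), and once the call is hoisted into a named variable, that variable is unused in release builds and trips warnings promoted to errors. A short sketch of the broken and fixed shapes; doCleanup() is a stand-in, not the scheduler API:

    #include <cassert>
    #include <tuple> // for std::ignore

    // Stand-in for a call with a side effect that must run in every build.
    bool doCleanup() { return true; }

    void wrong() {
      // Bug: with NDEBUG defined this whole statement vanishes, so the
      // cleanup never runs in release builds.
      assert(doCleanup());
    }

    void right() {
      // The call always happens; only the check is compiled out.
      bool Result = doCleanup();
      std::ignore = Result; // silences -Wunused-variable when asserts are off
      assert(Result && "cleanup should not fail here");
    }

    int main() { right(); }

(void)Result would work equally well; std::ignore is a common alternative spelling of the same intent.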
From 1f201a9a1efd3899c7e0a59b4ab86ed8878c022b Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya"
Date: Wed, 7 Dec 2022 09:48:06 -0800
Subject: [PATCH 62/62] Protect MDeferredMemObjRelease modification with mutex

Signed-off-by: Tikhomirova, Kseniya
---
 sycl/source/detail/scheduler/scheduler.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp
index bc432ec2e6994..a8ba574c9e731 100644
--- a/sycl/source/detail/scheduler/scheduler.cpp
+++ b/sycl/source/detail/scheduler/scheduler.cpp
@@ -560,10 +560,13 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) {
       break;
     ReleaseCandidateIt = ObjsReadyToRelease.erase(ReleaseCandidateIt);
   }
-  MDeferredMemObjRelease.insert(
-      MDeferredMemObjRelease.end(),
-      std::make_move_iterator(ObjsReadyToRelease.begin()),
-      std::make_move_iterator(ObjsReadyToRelease.end()));
+  if (!ObjsReadyToRelease.empty()) {
+    std::lock_guard LockDef{MDeferredMemReleaseMutex};
+    MDeferredMemObjRelease.insert(
+        MDeferredMemObjRelease.end(),
+        std::make_move_iterator(ObjsReadyToRelease.begin()),
+        std::make_move_iterator(ObjsReadyToRelease.end()));
+  }
 }

 } // namespace detail
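The closing patch applies the standard recipe for mutating a container shared across threads: hold the mutex that guards it for exactly the duration of the mutation, and skip the lock entirely when there is nothing to append. A minimal sketch under those assumptions; DeferredList, DeferredListMutex, and deferRelease are illustrative names, not the scheduler's actual members:

    #include <iterator>
    #include <memory>
    #include <mutex>
    #include <vector>

    struct MemObj {}; // stand-in for a SYCL memory object

    std::vector<std::shared_ptr<MemObj>> DeferredList; // shared across threads
    std::mutex DeferredListMutex; // guards DeferredList and nothing else

    void deferRelease(std::vector<std::shared_ptr<MemObj>> &&Ready) {
      if (Ready.empty())
        return; // cheap early-out: no lock traffic when there is no work
      std::lock_guard<std::mutex> Lock{DeferredListMutex};
      DeferredList.insert(DeferredList.end(),
                          std::make_move_iterator(Ready.begin()),
                          std::make_move_iterator(Ready.end()));
    }

    int main() {
      std::vector<std::shared_ptr<MemObj>> Ready;
      Ready.push_back(std::make_shared<MemObj>());
      deferRelease(std::move(Ready));
      return DeferredList.size() == 1 ? 0 : 1;
    }

The emptiness check before the lock is safe here because Ready is owned by the caller; only DeferredList itself needs the mutex.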