Skip to content

Commit 894ce25

Browse files
[SYCL] Defer buffer release when no host memory to be updated (#6837)
SYCL2020 4.7.2.3. Buffer synchronization rules states that "A buffer can be constructed from a range (and without a hostData pointer). The memory management for this type of buffer is entirely handled by the SYCL system. The destructor for this type of buffer does not need to block, even if work on the buffer has not completed. Instead, the SYCL system frees any storage required for the buffer asynchronously when it is no longer in use in queues." This commit implements this behavior for sycl::buffer. This feature introduced more resources to be released in the end of program if there was no chance to release them earlier. This commit implements WA of known issues with global object destruction based on thread_local usage, thread_local variables destroy earlier than global variables that allow us to do release resources earlier. Signed-off-by: Tikhomirova, Kseniya <[email protected]>
1 parent ead8404 commit 894ce25

17 files changed

+641
-38
lines changed

sycl/include/sycl/buffer.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ class __SYCL_EXPORT buffer_plain {
125125

126126
size_t getSize() const;
127127

128+
void handleRelease() const;
129+
128130
std::shared_ptr<detail::buffer_impl> impl;
129131
};
130132

@@ -466,7 +468,7 @@ class buffer : public detail::buffer_plain,
466468

467469
buffer &operator=(buffer &&rhs) = default;
468470

469-
~buffer() = default;
471+
~buffer() { buffer_plain::handleRelease(); }
470472

471473
bool operator==(const buffer &rhs) const { return impl == rhs.impl; }
472474

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,8 +1381,40 @@ pi_result piKernelRelease(pi_kernel) { DIE_NO_IMPLEMENTATION; }
13811381

13821382
pi_result piEventCreate(pi_context, pi_event *) { DIE_NO_IMPLEMENTATION; }
13831383

1384-
pi_result piEventGetInfo(pi_event, pi_event_info, size_t, void *, size_t *) {
1385-
DIE_NO_IMPLEMENTATION;
1384+
pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName,
1385+
size_t ParamValueSize, void *ParamValue,
1386+
size_t *ParamValueSizeRet) {
1387+
if (ParamName != PI_EVENT_INFO_COMMAND_EXECUTION_STATUS) {
1388+
DIE_NO_IMPLEMENTATION;
1389+
}
1390+
1391+
auto CheckAndFillStatus = [&](const cm_support::CM_STATUS &State) {
1392+
pi_int32 Result = PI_EVENT_RUNNING;
1393+
if (State == cm_support::CM_STATUS_FINISHED)
1394+
Result = PI_EVENT_COMPLETE;
1395+
if (ParamValue) {
1396+
if (ParamValueSize < sizeof(Result))
1397+
return PI_ERROR_INVALID_VALUE;
1398+
*static_cast<pi_int32 *>(ParamValue) = Result;
1399+
}
1400+
if (ParamValueSizeRet) {
1401+
*ParamValueSizeRet = sizeof(Result);
1402+
}
1403+
return PI_SUCCESS;
1404+
};
1405+
// Dummy event is already completed ones done by CM.
1406+
if (Event->IsDummyEvent)
1407+
return CheckAndFillStatus(cm_support::CM_STATUS_FINISHED);
1408+
1409+
if (Event->CmEventPtr == nullptr)
1410+
return PI_ERROR_INVALID_EVENT;
1411+
1412+
cm_support::CM_STATUS Status;
1413+
int32_t Result = Event->CmEventPtr->GetStatus(Status);
1414+
if (Result != cm_support::CM_SUCCESS)
1415+
return PI_ERROR_COMMAND_EXECUTION_FAILURE;
1416+
1417+
return CheckAndFillStatus(Status);
13861418
}
13871419

13881420
pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,

sycl/source/buffer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,13 @@ void buffer_plain::addOrReplaceAccessorProperties(
121121

122122
size_t buffer_plain::getSize() const { return impl->getSizeInBytes(); }
123123

124+
void buffer_plain::handleRelease() const {
125+
// Try to detach memory object only if impl is going to be released.
126+
// Buffer copy will have pointer to the same impl.
127+
if (impl.use_count() == 1)
128+
impl->detachMemoryObject(impl);
129+
}
130+
124131
} // namespace detail
125132
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
126133
} // namespace sycl

sycl/source/detail/event_impl.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,19 @@ void event_impl::waitInternal() {
8080

8181
void event_impl::setComplete() {
8282
if (MHostEvent || !MEvent) {
83-
std::unique_lock<std::mutex> lock(MMutex);
83+
{
84+
std::unique_lock<std::mutex> lock(MMutex);
8485
#ifndef NDEBUG
85-
int Expected = HES_NotComplete;
86-
int Desired = HES_Complete;
86+
int Expected = HES_NotComplete;
87+
int Desired = HES_Complete;
8788

88-
bool Succeeded = MState.compare_exchange_strong(Expected, Desired);
89+
bool Succeeded = MState.compare_exchange_strong(Expected, Desired);
8990

90-
assert(Succeeded && "Unexpected state of event");
91+
assert(Succeeded && "Unexpected state of event");
9192
#else
92-
MState.store(static_cast<int>(HES_Complete));
93+
MState.store(static_cast<int>(HES_Complete));
9394
#endif
95+
}
9496
cv.notify_all();
9597
return;
9698
}
@@ -144,8 +146,8 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
144146
}
145147

146148
event_impl::event_impl(const QueueImplPtr &Queue)
147-
: MQueue{Queue}, MIsProfilingEnabled{Queue->is_host() ||
148-
Queue->MIsProfilingEnabled} {
149+
: MQueue{Queue},
150+
MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled} {
149151
this->setContextImpl(Queue->getContextImplPtr());
150152

151153
if (Queue->is_host()) {
@@ -429,6 +431,11 @@ void event_impl::cleanDepEventsThroughOneLevel() {
429431
}
430432
}
431433

434+
bool event_impl::isCompleted() {
435+
return get_info<info::event::command_execution_status>() ==
436+
info::event_command_status::complete;
437+
}
438+
432439
} // namespace detail
433440
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
434441
} // namespace sycl

sycl/source/detail/event_impl.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ class event_impl {
243243
/// state.
244244
bool isInitialized() const noexcept { return MIsInitialized; }
245245

246+
bool isCompleted();
247+
246248
void attachEventToComplete(const EventImplPtr &Event) {
247249
std::lock_guard<std::mutex> Lock(MMutex);
248250
MPostCompleteEvents.push_back(Event);

sycl/source/detail/global_handler.cpp

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,36 @@
2727
namespace sycl {
2828
__SYCL_INLINE_VER_NAMESPACE(_V1) {
2929
namespace detail {
30+
31+
// Utility class to track references on object.
32+
// Used for Scheduler now and created as thread_local object.
33+
// Origin idea is to track usage of Scheduler from main and other used threads -
34+
// they increment MCounter; and to use but not add extra reference by our
35+
// thread_pool threads. For this control MIncrementCounter class member is used.
36+
template <class ResourceHandler> class ObjectUsageCounter {
37+
public:
38+
ObjectUsageCounter(std::unique_ptr<ResourceHandler> &Obj, bool ModifyCounter)
39+
: MModifyCounter(ModifyCounter), MObj(Obj) {
40+
if (MModifyCounter)
41+
MCounter++;
42+
}
43+
~ObjectUsageCounter() {
44+
if (!MModifyCounter)
45+
return;
46+
47+
MCounter--;
48+
if (!MCounter && MObj)
49+
MObj->releaseResources();
50+
}
51+
52+
private:
53+
static std::atomic_uint MCounter;
54+
bool MModifyCounter;
55+
std::unique_ptr<ResourceHandler> &MObj;
56+
};
57+
template <class ResourceHandler>
58+
std::atomic_uint ObjectUsageCounter<ResourceHandler>::MCounter{0};
59+
3060
using LockGuard = std::lock_guard<SpinLock>;
3161

3262
GlobalHandler::GlobalHandler() = default;
@@ -47,7 +77,24 @@ T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types... Args) {
4777
return *IWL.Inst;
4878
}
4979

50-
Scheduler &GlobalHandler::getScheduler() { return getOrCreate(MScheduler); }
80+
void GlobalHandler::attachScheduler(Scheduler *Scheduler) {
81+
// The method is used in unit tests only. Do not protect with lock since
82+
// releaseResources will cause dead lock due to host queue release
83+
if (MScheduler.Inst)
84+
MScheduler.Inst->releaseResources();
85+
MScheduler.Inst.reset(Scheduler);
86+
}
87+
88+
Scheduler &GlobalHandler::getScheduler() {
89+
getOrCreate(MScheduler);
90+
registerSchedulerUsage();
91+
return *MScheduler.Inst;
92+
}
93+
94+
void GlobalHandler::registerSchedulerUsage(bool ModifyCounter) {
95+
thread_local ObjectUsageCounter SchedulerCounter(MScheduler.Inst,
96+
ModifyCounter);
97+
}
5198

5299
ProgramManager &GlobalHandler::getProgramManager() {
53100
return getOrCreate(MProgramManager);
@@ -141,9 +188,18 @@ void GlobalHandler::unloadPlugins() {
141188
GlobalHandler::instance().getPlugins().clear();
142189
}
143190

191+
void GlobalHandler::drainThreadPool() {
192+
if (MHostTaskThreadPool.Inst)
193+
MHostTaskThreadPool.Inst->drain();
194+
}
195+
144196
void shutdown() {
145197
// Ensure neither host task is working so that no default context is accessed
146198
// upon its release
199+
200+
if (GlobalHandler::instance().MScheduler.Inst)
201+
GlobalHandler::instance().MScheduler.Inst->releaseResources();
202+
147203
if (GlobalHandler::instance().MHostTaskThreadPool.Inst)
148204
GlobalHandler::instance().MHostTaskThreadPool.Inst->finishAndWait();
149205

sycl/source/detail/global_handler.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class GlobalHandler {
5454
GlobalHandler(const GlobalHandler &) = delete;
5555
GlobalHandler(GlobalHandler &&) = delete;
5656

57+
void registerSchedulerUsage(bool ModifyCounter = true);
5758
Scheduler &getScheduler();
5859
ProgramManager &getProgramManager();
5960
Sync &getSync();
@@ -74,6 +75,10 @@ class GlobalHandler {
7475
static void registerDefaultContextReleaseHandler();
7576

7677
void unloadPlugins();
78+
void drainThreadPool();
79+
80+
// For testing purposes only
81+
void attachScheduler(Scheduler *Scheduler);
7782

7883
private:
7984
friend void releaseDefaultContexts();

0 commit comments

Comments
 (0)