Skip to content

Commit 64c2d35

Browse files
authored
[SYCL] Fix kernel program cache for multiple devices and refactor some unit tests (#5017)
When multiple devices are used in the same context, the problem of piProgram double free is arised. In program_manager::build() created program is shared with multiple devices and is cached in KernelProgramCache as many times as there are devices. When cache is destructed, the same program will be released for every device, but piProgramRetain was called only for one device, which leads to double free. Adding piProgramRetain for each device solves this problem. Deleting kernel from cache after its release prevents double free in the case a kernel was created for a program that shared with multiple devices. Unittest with emulating multiple gpu is added. Signed-off-by: mdimakov <[email protected]>
1 parent c2d4dcc commit 64c2d35

18 files changed

+266
-184
lines changed

sycl/source/detail/kernel_program_cache.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ KernelProgramCache::~KernelProgramCache() {
3333
Plugin.call<PiApiKind::piKernelRelease>(Kern);
3434
}
3535
}
36+
MKernelsPerProgramCache.erase(KernIt);
3637
}
3738

3839
const detail::plugin &Plugin = MParentContext->getPlugin();

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,7 +1813,11 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
18131813
// Cache supports key with once device only, but here we have multiple
18141814
// devices a program is built for, so add the program to the cache for all
18151815
// other devices.
1816-
auto CacheOtherDevices = [ResProgram]() { return ResProgram; };
1816+
const detail::plugin &Plugin = ContextImpl->getPlugin();
1817+
auto CacheOtherDevices = [ResProgram, &Plugin]() {
1818+
Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
1819+
return ResProgram;
1820+
};
18171821

18181822
// The program for device "0" is already added to the cache during the first
18191823
// call to getOrBuild, so starting with "1"
@@ -1833,7 +1837,6 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
18331837
// devive_image_impl shares ownership of PIProgram with, at least, program
18341838
// cache. The ref counter will be descremented in the destructor of
18351839
// device_image_impl
1836-
const detail::plugin &Plugin = ContextImpl->getPlugin();
18371840
Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
18381841

18391842
DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(

sycl/unittests/helpers/CommonRedefinitions.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <CL/sycl.hpp>
109
#include <helpers/PiImage.hpp>
1110
#include <helpers/PiMock.hpp>
1211

sycl/unittests/helpers/PiImage.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#pragma once
1010

11-
#include <CL/sycl.hpp>
1211
#include <CL/sycl/detail/common.hpp>
1312
#include <CL/sycl/detail/pi.hpp>
1413
#include <detail/platform_impl.hpp>

sycl/unittests/helpers/PiMock.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@
2727

2828
#pragma once
2929

30-
#include <CL/sycl.hpp>
3130
#include <CL/sycl/detail/common.hpp>
3231
#include <CL/sycl/detail/pi.hpp>
32+
#include <CL/sycl/device_selector.hpp>
33+
#include <CL/sycl/platform.hpp>
34+
#include <CL/sycl/queue.hpp>
3335
#include <detail/platform_impl.hpp>
3436

3537
#include <functional>

sycl/unittests/helpers/TestKernel.hpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "PiImage.hpp"
12+
13+
class TestKernel;
14+
15+
__SYCL_INLINE_NAMESPACE(cl) {
16+
namespace sycl {
17+
namespace detail {
18+
template <> struct KernelInfo<TestKernel> {
19+
static constexpr unsigned getNumParams() { return 0; }
20+
static const kernel_param_desc_t &getParamDesc(int) {
21+
static kernel_param_desc_t Dummy;
22+
return Dummy;
23+
}
24+
static constexpr const char *getName() { return "TestKernel"; }
25+
static constexpr bool isESIMD() { return false; }
26+
static constexpr bool callsThisItem() { return false; }
27+
static constexpr bool callsAnyThisFreeFunction() { return false; }
28+
};
29+
30+
} // namespace detail
31+
} // namespace sycl
32+
} // __SYCL_INLINE_NAMESPACE(cl)
33+
34+
static sycl::unittest::PiImage generateDefaultImage() {
35+
using namespace sycl::unittest;
36+
37+
PiPropertySet PropSet;
38+
39+
std::vector<unsigned char> Bin{0, 1, 2, 3, 4, 5}; // Random data
40+
41+
PiArray<PiOffloadEntry> Entries = makeEmptyKernels({"TestKernel"});
42+
43+
PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format
44+
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec
45+
"", // Compile options
46+
"", // Link options
47+
std::move(Bin),
48+
std::move(Entries),
49+
std::move(PropSet)};
50+
51+
return Img;
52+
}
53+
54+
static sycl::unittest::PiImage Img = generateDefaultImage();
55+
static sycl::unittest::PiImageArray<1> ImgArray{&Img};

sycl/unittests/kernel-and-program/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
add_sycl_unittest(KernelAndProgramTests OBJECT
22
Cache.cpp
3+
MultipleDevsCache.cpp
34
KernelRelease.cpp
45
KernelInfo.cpp
56
DeviceInfo.cpp

sycl/unittests/kernel-and-program/Cache.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,9 @@ struct MockKernelInfo {
5151
template <> struct KernelInfo<TestKernel> : public MockKernelInfo {
5252
static constexpr const char *getName() { return "TestKernel"; }
5353
};
54-
5554
template <> struct KernelInfo<TestKernel2> : public MockKernelInfo {
5655
static constexpr const char *getName() { return "TestKernel2"; }
5756
};
58-
5957
} // namespace detail
6058
} // namespace sycl
6159
} // __SYCL_INLINE_NAMESPACE(cl)
@@ -117,6 +115,7 @@ static pi_result redefinedKernelCreate(pi_program program,
117115
pi_kernel *ret_kernel) {
118116
return PI_SUCCESS;
119117
}
118+
120119
static pi_result redefinedKernelRelease(pi_kernel kernel) { return PI_SUCCESS; }
121120

122121
class KernelAndProgramCacheTest : public ::testing::Test {
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
//==--- KPCache.cpp --- KernelProgramCache for multiple devices unit test --==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#define SYCL2020_DISABLE_DEPRECATION_WARNINGS
10+
11+
#include "detail/context_impl.hpp"
12+
#include "detail/kernel_bundle_impl.hpp"
13+
#include "detail/kernel_program_cache.hpp"
14+
#include <helpers/CommonRedefinitions.hpp>
15+
#include <helpers/PiImage.hpp>
16+
#include <helpers/PiMock.hpp>
17+
#include <helpers/TestKernel.hpp>
18+
19+
#include <gtest/gtest.h>
20+
21+
#include <iostream>
22+
23+
using namespace sycl;
24+
25+
static pi_result redefinedContextCreate(
26+
const pi_context_properties *properties, pi_uint32 num_devices,
27+
const pi_device *devices,
28+
void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb,
29+
void *user_data),
30+
void *user_data, pi_context *ret_context) {
31+
*ret_context = reinterpret_cast<pi_context>(123);
32+
return PI_SUCCESS;
33+
}
34+
35+
static pi_result redefinedContextRelease(pi_context context) {
36+
return PI_SUCCESS;
37+
}
38+
39+
static pi_result redefinedDevicesGet(pi_platform platform,
40+
pi_device_type device_type,
41+
pi_uint32 num_entries, pi_device *devices,
42+
pi_uint32 *num_devices) {
43+
if (num_devices) {
44+
*num_devices = static_cast<pi_uint32>(2);
45+
return PI_SUCCESS;
46+
}
47+
48+
if (num_entries == 2 && devices) {
49+
devices[0] = reinterpret_cast<pi_device>(1111);
50+
devices[1] = reinterpret_cast<pi_device>(2222);
51+
}
52+
return PI_SUCCESS;
53+
}
54+
55+
static pi_result redefinedDeviceGetInfo(pi_device device,
56+
pi_device_info param_name,
57+
size_t param_value_size,
58+
void *param_value,
59+
size_t *param_value_size_ret) {
60+
if (param_name == PI_DEVICE_INFO_TYPE) {
61+
auto *Result = reinterpret_cast<_pi_device_type *>(param_value);
62+
*Result = PI_DEVICE_TYPE_GPU;
63+
}
64+
if (param_name == PI_DEVICE_INFO_COMPILER_AVAILABLE) {
65+
auto *Result = reinterpret_cast<pi_bool *>(param_value);
66+
*Result = true;
67+
}
68+
return PI_SUCCESS;
69+
}
70+
71+
static pi_result redefinedDeviceRetain(pi_device device) { return PI_SUCCESS; }
72+
73+
static pi_result redefinedDeviceRelease(pi_device device) { return PI_SUCCESS; }
74+
75+
static pi_result redefinedQueueCreate(pi_context context, pi_device device,
76+
pi_queue_properties properties,
77+
pi_queue *queue) {
78+
*queue = reinterpret_cast<pi_queue>(1234);
79+
return PI_SUCCESS;
80+
}
81+
82+
static pi_result redefinedQueueRelease(pi_queue command_queue) {
83+
return PI_SUCCESS;
84+
}
85+
86+
static size_t ProgramNum = 12345;
87+
static pi_result redefinedProgramCreate(pi_context context, const void *il,
88+
size_t length,
89+
pi_program *res_program) {
90+
size_t CurrentProgram = ProgramNum;
91+
*res_program = reinterpret_cast<pi_program>(CurrentProgram);
92+
++ProgramNum;
93+
return PI_SUCCESS;
94+
}
95+
96+
static int RetainCounter = 0;
97+
static pi_result redefinedProgramRetain(pi_program program) {
98+
++RetainCounter;
99+
return PI_SUCCESS;
100+
}
101+
102+
static int KernelReleaseCounter = 0;
103+
static pi_result redefinedKernelRelease(pi_kernel kernel) {
104+
++KernelReleaseCounter;
105+
return PI_SUCCESS;
106+
}
107+
108+
class MultipleDeviceCacheTest : public ::testing::Test {
109+
public:
110+
MultipleDeviceCacheTest() : Plt{default_selector()} {}
111+
112+
protected:
113+
void SetUp() override {
114+
if (Plt.is_host() || Plt.get_backend() != backend::opencl) {
115+
return;
116+
}
117+
118+
Mock = std::make_unique<unittest::PiMock>(Plt);
119+
120+
setupDefaultMockAPIs(*Mock);
121+
Mock->redefine<detail::PiApiKind::piDevicesGet>(redefinedDevicesGet);
122+
Mock->redefine<detail::PiApiKind::piDeviceGetInfo>(redefinedDeviceGetInfo);
123+
Mock->redefine<detail::PiApiKind::piDeviceRetain>(redefinedDeviceRetain);
124+
Mock->redefine<detail::PiApiKind::piDeviceRelease>(redefinedDeviceRelease);
125+
Mock->redefine<detail::PiApiKind::piContextCreate>(redefinedContextCreate);
126+
Mock->redefine<detail::PiApiKind::piContextRelease>(
127+
redefinedContextRelease);
128+
Mock->redefine<detail::PiApiKind::piQueueCreate>(redefinedQueueCreate);
129+
Mock->redefine<detail::PiApiKind::piQueueRelease>(redefinedQueueRelease);
130+
Mock->redefine<detail::PiApiKind::piProgramRetain>(redefinedProgramRetain);
131+
Mock->redefine<detail::PiApiKind::piProgramCreate>(redefinedProgramCreate);
132+
Mock->redefine<detail::PiApiKind::piKernelRelease>(redefinedKernelRelease);
133+
}
134+
135+
protected:
136+
std::unique_ptr<unittest::PiMock> Mock;
137+
platform Plt;
138+
};
139+
140+
// Test that program is retained for each device and each kernel is released
141+
// once
142+
TEST_F(MultipleDeviceCacheTest, ProgramRetain) {
143+
if (Plt.is_host() || Plt.get_backend() != backend::opencl) {
144+
return;
145+
}
146+
{
147+
std::vector<sycl::device> Devices = Plt.get_devices(info::device_type::gpu);
148+
sycl::context Context(Devices);
149+
sycl::queue Queue(Context, Devices[0]);
150+
assert(Devices.size() == 2);
151+
152+
auto Bundle = cl::sycl::get_kernel_bundle<sycl::bundle_state::input>(
153+
Queue.get_context());
154+
Queue.submit(
155+
[&](cl::sycl::handler &cgh) { cgh.single_task<TestKernel>([]() {}); });
156+
157+
auto BundleObject = cl::sycl::build(Bundle, Bundle.get_devices());
158+
auto KernelID = cl::sycl::get_kernel_id<TestKernel>();
159+
auto Kernel = BundleObject.get_kernel(KernelID);
160+
161+
// Because of emulating 2 devices program is retained for each one in
162+
// build(). It is also depends on number of device images. This test has one
163+
// image, but other tests can create other images. Additional variable is
164+
// added to control count of piProgramRetain calls
165+
auto BundleImpl = getSyclObjImpl(Bundle);
166+
int NumRetains = BundleImpl->size() * 2;
167+
168+
EXPECT_EQ(RetainCounter, NumRetains)
169+
<< "Expect " << NumRetains << " piProgramRetain calls";
170+
171+
auto CtxImpl = detail::getSyclObjImpl(Context);
172+
detail::KernelProgramCache::KernelCacheT &KernelCache =
173+
CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get();
174+
175+
EXPECT_EQ(KernelCache.size(), (size_t)2) << "Expect 2 kernels in cache";
176+
}
177+
// First kernel creating is called in handler::single_task().
178+
// kernel_bundle::get_kernel() creates a kernel and shares it with created
179+
// programs. Also the kernel is retained in kernel_bundle::get_kernel(). A
180+
// kernel is removed from cache if piKernelRelease was called for it, so it
181+
// will not be removed twice for the other programs. As a result we must
182+
// expect 3 piKernelRelease calls.
183+
EXPECT_EQ(KernelReleaseCounter, 3) << "Expect 3 piKernelRelease calls";
184+
}

sycl/unittests/pi/PiMock.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include <gtest/gtest.h>
109
#include <helpers/PiMock.hpp>
10+
1111
#include <detail/queue_impl.hpp>
1212

13+
#include <gtest/gtest.h>
14+
1315
using namespace cl::sycl;
1416

1517
pi_result piProgramBuildRedefine(pi_program, pi_uint32, const pi_device *,

0 commit comments

Comments
 (0)