Skip to content

Commit 91b1515

Browse files
authored
[SYCL] Check if kernel_bundle contains compatible kernels (#7691)
According to the SYCL2020 get_kernel_bundle() and has_kernel_bundle() should check that kernels are compatible with device.
1 parent f32d34f commit 91b1515

File tree

8 files changed

+188
-49
lines changed

8 files changed

+188
-49
lines changed

sycl/source/detail/global_handler.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() {
148148
return TP;
149149
}
150150

151-
void releaseDefaultContexts() {
151+
void GlobalHandler::releaseDefaultContexts() {
152152
// Release shared-pointers to SYCL objects.
153153
#ifndef _WIN32
154154
GlobalHandler::instance().MPlatformToDefaultContextCache.Inst.reset(nullptr);
@@ -163,7 +163,9 @@ void releaseDefaultContexts() {
163163
}
164164

165165
struct DefaultContextReleaseHandler {
166-
~DefaultContextReleaseHandler() { releaseDefaultContexts(); }
166+
~DefaultContextReleaseHandler() {
167+
GlobalHandler::instance().releaseDefaultContexts();
168+
}
167169
};
168170

169171
void GlobalHandler::registerDefaultContextReleaseHandler() {
@@ -210,7 +212,7 @@ void shutdown() {
210212
// prior to closing the plugins.
211213
// Note: Releasing a default context here may cause failures in plugins with
212214
// global state as the global state may have been released.
213-
releaseDefaultContexts();
215+
GlobalHandler::instance().releaseDefaultContexts();
214216

215217
// First, release resources, that may access plugins.
216218
GlobalHandler::instance().MPlatformCache.Inst.reset(nullptr);

sycl/source/detail/global_handler.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@ class GlobalHandler {
7575
static void registerDefaultContextReleaseHandler();
7676

7777
void unloadPlugins();
78+
void releaseDefaultContexts();
7879
void drainThreadPool();
7980

8081
// For testing purposes only
8182
void attachScheduler(Scheduler *Scheduler);
8283

8384
private:
84-
friend void releaseDefaultContexts();
8585
friend void shutdown();
8686

8787
// Constructor and destructor are declared out-of-line to allow incomplete

sycl/source/detail/kernel_bundle_impl.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,6 @@ class kernel_bundle_impl {
236236
bundle_state State)
237237
: MContext(std::move(Ctx)), MDevices(std::move(Devs)), MState(State) {
238238

239-
// TODO: Add a check that all kernel ids are compatible with at least one
240-
// device in Devs
241239
common_ctor_checks(State);
242240

243241
MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages(

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,15 +1605,16 @@ void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
16051605
m_Ptr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()});
16061606
}
16071607

1608-
void ProgramManager::getRawDeviceImages(
1609-
const std::vector<kernel_id> &KernelIDs,
1610-
std::set<RTDeviceBinaryImage *> &BinImages) {
1608+
std::set<RTDeviceBinaryImage *>
1609+
ProgramManager::getRawDeviceImages(const std::vector<kernel_id> &KernelIDs) {
1610+
std::set<RTDeviceBinaryImage *> BinImages;
16111611
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
16121612
for (const kernel_id &KID : KernelIDs) {
16131613
auto Range = m_KernelIDs2BinImage.equal_range(KID);
16141614
for (auto It = Range.first, End = Range.second; It != End; ++It)
16151615
BinImages.insert(It->second);
16161616
}
1617+
return BinImages;
16171618
}
16181619

16191620
std::vector<device_image_plain>
@@ -1625,7 +1626,17 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
16251626
// TODO: Can we avoid repacking?
16261627
std::set<RTDeviceBinaryImage *> BinImages;
16271628
if (!KernelIDs.empty()) {
1628-
getRawDeviceImages(KernelIDs, BinImages);
1629+
for (const auto &KID : KernelIDs) {
1630+
bool isCompatibleWithAtLeastOneDev =
1631+
std::any_of(Devs.begin(), Devs.end(), [&KID](const auto &Dev) {
1632+
return sycl::is_compatible({KID}, Dev);
1633+
});
1634+
if (!isCompatibleWithAtLeastOneDev)
1635+
throw sycl::exception(
1636+
make_error_code(errc::invalid),
1637+
"Kernel is incompatible with all devices in devs");
1638+
}
1639+
BinImages = getRawDeviceImages(KernelIDs);
16291640
} else {
16301641
std::lock_guard<std::mutex> Guard(Sync::getGlobalLock());
16311642
for (auto &ImagesSets : m_DeviceImages) {
@@ -1653,7 +1664,8 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
16531664
continue;
16541665

16551666
for (const sycl::device &Dev : Devs) {
1656-
if (!compatibleWithDevice(BinImage, Dev))
1667+
if (!compatibleWithDevice(BinImage, Dev) ||
1668+
!doesDevSupportImgAspects(Dev, *BinImage))
16571669
continue;
16581670

16591671
std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
@@ -1732,7 +1744,7 @@ ProgramManager::getSYCLDeviceImages(const context &Ctx,
17321744
// Collect device images with compatible state
17331745
std::vector<device_image_plain> DeviceImages =
17341746
getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState);
1735-
// Brind device images with compatible state to desired state
1747+
// Bring device images with compatible state to desired state.
17361748
bringSYCLDeviceImagesToState(DeviceImages, TargetState);
17371749
return DeviceImages;
17381750
}
@@ -1779,7 +1791,7 @@ std::vector<device_image_plain> ProgramManager::getSYCLDeviceImages(
17791791
std::vector<device_image_plain> DeviceImages =
17801792
getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs);
17811793

1782-
// Brind device images with compatible state to desired state
1794+
// Bring device images with compatible state to desired state.
17831795
bringSYCLDeviceImagesToState(DeviceImages, TargetState);
17841796
return DeviceImages;
17851797
}
@@ -2118,6 +2130,29 @@ std::pair<RT::PiKernel, std::mutex *> ProgramManager::getOrCreateKernel(
21182130
&(BuildResult->MBuildResultMutex));
21192131
}
21202132

2133+
bool doesDevSupportImgAspects(const device &Dev,
2134+
const RTDeviceBinaryImage &Img) {
2135+
const RTDeviceBinaryImage::PropertyRange &PropRange =
2136+
Img.getDeviceRequirements();
2137+
RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if(
2138+
PropRange.begin(), PropRange.end(),
2139+
[](RTDeviceBinaryImage::PropertyRange::ConstIterator &&Prop) {
2140+
using namespace std::literals;
2141+
return (*Prop)->Name == "aspects"sv;
2142+
});
2143+
if (PropIt == PropRange.end())
2144+
return true;
2145+
ByteArray Aspects = DeviceBinaryProperty(*PropIt).asByteArray();
2146+
// Drop 8 bytes describing the size of the byte array.
2147+
Aspects.dropBytes(8);
2148+
while (!Aspects.empty()) {
2149+
aspect Aspect = Aspects.consume<aspect>();
2150+
if (!Dev.has(Aspect))
2151+
return false;
2152+
}
2153+
return true;
2154+
}
2155+
21212156
} // namespace detail
21222157
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
21232158
} // namespace sycl

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
4545
class context;
4646
namespace detail {
4747

48+
bool doesDevSupportImgAspects(const device &Dev,
49+
const RTDeviceBinaryImage &BinImages);
50+
4851
// This value must be the same as in libdevice/device_itt.h.
4952
// See sycl/doc/design/ITTAnnotations.md for more info.
5053
static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
@@ -255,8 +258,8 @@ class ProgramManager {
255258

256259
bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const;
257260

258-
void getRawDeviceImages(const std::vector<kernel_id> &KernelIDs,
259-
std::set<RTDeviceBinaryImage *> &BinImages);
261+
std::set<RTDeviceBinaryImage *>
262+
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
260263

261264
private:
262265
ProgramManager(ProgramManager const &) = delete;

sycl/source/kernel_bundle.cpp

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,6 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
184184
detail::ProgramManager::getInstance()
185185
.getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, State);
186186

187-
// TODO: Add a check that all kernel ids are compatible with at least one
188-
// device in Devs
189-
190187
return (bool)DeviceImages.size();
191188
}
192189

@@ -236,9 +233,6 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector<device> &Devs,
236233
return CombinedKernelIDs.count(KernelID);
237234
});
238235

239-
// TODO: Add a check that all kernel ids are compatible with at least one
240-
// device in Devs
241-
242236
return AllKernelIDsRepresented;
243237
}
244238

@@ -296,28 +290,12 @@ std::vector<kernel_id> get_kernel_ids() {
296290
}
297291

298292
bool is_compatible(const std::vector<kernel_id> &KernelIDs, const device &Dev) {
299-
using namespace detail;
300-
std::set<RTDeviceBinaryImage *> BinImages;
301-
ProgramManager::getInstance().getRawDeviceImages(KernelIDs, BinImages);
302-
for (RTDeviceBinaryImage *Img : BinImages) {
303-
const RTDeviceBinaryImage::PropertyRange &PropRange =
304-
Img->getDeviceRequirements();
305-
for (RTDeviceBinaryImage::PropertyRange::ConstIterator It : PropRange) {
306-
using namespace std::literals;
307-
if ((*It)->Name != "aspects"sv)
308-
continue;
309-
ByteArray Aspects = DeviceBinaryProperty(*It).asByteArray();
310-
// Drop 8 bytes describing the size of the byte array
311-
Aspects.dropBytes(8);
312-
while (!Aspects.empty()) {
313-
aspect Aspect = Aspects.consume<aspect>();
314-
if (!Dev.has(Aspect))
315-
return false;
316-
}
317-
}
318-
}
319-
320-
return true;
293+
std::set<detail::RTDeviceBinaryImage *> BinImages =
294+
detail::ProgramManager::getInstance().getRawDeviceImages(KernelIDs);
295+
return std::all_of(BinImages.begin(), BinImages.end(),
296+
[&Dev](const detail::RTDeviceBinaryImage *Img) {
297+
return doesDevSupportImgAspects(Dev, *Img);
298+
});
321299
}
322300

323301
} // __SYCL_INLINE_VER_NAMESPACE(_V1)

sycl/unittests/SYCL2020/KernelBundle.cpp

Lines changed: 128 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
class TestKernel;
1919
class TestKernelExeOnly;
20+
class TestKernelWithAspects;
2021

2122
namespace sycl {
2223
__SYCL_INLINE_VER_NAMESPACE(_V1) {
@@ -47,17 +48,33 @@ template <> struct KernelInfo<TestKernelExeOnly> {
4748
static constexpr int64_t getKernelSize() { return 1; }
4849
};
4950

51+
template <> struct KernelInfo<TestKernelWithAspects> {
52+
static constexpr unsigned getNumParams() { return 0; }
53+
static const kernel_param_desc_t &getParamDesc(int) {
54+
static kernel_param_desc_t Dummy;
55+
return Dummy;
56+
}
57+
static constexpr const char *getName() { return "TestKernelWithAspects"; }
58+
static constexpr bool isESIMD() { return false; }
59+
static constexpr bool callsThisItem() { return false; }
60+
static constexpr bool callsAnyThisFreeFunction() { return false; }
61+
static constexpr int64_t getKernelSize() { return 1; }
62+
};
63+
5064
} // namespace detail
5165
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
5266
} // namespace sycl
5367

5468
static sycl::unittest::PiImage
5569
generateDefaultImage(std::initializer_list<std::string> KernelNames,
5670
pi_device_binary_type BinaryType,
57-
const char *DeviceTargetSpec) {
71+
const char *DeviceTargetSpec,
72+
const std::vector<sycl::aspect> &Aspects = {}) {
5873
using namespace sycl::unittest;
5974

6075
PiPropertySet PropSet;
76+
if (!Aspects.empty())
77+
addAspects(PropSet, Aspects);
6178

6279
std::vector<unsigned char> Bin{0, 1, 2, 3, 4, 5}; // Random data
6380

@@ -74,16 +91,30 @@ generateDefaultImage(std::initializer_list<std::string> KernelNames,
7491
return Img;
7592
}
7693

77-
static sycl::unittest::PiImage Imgs[3] = {
94+
static sycl::unittest::PiImage Imgs[] = {
7895
generateDefaultImage({"TestKernel"}, PI_DEVICE_BINARY_TYPE_SPIRV,
7996
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64),
8097
generateDefaultImage({"TestKernelExeOnly"}, PI_DEVICE_BINARY_TYPE_NATIVE,
8198
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64),
8299
// A device image without entires
83-
generateDefaultImage({},
84-
PI_DEVICE_BINARY_TYPE_NATIVE,
85-
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64)};
86-
static sycl::unittest::PiImageArray<3> ImgArray{Imgs};
100+
generateDefaultImage({}, PI_DEVICE_BINARY_TYPE_NATIVE,
101+
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64),
102+
generateDefaultImage(
103+
{"TestKernelWithAspects"}, PI_DEVICE_BINARY_TYPE_NATIVE,
104+
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, {sycl::aspect::gpu})};
105+
static sycl::unittest::PiImageArray<std::size(Imgs)> ImgArray{Imgs};
106+
107+
static pi_result redefinedDeviceGetInfoCPU(pi_device device,
108+
pi_device_info param_name,
109+
size_t param_value_size,
110+
void *param_value,
111+
size_t *param_value_size_ret) {
112+
if (param_name == PI_DEVICE_INFO_TYPE) {
113+
auto *Result = reinterpret_cast<_pi_device_type *>(param_value);
114+
*Result = PI_DEVICE_TYPE_CPU;
115+
}
116+
return PI_SUCCESS;
117+
}
87118

88119
TEST(KernelBundle, GetKernelBundleFromKernel) {
89120
sycl::unittest::PiMock Mock;
@@ -537,3 +568,94 @@ TEST(KernelBundle, DescendentDevice) {
537568

538569
EXPECT_EQ(KernelBundle, RetKernelBundle);
539570
}
571+
572+
TEST(KernelBundle, CheckIfBundleHasIncompatibleKernel) {
573+
sycl::unittest::PiMock Mock;
574+
// TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with
575+
// the CPU device and hence shouldn't be in the kernel bundle.
576+
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
577+
redefinedDeviceGetInfoCPU);
578+
sycl::platform Plt = Mock.getPlatform();
579+
const sycl::device Dev = Plt.get_devices()[0];
580+
EXPECT_TRUE(Dev.is_cpu());
581+
582+
auto Bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
583+
sycl::context(Dev), {Dev});
584+
auto KernelId1 = sycl::get_kernel_id<TestKernelWithAspects>();
585+
auto KernelId2 = sycl::get_kernel_id<TestKernel>();
586+
587+
EXPECT_FALSE(Bundle.has_kernel(KernelId1));
588+
EXPECT_TRUE(Bundle.has_kernel(KernelId2));
589+
}
590+
591+
TEST(KernelBundle, CheckIfBundleHasCompatibleKernel) {
592+
sycl::unittest::PiMock Mock;
593+
sycl::platform Plt = Mock.getPlatform();
594+
// GPU by default.
595+
const sycl::device Dev = Plt.get_devices()[0];
596+
EXPECT_TRUE(Dev.is_gpu());
597+
598+
auto Bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
599+
sycl::context(Dev), {Dev});
600+
auto KernelId1 = sycl::get_kernel_id<TestKernelWithAspects>();
601+
auto KernelId2 = sycl::get_kernel_id<TestKernel>();
602+
603+
EXPECT_TRUE(Bundle.has_kernel(KernelId1));
604+
EXPECT_TRUE(Bundle.has_kernel(KernelId2));
605+
}
606+
607+
TEST(KernelBundle, CheckIfIncompatibleBundleExists) {
608+
sycl::unittest::PiMock Mock;
609+
// TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with
610+
// the CPU device and hence shouldn't be in the kernel bundle.
611+
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
612+
redefinedDeviceGetInfoCPU);
613+
sycl::platform Plt = Mock.getPlatform();
614+
const sycl::device Dev = Plt.get_devices()[0];
615+
EXPECT_TRUE(Dev.is_cpu());
616+
617+
auto KernelId1 = sycl::get_kernel_id<TestKernelWithAspects>();
618+
auto KernelId2 = sycl::get_kernel_id<TestKernel>();
619+
620+
EXPECT_FALSE(sycl::has_kernel_bundle<sycl::bundle_state::executable>(
621+
sycl::context(Dev), {KernelId1, KernelId2}));
622+
EXPECT_FALSE(sycl::has_kernel_bundle<sycl::bundle_state::executable>(
623+
sycl::context(Dev), {KernelId1}));
624+
EXPECT_TRUE(sycl::has_kernel_bundle<sycl::bundle_state::executable>(
625+
sycl::context(Dev), {KernelId2}));
626+
}
627+
628+
TEST(KernelBundle, CheckIfCompatibleBundleExists2) {
629+
sycl::unittest::PiMock Mock;
630+
sycl::platform Plt = Mock.getPlatform();
631+
// GPU by default.
632+
const sycl::device Dev = Plt.get_devices()[0];
633+
EXPECT_TRUE(Dev.is_gpu());
634+
635+
auto KernelId1 = sycl::get_kernel_id<TestKernelWithAspects>();
636+
auto KernelId2 = sycl::get_kernel_id<TestKernel>();
637+
638+
EXPECT_TRUE(sycl::has_kernel_bundle<sycl::bundle_state::executable>(
639+
sycl::context(Dev), {KernelId1, KernelId2}));
640+
}
641+
642+
TEST(KernelBundle, CheckExceptionIfKernelIncompatible) {
643+
sycl::unittest::PiMock Mock;
644+
// TestKernelWithAspects has GPU aspect, so it shouldn't be compatible with
645+
// the CPU device and hence shouldn't be in the kernel bundle.
646+
Mock.redefineAfter<sycl::detail::PiApiKind::piDeviceGetInfo>(
647+
redefinedDeviceGetInfoCPU);
648+
sycl::platform Plt = Mock.getPlatform();
649+
const sycl::device Dev = Plt.get_devices()[0];
650+
EXPECT_TRUE(Dev.is_cpu());
651+
652+
auto KernelId = sycl::get_kernel_id<TestKernelWithAspects>();
653+
std::string msg = "";
654+
try {
655+
auto Bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
656+
sycl::context(Dev), {Dev}, {KernelId});
657+
} catch (sycl::exception &e) {
658+
msg = e.what();
659+
}
660+
EXPECT_EQ(msg, "Kernel is incompatible with all devices in devs");
661+
}

0 commit comments

Comments
 (0)