Skip to content

Commit f5b380b

Browse files
committed
[SYCL] Do not build device code for sub-devices.
Technically sub-devices are the same as their root device, so we can build program for root device only and re-use the binary for sub-devices to avoid "duplicate" builds.
1 parent ba29bbe commit f5b380b

File tree

7 files changed

+171
-15
lines changed

7 files changed

+171
-15
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,8 +2124,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
21242124
}
21252125
}
21262126
case PI_DEVICE_INFO_PARENT_DEVICE:
2127-
// TODO: all Level Zero devices are parent ?
2128-
return ReturnValue(pi_device{0});
2127+
return ReturnValue(Device->RootDevice);
21292128
case PI_DEVICE_INFO_PLATFORM:
21302129
return ReturnValue(Device->Platform);
21312130
case PI_DEVICE_INFO_VENDOR_ID:

sycl/source/detail/device_impl.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,11 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle,
5353
Plugin.call<PiApiKind::piDeviceGetInfo>(
5454
MDevice, PI_DEVICE_INFO_TYPE, sizeof(RT::PiDeviceType), &MType, nullptr);
5555

56-
RT::PiDevice parent = nullptr;
5756
// TODO catch an exception and put it to list of asynchronous exceptions
5857
Plugin.call<PiApiKind::piDeviceGetInfo>(MDevice, PI_DEVICE_INFO_PARENT_DEVICE,
59-
sizeof(RT::PiDevice), &parent,
58+
sizeof(RT::PiDevice), &MRootDevice,
6059
nullptr);
6160

62-
MIsRootDevice = (nullptr == parent);
6361
if (!InteroperabilityConstructor) {
6462
// TODO catch an exception and put it to list of asynchronous exceptions
6563
// Interoperability Constructor already calls DeviceRetain in

sycl/source/detail/device_impl.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,14 @@ class device_impl {
225225

226226
bool isAssertFailSupported() const;
227227

228+
bool isRootDevice() const { return MRootDevice == nullptr; }
229+
228230
private:
229231
explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device,
230232
PlatformImplPtr Platform, const plugin &Plugin);
231233
RT::PiDevice MDevice = 0;
232234
RT::PiDeviceType MType;
233-
bool MIsRootDevice = false;
235+
RT::PiDevice MRootDevice = nullptr;
234236
bool MIsHostDevice;
235237
PlatformImplPtr MPlatform;
236238
bool MIsAssertFailSupported = false;

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -470,10 +470,16 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
470470
if (Prg)
471471
Prg->stableSerializeSpecConstRegistry(SpecConsts);
472472

473-
auto BuildF = [this, &M, &KSId, &ContextImpl, &DeviceImpl, Prg, &CompileOpts,
473+
// Use root device image to avoid building for the same architecture.
474+
DeviceImplPtr RootDev = DeviceImpl;
475+
while (!RootDev->isRootDevice())
476+
RootDev = detail::getSyclObjImpl(
477+
RootDev->get_info<info::device::parent_device>());
478+
479+
auto BuildF = [this, &M, &KSId, &ContextImpl, &RootDev, Prg, &CompileOpts,
474480
&LinkOpts, &JITCompilationIsRequired, SpecConsts] {
475481
auto Context = createSyclObjFromImpl<context>(ContextImpl);
476-
auto Device = createSyclObjFromImpl<device>(DeviceImpl);
482+
auto Device = createSyclObjFromImpl<device>(RootDev);
477483

478484
const RTDeviceBinaryImage &Img =
479485
getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
@@ -523,7 +529,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
523529
return BuiltProgram.release();
524530
};
525531

526-
const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
532+
const RT::PiDevice PiDevice = RootDev->getHandleRef();
527533

528534
auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
529535
Cache,
@@ -560,7 +566,13 @@ ProgramManager::getOrCreateKernel(OSModuleHandle M,
560566
Prg->stableSerializeSpecConstRegistry(SpecConsts);
561567
}
562568
applyOptionsFromEnvironment(CompileOpts, LinkOpts);
563-
const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
569+
570+
// Use root device image to avoid building for the same architecture.
571+
DeviceImplPtr D = DeviceImpl;
572+
while (!D->isRootDevice())
573+
D = detail::getSyclObjImpl(D->get_info<info::device::parent_device>());
574+
575+
const RT::PiDevice PiDevice = D->getHandleRef();
564576

565577
auto key = std::make_tuple(std::move(SpecConsts), M, PiDevice,
566578
CompileOpts + LinkOpts, KernelName);
@@ -569,7 +581,7 @@ ProgramManager::getOrCreateKernel(OSModuleHandle M,
569581
return ret_tuple;
570582

571583
RT::PiProgram Program =
572-
getBuiltPIProgram(M, ContextImpl, DeviceImpl, KernelName, Prg);
584+
getBuiltPIProgram(M, ContextImpl, D, KernelName, Prg);
573585

574586
auto AcquireF = [](KernelProgramCache &Cache) {
575587
return Cache.acquireKernelsPerProgramCache();
@@ -830,8 +842,13 @@ ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId,
830842
for (unsigned I = 0; I < Imgs.size(); I++)
831843
RawImgs[I] = const_cast<pi_device_binary>(&Imgs[I]->getRawData());
832844

845+
// Use root device image to avoid building for the same architecture.
846+
device RootDevice = Device;
847+
while (!getSyclObjImpl(RootDevice)->isRootDevice())
848+
RootDevice = Device.get_info<info::device::parent_device>();
849+
833850
Ctx->getPlugin().call<PiApiKind::piextDeviceSelectBinary>(
834-
getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(),
851+
getSyclObjImpl(RootDevice)->getHandleRef(), RawImgs.data(),
835852
(cl_uint)RawImgs.size(), &ImgInd);
836853

837854
if (JITCompilationIsRequired) {

sycl/source/detail/program_manager/program_manager.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ class ProgramManager {
107107
SerializedObj SpecConsts);
108108
/// Builds or retrieves from cache a program defining the kernel with given
109109
/// name.
110-
/// \param M idenfies the OS module the kernel comes from (multiple OS modules
111-
/// may have kernels with the same name)
110+
/// \param M identifies the OS module the kernel comes from (multiple OS
111+
/// modules may have kernels with the same name)
112112
/// \param Context the context to build the program with
113113
/// \param Device the device for which the program is built
114114
/// \param KernelName the kernel's name
@@ -152,7 +152,7 @@ class ProgramManager {
152152
/// \param NativePrg the native program, target for spec constant setting; if
153153
/// not null then overrides the native program in Prg
154154
/// \param Img A source of the information about which constants need
155-
/// setting and symboling->integer spec constnant ID mapping. If not
155+
/// setting and symboling->integer spec constant ID mapping. If not
156156
/// null, overrides native program->binary image binding maintained by
157157
/// the program manager.
158158
void flushSpecConstants(const program_impl &Prg,

sycl/unittests/program_manager/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ set(CMAKE_CXX_EXTENSIONS OFF)
33
add_sycl_unittest(ProgramManagerTests OBJECT
44
EliminatedArgMask.cpp
55
itt_annotations.cpp
6+
SubDevices.cpp
67
)
78

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <CL/sycl/program.hpp>
10+
#include <detail/kernel_bundle_impl.hpp>
11+
12+
#include <helpers/CommonRedefinitions.hpp>
13+
#include <helpers/PiImage.hpp>
14+
#include <helpers/PiMock.hpp>
15+
16+
#include <gtest/gtest.h>
17+
18+
#include <helpers/TestKernel.hpp>
19+
20+
static pi_device rootDevice;
21+
static pi_device piSubDev1 = (pi_device)0x1;
22+
static pi_device piSubDev2 = (pi_device)0x2;
23+
24+
namespace {
25+
pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name,
26+
size_t param_value_size, void *param_value,
27+
size_t *param_value_size_ret) {
28+
if (param_name == PI_DEVICE_INFO_PARTITION_PROPERTIES) {
29+
if (!param_value) {
30+
*param_value_size_ret = 2 * sizeof(pi_device_partition_property);
31+
} else {
32+
((pi_device_partition_property *)param_value)[0] =
33+
PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
34+
((pi_device_partition_property *)param_value)[1] =
35+
PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
36+
}
37+
}
38+
if (param_name == PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN) {
39+
if (!param_value) {
40+
*param_value_size_ret = sizeof(pi_device_affinity_domain);
41+
} else {
42+
((pi_device_affinity_domain *)param_value)[0] =
43+
PI_DEVICE_AFFINITY_DOMAIN_NUMA |
44+
PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
45+
}
46+
}
47+
if (param_name == PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES) {
48+
((pi_uint32 *)param_value)[0] = 2;
49+
}
50+
if (param_name == PI_DEVICE_INFO_PARENT_DEVICE) {
51+
if (device == piSubDev1 || device == piSubDev2)
52+
((pi_device *)param_value)[0] = rootDevice;
53+
else
54+
((pi_device *)param_value)[0] = nullptr;
55+
}
56+
return PI_SUCCESS;
57+
}
58+
59+
pi_result redefinedDevicePartition(
60+
pi_device Device, const pi_device_partition_property *Properties,
61+
pi_uint32 NumDevices, pi_device *OutDevices, pi_uint32 *OutNumDevices) {
62+
if (OutNumDevices)
63+
*OutNumDevices = 2;
64+
if (OutDevices) {
65+
OutDevices[0] = {};
66+
OutDevices[1] = {};
67+
}
68+
return PI_SUCCESS;
69+
}
70+
71+
pi_result redefinedDeviceRetain(pi_device c) { return PI_SUCCESS; }
72+
73+
pi_result redefinedDeviceRelease(pi_device c) { return PI_SUCCESS; }
74+
75+
pi_result redefinedProgramBuild(
76+
pi_program prog, pi_uint32, const pi_device *, const char *,
77+
void (*pfn_notify)(pi_program program, void *user_data), void *user_data) {
78+
static int m = 0;
79+
m++;
80+
// if called more than once return an error
81+
if (m > 1)
82+
return PI_ERROR_UNKNOWN;
83+
84+
return PI_SUCCESS;
85+
}
86+
} // anonymous namespace
87+
88+
// Check that program is built once for all sub-devices
89+
TEST(SubDevices, BuildProgramForSubdevices) {
90+
sycl::platform Plt{sycl::default_selector()};
91+
// Host devices do not support sub-devices
92+
if (Plt.is_host()) {
93+
std::cerr << "Test is not supported on "
94+
<< Plt.get_info<sycl::info::platform::name>() << ", skipping\n";
95+
GTEST_SKIP(); // test is not supported on selected platform.
96+
}
97+
98+
// Setup Mock APIs
99+
sycl::unittest::PiMock Mock{Plt};
100+
setupDefaultMockAPIs(Mock);
101+
Mock.redefine<sycl::detail::PiApiKind::piDeviceGetInfo>(
102+
redefinedDeviceGetInfo);
103+
Mock.redefine<sycl::detail::PiApiKind::piDevicePartition>(
104+
redefinedDevicePartition);
105+
Mock.redefine<sycl::detail::PiApiKind::piDeviceRetain>(redefinedDeviceRetain);
106+
Mock.redefine<sycl::detail::PiApiKind::piDeviceRelease>(
107+
redefinedDeviceRelease);
108+
Mock.redefine<sycl::detail::PiApiKind::piProgramBuild>(redefinedProgramBuild);
109+
110+
// Create 2 sub-devices and use first device as a root device
111+
sycl::context Ctx{Plt};
112+
const sycl::device device = Plt.get_devices()[0];
113+
// Initialize root device
114+
rootDevice = sycl::detail::getSyclObjImpl(device)->getHandleRef();
115+
// Initialize sub-devices
116+
auto PltImpl = sycl::detail::getSyclObjImpl(Plt);
117+
auto subDev1 = std::make_shared<sycl::detail::device_impl>(
118+
piSubDev1, PltImpl->getPlugin());
119+
auto subDev2 = std::make_shared<sycl::detail::device_impl>(
120+
piSubDev2, PltImpl->getPlugin());
121+
122+
// Create device binary description structures for getBuiltPIProgram API.
123+
auto devBin = Img.convertToNativeType();
124+
pi_device_binaries_struct devBinStruct{PI_DEVICE_BINARIES_VERSION, 1,
125+
&devBin};
126+
sycl::detail::ProgramManager::getInstance().addImages(&devBinStruct);
127+
128+
// Build program via getBuiltPIProgram API
129+
sycl::detail::ProgramManager::getInstance().getBuiltPIProgram(
130+
sycl::detail::OSUtil::getOSModuleHandle(&devBin),
131+
sycl::detail::getSyclObjImpl(Ctx), subDev1,
132+
sycl::detail::KernelInfo<TestKernel>::getName());
133+
// This call should re-use built binary from the cache. If piProgramBuild is
134+
// called again, the test will fail as second call of redefinedProgramBuild
135+
sycl::detail::ProgramManager::getInstance().getBuiltPIProgram(
136+
sycl::detail::OSUtil::getOSModuleHandle(&devBin),
137+
sycl::detail::getSyclObjImpl(Ctx), subDev2,
138+
sycl::detail::KernelInfo<TestKernel>::getName());
139+
}

0 commit comments

Comments
 (0)