Skip to content

Commit 13a7455

Browse files
authored
[SYCL] Do not build device code for sub-devices (#5240)
Technically sub-devices are the same as their root device, so we can build program for root device only and re-use the binary for sub-devices to avoid "duplicate" builds.
1 parent fbf6e21 commit 13a7455

File tree

13 files changed

+227
-15
lines changed

13 files changed

+227
-15
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ typedef enum {
305305
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025,
306306
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026,
307307
PI_DEVICE_INFO_IMAGE_SRGB = 0x10027,
308+
// Return true if sub-device should do its own program build
309+
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE = 0x10028,
308310
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
309311
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
310312
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,6 +1490,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
14901490
return getInfo(param_value_size, param_value, param_value_size_ret,
14911491
PI_TRUE);
14921492
}
1493+
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: {
1494+
return getInfo(param_value_size, param_value, param_value_size_ret,
1495+
PI_TRUE);
1496+
}
14931497
case PI_DEVICE_INFO_COMPILER_AVAILABLE: {
14941498
return getInfo(param_value_size, param_value, param_value_size_ret,
14951499
PI_TRUE);

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
667667
return ReturnValue("");
668668
case PI_DEVICE_INFO_VERSION:
669669
return ReturnValue(Device->VersionStr.c_str());
670+
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: // emulator doesn't support partition
671+
return ReturnValue(pi_bool{true});
670672
case PI_DEVICE_INFO_COMPILER_AVAILABLE:
671673
return ReturnValue(pi_bool{false});
672674
case PI_DEVICE_INFO_LINKER_AVAILABLE:

sycl/plugins/hip/pi_hip.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
14091409
return getInfo(param_value_size, param_value, param_value_size_ret,
14101410
PI_TRUE);
14111411
}
1412+
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: {
1413+
return getInfo(param_value_size, param_value, param_value_size_ret,
1414+
PI_TRUE);
1415+
}
14121416
case PI_DEVICE_INFO_COMPILER_AVAILABLE: {
14131417
return getInfo(param_value_size, param_value, param_value_size_ret,
14141418
PI_TRUE);

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,7 +2028,7 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
20282028
return PI_INVALID_VALUE;
20292029
}
20302030

2031-
// Get the cahched PI device created for the L0 device handle.
2031+
// Get the cached PI device created for the L0 device handle.
20322032
// Return NULL if no such PI device found.
20332033
pi_device _pi_platform::getDeviceFromNativeHandle(ze_device_handle_t ZeDevice) {
20342034

@@ -2188,6 +2188,11 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() {
21882188
// Create PI sub-sub-devices with the sub-device for all the ordinals.
21892189
// Each {ordinal, index} points to a specific CCS which constructs
21902190
// a sub-sub-device at this point.
2191+
// FIXME: Level Zero creates multiple PiDevices for a single physical
2192+
// device when sub-device is partitioned into sub-sub-devices.
2193+
// Sub-sub-device is technically a command queue and we should not build
2194+
// program for each command queue. PiDevice is probably not the right
2195+
// abstraction for a Level Zero command queue.
21912196
for (uint32_t J = 0; J < Ordinals.size(); ++J) {
21922197
for (uint32_t K = 0; K < QueueGroupProperties[Ordinals[J]].numQueues;
21932198
++K) {
@@ -2276,8 +2281,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
22762281
}
22772282
}
22782283
case PI_DEVICE_INFO_PARENT_DEVICE:
2279-
// TODO: all Level Zero devices are parent ?
2280-
return ReturnValue(pi_device{0});
2284+
return ReturnValue(Device->RootDevice);
22812285
case PI_DEVICE_INFO_PLATFORM:
22822286
return ReturnValue(Device->Platform);
22832287
case PI_DEVICE_INFO_VENDOR_ID:
@@ -2337,6 +2341,11 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
23372341
}
23382342
case PI_DEVICE_INFO_NAME:
23392343
return ReturnValue(Device->ZeDeviceProperties->name);
2344+
// zeModuleCreate allows using root device module for sub-devices:
2345+
// > The application must only use the module for the device, or its
2346+
// > sub-devices, which was provided during creation.
2347+
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE:
2348+
return ReturnValue(PI_FALSE);
23402349
case PI_DEVICE_INFO_COMPILER_AVAILABLE:
23412350
return ReturnValue(pi_bool{1});
23422351
case PI_DEVICE_INFO_LINKER_AVAILABLE:

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,17 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
203203
std::memcpy(paramValue, &result, sizeof(cl_bool));
204204
return PI_SUCCESS;
205205
}
206+
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: {
207+
cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
208+
cl_int res = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
209+
sizeof(cl_device_type), &devType, nullptr);
206210

211+
// FIXME: here we assume that program built for a root GPU device can be
212+
// used on its sub-devices without re-building
213+
cl_bool result = (res == CL_SUCCESS) && (devType == CL_DEVICE_TYPE_GPU);
214+
std::memcpy(paramValue, &result, sizeof(cl_bool));
215+
return PI_SUCCESS;
216+
}
207217
case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D:
208218
// Returns the maximum sizes of a work group for each dimension one
209219
// could use to submit a kernel. There is no such query defined in OpenCL

sycl/source/detail/device_impl.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,11 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle,
5353
Plugin.call<PiApiKind::piDeviceGetInfo>(
5454
MDevice, PI_DEVICE_INFO_TYPE, sizeof(RT::PiDeviceType), &MType, nullptr);
5555

56-
RT::PiDevice parent = nullptr;
5756
// TODO catch an exception and put it to list of asynchronous exceptions
5857
Plugin.call<PiApiKind::piDeviceGetInfo>(MDevice, PI_DEVICE_INFO_PARENT_DEVICE,
59-
sizeof(RT::PiDevice), &parent,
58+
sizeof(RT::PiDevice), &MRootDevice,
6059
nullptr);
6160

62-
MIsRootDevice = (nullptr == parent);
6361
if (!InteroperabilityConstructor) {
6462
// TODO catch an exception and put it to list of asynchronous exceptions
6563
// Interoperability Constructor already calls DeviceRetain in

sycl/source/detail/device_impl.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,14 +226,16 @@ class device_impl {
226226

227227
bool isAssertFailSupported() const;
228228

229+
bool isRootDevice() const { return MRootDevice == nullptr; }
230+
229231
std::string getDeviceName() const;
230232

231233
private:
232234
explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device,
233235
PlatformImplPtr Platform, const plugin &Plugin);
234236
RT::PiDevice MDevice = 0;
235237
RT::PiDeviceType MType;
236-
bool MIsRootDevice = false;
238+
RT::PiDevice MRootDevice = nullptr;
237239
bool MIsHostDevice;
238240
PlatformImplPtr MPlatform;
239241
bool MIsAssertFailSupported = false;

sycl/source/detail/persistent_device_code_cache.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,13 @@ void PersistentDeviceCodeCache::putItemToDisc(
7878
const SerializedObj &SpecConsts, const std::string &BuildOptionsString,
7979
const RT::PiProgram &NativePrg) {
8080

81+
if (!isImageCached(Img))
82+
return;
83+
8184
std::string DirName =
8285
getCacheItemPath(Device, Img, SpecConsts, BuildOptionsString);
8386

84-
if (!isImageCached(Img) || DirName.empty())
87+
if (DirName.empty())
8588
return;
8689

8790
auto Plugin = detail::getSyclObjImpl(Device)->getPlugin();
@@ -137,10 +140,13 @@ std::vector<std::vector<char>> PersistentDeviceCodeCache::getItemFromDisc(
137140
const device &Device, const RTDeviceBinaryImage &Img,
138141
const SerializedObj &SpecConsts, const std::string &BuildOptionsString) {
139142

143+
if (!isImageCached(Img))
144+
return {};
145+
140146
std::string Path =
141147
getCacheItemPath(Device, Img, SpecConsts, BuildOptionsString);
142148

143-
if (!isImageCached(Img) || Path.empty() || !OSUtil::isPathPresent(Path))
149+
if (Path.empty() || !OSUtil::isPathPresent(Path))
144150
return {};
145151

146152
int i = 0;

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,10 +481,29 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
481481
if (Prg)
482482
Prg->stableSerializeSpecConstRegistry(SpecConsts);
483483

484-
auto BuildF = [this, &M, &KSId, &ContextImpl, &DeviceImpl, Prg, &CompileOpts,
484+
// Check if we can optimize program builds for sub-devices by using a program
485+
// built for the root device
486+
DeviceImplPtr RootDevImpl = DeviceImpl;
487+
while (!RootDevImpl->isRootDevice()) {
488+
auto ParentDev = detail::getSyclObjImpl(
489+
RootDevImpl->get_info<info::device::parent_device>());
490+
// Sharing is allowed within a single context only
491+
if (!ContextImpl->hasDevice(ParentDev))
492+
break;
493+
RootDevImpl = ParentDev;
494+
}
495+
496+
pi_bool MustBuildOnSubdevice = PI_TRUE;
497+
ContextImpl->getPlugin().call<PiApiKind::piDeviceGetInfo>(
498+
RootDevImpl->getHandleRef(), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
499+
sizeof(pi_bool), &MustBuildOnSubdevice, nullptr);
500+
501+
DeviceImplPtr Dev =
502+
(MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl;
503+
auto BuildF = [this, &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
485504
&LinkOpts, &JITCompilationIsRequired, SpecConsts] {
486505
auto Context = createSyclObjFromImpl<context>(ContextImpl);
487-
auto Device = createSyclObjFromImpl<device>(DeviceImpl);
506+
auto Device = createSyclObjFromImpl<device>(Dev);
488507

489508
const RTDeviceBinaryImage &Img =
490509
getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired);
@@ -536,7 +555,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
536555
return BuiltProgram.release();
537556
};
538557

539-
const RT::PiDevice PiDevice = DeviceImpl->getHandleRef();
558+
const RT::PiDevice PiDevice = Dev->getHandleRef();
540559

541560
auto BuildResult = getOrBuild<PiProgramT, compile_program_error>(
542561
Cache,

0 commit comments

Comments
 (0)