Skip to content

[UR] Stop querying adapter fp16/fp64 support via extension. #15811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
32957aa
[UR] Stop querying adapter fp16/fp64 support via extension.
aarongreig Oct 22, 2024
5e66ecc
Simplify device info helpers
aarongreig Oct 22, 2024
8720fbe
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Oct 24, 2024
368a9e8
Rely on empty bitfield to report no type support
aarongreig Oct 28, 2024
d789703
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Oct 29, 2024
5358def
Revert change made for testing.
aarongreig Oct 29, 2024
b3b7153
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Jan 22, 2025
fba0498
Clean up some remaining uses of the old extension string.
aarongreig Jan 23, 2025
3049632
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Jan 23, 2025
ee0fc6c
Fix unit tests.
aarongreig Jan 23, 2025
189bf35
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Feb 4, 2025
62803c0
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Feb 24, 2025
3e22484
Report device fp support via config rather than extension string.
aarongreig Oct 22, 2024
461ca20
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Mar 7, 2025
f3ab64a
Fix formatting.
aarongreig Mar 10, 2025
8b55b79
Fix another bad conflict resolution.
aarongreig Mar 10, 2025
e965b3e
Adjust minimum flags in native cpu and link related issue.
aarongreig Mar 19, 2025
9ecf00b
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Mar 19, 2025
3f13197
Merge branch 'sycl' into aaron/stopReportingFPExtensions
aarongreig Jul 17, 2025
f5bcab7
Fix hip build.
aarongreig Jul 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,18 @@ ur_native_handle_t device_impl::getNative() const {
return Handle;
}

// Reports whether this device supports half precision floating point.
bool device_impl::isFp16Supported() const {
  // The half precision config query yields nothing when fp16 is unavailable,
  // so any non-empty result is treated as "supported".
  const auto &HalfFpConfig = get_info<info::device::half_fp_config>();
  const bool HasNoHalfConfig = HalfFpConfig.empty();
  return !HasNoHalfConfig;
}

// Reports whether this device supports double precision floating point.
bool device_impl::isFp64Supported() const {
  // The double precision config query yields nothing when fp64 is
  // unavailable, so any non-empty result is treated as "supported".
  const auto &DoubleFpConfig = get_info<info::device::double_fp_config>();
  const bool HasNoDoubleConfig = DoubleFpConfig.empty();
  return !HasNoDoubleConfig;
}

// On the first call this function queries for device timestamp
// along with host synchronized timestamp and stores it in member variable
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
Expand Down
10 changes: 8 additions & 2 deletions sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1222,8 +1222,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
}
CASE(emulated) { return false; }
CASE(host_debuggable) { return false; }
CASE(fp16) { return has_extension("cl_khr_fp16"); }
CASE(fp64) { return has_extension("cl_khr_fp64"); }
CASE(fp16) { return isFp16Supported(); }
CASE(fp64) { return isFp64Supported(); }
CASE(int64_base_atomics) {
return has_extension("cl_khr_int64_base_atomics");
}
Expand Down Expand Up @@ -2226,6 +2226,12 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
return {};
}

// Check if the device supports double precision floating point.
bool isFp64Supported() const;

// Check if the device supports half precision floating point.
bool isFp16Supported() const;

private:
ur_device_handle_t MDevice = 0;
// This is used for getAdapter so should be above other properties.
Expand Down
13 changes: 7 additions & 6 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1664,12 +1664,13 @@ getDeviceLibPrograms(context_impl &Context,
{DeviceLibExt::cl_intel_devicelib_bfloat16, false}};

// Disable all devicelib extensions requiring fp64 support if at least
// one underlying device doesn't support cl_khr_fp64.
const bool fp64Support = std::all_of(
Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) {
return Context.getPlatformImpl().getDeviceImpl(Device)->has_extension(
"cl_khr_fp64");
});
// one underlying device doesn't support doubles.
const bool fp64Support = std::all_of(Devices.begin(), Devices.end(),
[&Context](ur_device_handle_t Device) {
return Context.getPlatformImpl()
.getDeviceImpl(Device)
->isFp64Supported();
});

// Load a fallback library for an extension if any device does not
// support it.
Expand Down
21 changes: 19 additions & 2 deletions sycl/unittests/helpers/UrMock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ inline ur_result_t mock_urPlatformGetInfo(void *pParams) {
inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
auto params = reinterpret_cast<ur_device_get_info_params_t *>(pParams);
constexpr char MockDeviceName[] = "Mock device";
constexpr char MockSupportedExtensions[] =
"cl_khr_fp64 cl_khr_fp16 cl_khr_il_program";
constexpr char MockSupportedExtensions[] = "cl_khr_fp64 cl_khr_il_program";
switch (*params->ppropName) {
case UR_DEVICE_INFO_TYPE: {
// Act like any device is a GPU.
Expand Down Expand Up @@ -264,6 +263,24 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
**params->ppPropSizeRet = 0;
}
return UR_RESULT_SUCCESS;
case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
case UR_DEVICE_INFO_HALF_FP_CONFIG:
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG:
if (*params->ppPropValue) {
// This is the minimum requirement for a device reporting support for a
// given FP type.
ur_device_fp_capability_flags_t capabilities =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
*static_cast<ur_device_fp_capability_flags_t *>(*params->ppPropValue) =
capabilities;
}
if (*params->ppPropSizeRet) {
**params->ppPropSizeRet = sizeof(ur_device_fp_capability_flags_t);
}
return UR_RESULT_SUCCESS;
default: {
// In the default case we fill the return value with 0's. This may not be
// valid for all device queries, but it will mean a consistent return value
Expand Down
3 changes: 1 addition & 2 deletions sycl/unittests/pipes/host_pipe_registration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,11 @@ ur_result_t redefinedEnqueueWriteHostPipe(void *pParams) {
ur_result_t after_urDeviceGetInfo(void *pParams) {
auto params = *static_cast<ur_device_get_info_params_t *>(pParams);
constexpr char MockSupportedExtensions[] =
"cl_khr_fp64 cl_khr_fp16 cl_khr_il_program "
"cl_khr_fp64 cl_khr_il_program ur_exp_command_buffer "
"cl_intel_program_scope_host_pipe";
switch (*params.ppropName) {
case UR_DEVICE_INFO_EXTENSIONS:
if (*params.ppPropValue) {
std::ignore = *params.ppropSize;
assert(*params.ppropSize >= sizeof(MockSupportedExtensions));
std::memcpy(*params.ppPropValue, MockSupportedExtensions,
sizeof(MockSupportedExtensions));
Expand Down
39 changes: 22 additions & 17 deletions unified-runtime/source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(MemBaseAddrAlign);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
return ReturnValue(0u);
int Major = 0;
int Minor = 0;

UR_CHECK_ERROR(cuDeviceGetAttribute(
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));

if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
// TODO: is this config consistent across all NVIDIA GPUs?
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
} else {
return ReturnValue(0u);
}
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
Expand Down Expand Up @@ -608,21 +627,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(SS.str().c_str());
}
case UR_DEVICE_INFO_EXTENSIONS: {
std::string SupportedExtensions = "cl_khr_fp64 ";

int Major = 0;
int Minor = 0;

UR_CHECK_ERROR(cuDeviceGetAttribute(
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));

if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
SupportedExtensions += "cl_khr_fp16 ";
}

return ReturnValue(SupportedExtensions.c_str());
return ReturnValue("");
}
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
// The minimum value for the FULL profile is 1 MB.
Expand Down
41 changes: 22 additions & 19 deletions unified-runtime/source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,29 +311,43 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(MemBaseAddrAlign);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
return ReturnValue(0u);
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
}
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
return ReturnValue(Config);
}
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
hipDeviceProp_t Props;
UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get()));

if (Props.arch.hasDoubles) {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
} else {
return ReturnValue(0u);
}
}
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
}
Expand Down Expand Up @@ -495,18 +509,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(S.str().c_str());
}
case UR_DEVICE_INFO_EXTENSIONS: {
std::string SupportedExtensions = "";

hipDeviceProp_t Props;
UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get()));

if (Props.arch.hasDoubles) {
SupportedExtensions += "cl_khr_fp64 ";
}

SupportedExtensions += "cl_khr_fp16 ";

return ReturnValue(SupportedExtensions.c_str());
return ReturnValue("");
}
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
// The minimum value for the FULL profile is 1 MB.
Expand Down
6 changes: 0 additions & 6 deletions unified-runtime/source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,6 @@ ur_result_t urDeviceGetInfo(
// for performance.
// cl_intel_required_subgroup_size - Extension to allow programmers to
// optionally specify the required subgroup size for a kernel function.
// cl_khr_fp16 - Optional half floating-point support.
// cl_khr_fp64 - Support for double floating-point precision.
// cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
// extensions that implement atomic operations on 64-bit signed and
// unsigned integers to locations in __global and __local memory.
Expand All @@ -322,10 +320,6 @@ ur_result_t urDeviceGetInfo(
// Hardcoding some extensions we know are supported by all Level Zero
// devices.
SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
SupportedExtensions += ("cl_khr_fp16 ");
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
SupportedExtensions += ("cl_khr_fp64 ");
if (Device->ZeDeviceModuleProperties->flags &
ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
// int64AtomicsSupported indicates support for both.
Expand Down
26 changes: 10 additions & 16 deletions unified-runtime/source/adapters/native_cpu/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
return ReturnValue(bool{1});
case UR_DEVICE_INFO_EXTENSIONS:
// TODO : Populate return string accordingly - e.g. cl_khr_fp16,
// cl_khr_fp64, cl_khr_int64_base_atomics,
// cl_khr_int64_extended_atomics
return ReturnValue("cl_khr_fp16, cl_khr_fp64 ");
return ReturnValue("");
case UR_DEVICE_INFO_VERSION:
return ReturnValue("0.1");
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
Expand Down Expand Up @@ -193,19 +190,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
// Default minimum values required by the SYCL specification.
return ReturnValue(size_t{2048});
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
// todo:
ur_device_fp_capability_flags_t HalfFPValue = 0;
return ReturnValue(HalfFPValue);
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
// todo
ur_device_fp_capability_flags_t SingleFPValue = 0;
return ReturnValue(SingleFPValue);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG:
case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
ur_device_fp_capability_flags_t DoubleFPValue = 0;
return ReturnValue(DoubleFPValue);
// All fp types are supported, return minimum flags to indicate support.
// TODO: these should be influenced by fp related flags, see
// https://github.com/intel/llvm/issues/17530
ur_device_fp_capability_flags_t SupportedFlags =
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST;
return ReturnValue(SupportedFlags);
}
case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
return ReturnValue(uint32_t{3});
Expand Down
Loading