From 32957aaf04484daff2101f52d852b37123b98855 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 22 Oct 2024 11:38:01 +0100 Subject: [PATCH 01/12] [UR] Stop querying adapter fp16/fp64 support via extension. --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 4 ++-- sycl/source/detail/device_impl.cpp | 18 ++++++++++++++++-- sycl/source/detail/device_impl.hpp | 6 ++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 7dbb2f4c604ea..3333adeb1eb8c 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,14 +116,14 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/aarongreig/unified-runtime.git") # commit c742ca49efb12380a35b8b0b467e6577ab8174ce # Merge: 3a8bf2c5 504d3b63 # Author: Kenneth Benzie (Benie) # Date: Mon Oct 21 11:55:23 2024 +0100 # Merge pull request #2131 from Bensuo/ben/command-handle-fix # [EXP][CMDBUF] Make command handle behaviour consistent - set(UNIFIED_RUNTIME_TAG c742ca49efb12380a35b8b0b467e6577ab8174ce) + set(UNIFIED_RUNTIME_TAG aaron/stopReportingFPExtensions) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index e0508b57e912b..bccf600d7cc5d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -367,9 +367,9 @@ bool device_impl::has(aspect Aspect) const { case aspect::host_debuggable: return false; case aspect::fp16: - return has_extension("cl_khr_fp16"); + return isFp16Supported(); case aspect::fp64: - return has_extension("cl_khr_fp64"); + return isFp64Supported(); case 
aspect::int64_base_atomics: return has_extension("cl_khr_int64_base_atomics"); case aspect::int64_extended_atomics: @@ -800,6 +800,20 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const { return MDeviceArch; } +bool device_impl::isFp16Supported() const { + auto Fp16Config = get_info(); + // If we don't get anything back from this we can assume the device doesn't + // support fp16. + return Fp16Config.empty() ? false : true; +} + +bool device_impl::isFp64Supported() const { + auto Fp64Config = get_info(); + // If we don't get anything back from this we can assume the device doesn't + // support fp64. + return Fp64Config.empty() ? false : true; +} + // On the first call this function queries for device timestamp // along with host synchronized timestamp and stores it in member variable // MDeviceHostBaseTime. Subsequent calls to this function would just retrieve diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index b38b7582f3f28..f405e2a7ac612 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -293,6 +293,12 @@ class device_impl { /// Get device architecture ext::oneapi::experimental::architecture getDeviceArch() const; + // Check if the device supports double precision floating point. + bool isFp64Supported() const; + + // Check if the device supports half precision floating point. 
+ bool isFp16Supported() const; + private: explicit device_impl(ur_native_handle_t InteropDevice, ur_device_handle_t Device, PlatformImplPtr Platform, From 5e66eccaa178f464f8af5cced33b21dc3321f599 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 22 Oct 2024 16:50:37 +0100 Subject: [PATCH 02/12] Simplify device info helpers --- sycl/source/detail/device_impl.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index bccf600d7cc5d..cdd30ac29b343 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -801,17 +801,15 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const { } bool device_impl::isFp16Supported() const { - auto Fp16Config = get_info(); // If we don't get anything back from this we can assume the device doesn't // support fp16. - return Fp16Config.empty() ? false : true; + return !get_info().empty(); } bool device_impl::isFp64Supported() const { - auto Fp64Config = get_info(); // If we don't get anything back from this we can assume the device doesn't // support fp64. - return Fp64Config.empty() ? 
false : true; + return !get_info().empty(); } // On the first call this function queries for device timestamp From 368a9e82bdddc84a6b87cfd980865da4fae88af6 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 28 Oct 2024 10:22:54 +0000 Subject: [PATCH 03/12] Rely on empty bitfield to report no type support instead of checking separately --- sycl/source/detail/device_impl.cpp | 4 +++- sycl/source/detail/device_info.hpp | 6 ------ sycl/unittests/helpers/UrMock.hpp | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index cdd30ac29b343..953a1da4f2c78 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -803,7 +803,9 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const { bool device_impl::isFp16Supported() const { // If we don't get anything back from this we can assume the device doesn't // support fp16. - return !get_info().empty(); + auto halfConfig = get_info(); + // return !get_info().empty(); + return !halfConfig.empty(); } bool device_impl::isFp64Supported() const { diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 6eedec5b4f404..86f9d32bfaa98 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -232,12 +232,6 @@ struct get_device_info_impl { template struct get_device_info_impl, Param> { static std::vector get(const DeviceImplPtr &Dev) { - // Check if fp type is supported - if (!get_device_info_impl< - typename check_fp_support::type::return_type, - typename check_fp_support::type>::get(Dev)) { - return {}; - } ur_device_fp_capability_flags_t result; Dev->getAdapter()->call( Dev->getHandleRef(), UrInfoCode::value, sizeof(result), &result, diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 0beed5a06dd1c..908a310f0276d 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ 
b/sycl/unittests/helpers/UrMock.hpp @@ -253,6 +253,24 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) { **params->ppPropSizeRet = 0; } return UR_RESULT_SUCCESS; + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + case UR_DEVICE_INFO_HALF_FP_CONFIG: + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: + if (*params->ppPropValue) { + // This is the minimum requirement for a device reporting support for a + // given FP type. + ur_device_fp_capability_flags_t capabilities = + UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | + UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | + UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + *static_cast(*params->ppPropValue) = + capabilities; + } + if (*params->ppPropSizeRet) { + **params->ppPropSizeRet = sizeof(ur_device_fp_capability_flags_t); + } + return UR_RESULT_SUCCESS; default: { // In the default case we fill the return value with 0's. This may not be // valid for all device queries, but it will mean a consistent return value From 5358def86c2cb3784d8a1ef588440385bc3c4302 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 29 Oct 2024 13:57:11 +0000 Subject: [PATCH 04/12] Revert change made for testing. --- sycl/source/detail/device_impl.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 953a1da4f2c78..cdd30ac29b343 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -803,9 +803,7 @@ ext::oneapi::experimental::architecture device_impl::getDeviceArch() const { bool device_impl::isFp16Supported() const { // If we don't get anything back from this we can assume the device doesn't // support fp16. 
- auto halfConfig = get_info(); - // return !get_info().empty(); - return !halfConfig.empty(); + return !get_info().empty(); } bool device_impl::isFp64Supported() const { From fba0498a90b387b98165b658cc1addd7f541afa8 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Thu, 23 Jan 2025 14:17:36 +0000 Subject: [PATCH 05/12] Clean up some remaining uses of the old extension string. --- .../detail/program_manager/program_manager.cpp | 17 +++++++---------- sycl/unittests/helpers/UrMock.hpp | 2 +- sycl/unittests/pipes/host_pipe_registration.cpp | 3 +-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7e2265484d759..7b4b6a030cd50 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1579,16 +1579,13 @@ getDeviceLibPrograms(const ContextImplPtr Context, {DeviceLibExt::cl_intel_devicelib_bfloat16, false}}; // Disable all devicelib extensions requiring fp64 support if at least - // one underlying device doesn't support cl_khr_fp64. - const bool fp64Support = std::all_of( - Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) { - std::string DevExtList = - Context->getPlatformImpl() - ->getDeviceImpl(Device) - ->get_device_info_string( - UrInfoCode::value); - return (DevExtList.npos != DevExtList.find("cl_khr_fp64")); - }); + // one underlying device doesn't support doubles. + const bool fp64Support = std::all_of(Devices.begin(), Devices.end(), + [&Context](ur_device_handle_t Device) { + return Context->getPlatformImpl() + ->getDeviceImpl(Device) + ->isFp64Supported(); + }); // Load a fallback library for an extension if the any device does not // support it. 
diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 74d2d57c0054e..de4d30be001f2 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -156,7 +156,7 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) { auto params = reinterpret_cast(pParams); constexpr char MockDeviceName[] = "Mock device"; constexpr char MockSupportedExtensions[] = - "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program ur_exp_command_buffer"; + "cl_khr_il_program ur_exp_command_buffer"; switch (*params->ppropName) { case UR_DEVICE_INFO_TYPE: { // Act like any device is a GPU. diff --git a/sycl/unittests/pipes/host_pipe_registration.cpp b/sycl/unittests/pipes/host_pipe_registration.cpp index c821ddf4e09e9..02feb6b72b945 100644 --- a/sycl/unittests/pipes/host_pipe_registration.cpp +++ b/sycl/unittests/pipes/host_pipe_registration.cpp @@ -69,8 +69,7 @@ ur_result_t redefinedEnqueueWriteHostPipe(void *pParams) { ur_result_t after_urDeviceGetInfo(void *pParams) { auto params = *static_cast(pParams); constexpr char MockSupportedExtensions[] = - "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program " - "cl_intel_program_scope_host_pipe"; + "cl_khr_il_program cl_intel_program_scope_host_pipe"; switch (*params.ppropName) { case UR_DEVICE_INFO_EXTENSIONS: if (*params.ppPropValue) { From ee0fc6cfef6c7d7ad85726a165ff0a6e6b597c8a Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Thu, 23 Jan 2025 15:55:01 +0000 Subject: [PATCH 06/12] Fix unit tests. 
--- sycl/unittests/helpers/UrMock.hpp | 2 +- sycl/unittests/pipes/host_pipe_registration.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index de4d30be001f2..745f75bfd35a9 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -156,7 +156,7 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) { auto params = reinterpret_cast(pParams); constexpr char MockDeviceName[] = "Mock device"; constexpr char MockSupportedExtensions[] = - "cl_khr_il_program ur_exp_command_buffer"; + "cl_khr_fp64 cl_khr_il_program ur_exp_command_buffer"; switch (*params->ppropName) { case UR_DEVICE_INFO_TYPE: { // Act like any device is a GPU. diff --git a/sycl/unittests/pipes/host_pipe_registration.cpp b/sycl/unittests/pipes/host_pipe_registration.cpp index 02feb6b72b945..dc7e06338dd1d 100644 --- a/sycl/unittests/pipes/host_pipe_registration.cpp +++ b/sycl/unittests/pipes/host_pipe_registration.cpp @@ -69,11 +69,11 @@ ur_result_t redefinedEnqueueWriteHostPipe(void *pParams) { ur_result_t after_urDeviceGetInfo(void *pParams) { auto params = *static_cast(pParams); constexpr char MockSupportedExtensions[] = - "cl_khr_il_program cl_intel_program_scope_host_pipe"; + "cl_khr_fp64 cl_khr_il_program ur_exp_command_buffer " + "cl_intel_program_scope_host_pipe"; switch (*params.ppropName) { case UR_DEVICE_INFO_EXTENSIONS: if (*params.ppPropValue) { - std::ignore = *params.ppropSize; assert(*params.ppropSize >= sizeof(MockSupportedExtensions)); std::memcpy(*params.ppPropValue, MockSupportedExtensions, sizeof(MockSupportedExtensions)); From 3e224840b5ab253d386d1133a3daf05e6cf2e741 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 22 Oct 2024 15:21:43 +0100 Subject: [PATCH 07/12] Report device fp support via config rather than extension string. 
We're trying to move the UR adapters away from returning hard-coded OpenCL extension strings to report device capabilities; this is the first change in that direction. --- .../source/adapters/cuda/device.cpp | 37 +++++++++++------- .../source/adapters/hip/device.cpp | 39 +++++++++++-------- .../source/adapters/level_zero/device.cpp | 6 --- .../source/adapters/native_cpu/device.cpp | 28 ++++++------- 4 files changed, 56 insertions(+), 54 deletions(-) diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index 3e0ce05c27306..c404ee9c34d91 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -437,8 +437,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(MemBaseAddrAlign); } case UR_DEVICE_INFO_HALF_FP_CONFIG: { - // TODO: is this config consistent across all NVIDIA GPUs? - return ReturnValue(0u); + int Major = 0; + int Minor = 0; + + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); + + if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) { + // TODO: is this config consistent across all NVIDIA GPUs? + ur_device_fp_capability_flags_t Config = + UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | + UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | + UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + return ReturnValue(Config); + } else { + return ReturnValue(0u); + } } case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { // TODO: is this config consistent across all NVIDIA GPUs? 
@@ -616,7 +635,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_EXTENSIONS: { - std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups "; + std::string SupportedExtensions = "cl_khr_subgroups "; SupportedExtensions += "cl_intel_devicelib_assert "; // Return supported for the UR command-buffer experimental feature SupportedExtensions += "ur_exp_command_buffer "; @@ -624,18 +643,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, SupportedExtensions += "ur_exp_launch_properties "; SupportedExtensions += " "; - int Major = 0; - int Minor = 0; - - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); - - if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) { - SupportedExtensions += "cl_khr_fp16 "; - } - return ReturnValue(SupportedExtensions.c_str()); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index 783f4899b9f23..4b0718383b350 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -370,29 +370,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(MemBaseAddrAlign); } case UR_DEVICE_INFO_HALF_FP_CONFIG: { - return ReturnValue(0u); - } - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { ur_device_fp_capability_flags_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA | - UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + UR_DEVICE_FP_CAPABILITY_FLAG_FMA; return ReturnValue(Config); 
} - case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { ur_device_fp_capability_flags_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + UR_DEVICE_FP_CAPABILITY_FLAG_FMA | + UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; return ReturnValue(Config); } + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { + hipDeviceProp_t Props; + detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == + hipSuccess); + + if (Props.arch.hasDoubles) { + ur_device_fp_capability_flags_t Config = + UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | + UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF | + UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + return ReturnValue(Config); + } else { + return ReturnValue(0u); + } + } case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: { return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE); } @@ -581,16 +596,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, SupportedExtensions += " "; - hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); - - if (Props.arch.hasDoubles) { - SupportedExtensions += "cl_khr_fp64 "; - } - - SupportedExtensions += "cl_khr_fp16 "; - return ReturnValue(SupportedExtensions.c_str()); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index 0d5323f6c3b03..b1ad85ec1f4dd 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -260,8 +260,6 @@ ur_result_t urDeviceGetInfo( // for performance. 
// cl_intel_required_subgroup_size - Extension to allow programmers to // optionally specify the required subgroup size for a kernel function. - // cl_khr_fp16 - Optional half floating-point support. - // cl_khr_fp64 - Support for double floating-point precision. // cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional // extensions that implement atomic operations on 64-bit signed and // unsigned integers to locations in __global and __local memory. @@ -271,10 +269,6 @@ ur_result_t urDeviceGetInfo( // Hardcoding some extensions we know are supported by all Level Zero // devices. SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS); - if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16) - SupportedExtensions += ("cl_khr_fp16 "); - if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64) - SupportedExtensions += ("cl_khr_fp64 "); if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS) // int64AtomicsSupported indicates support for both. diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 6deca1ac37ac2..5c17391034082 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: return ReturnValue(bool{1}); case UR_DEVICE_INFO_EXTENSIONS: - // TODO : Populate return string accordingly - e.g. 
cl_khr_fp16, - // cl_khr_fp64, cl_khr_int64_base_atomics, - // cl_khr_int64_extended_atomics - return ReturnValue("cl_khr_fp16, cl_khr_fp64 "); + return ReturnValue(""); case UR_DEVICE_INFO_VERSION: return ReturnValue("0.1"); case UR_DEVICE_INFO_COMPILER_AVAILABLE: @@ -193,19 +190,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: // Default minimum values required by the SYCL specification. return ReturnValue(size_t{2048}); - case UR_DEVICE_INFO_HALF_FP_CONFIG: { - // todo: - ur_device_fp_capability_flags_t HalfFPValue = 0; - return ReturnValue(HalfFPValue); - } - case UR_DEVICE_INFO_SINGLE_FP_CONFIG: { - // todo - ur_device_fp_capability_flags_t SingleFPValue = 0; - return ReturnValue(SingleFPValue); - } + case UR_DEVICE_INFO_HALF_FP_CONFIG: + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { - ur_device_fp_capability_flags_t DoubleFPValue = 0; - return ReturnValue(DoubleFPValue); + // All fp types are supported, return minimum flags to indicate support. + // TODO: look at this in more detail. + ur_device_fp_capability_flags_t SupportedFlags = + UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | + UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | + UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + ; + return ReturnValue(SupportedFlags); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: return ReturnValue(uint32_t{3}); From f3ab64a99f73be5f95cbe134989cae9d04d8f184 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 10 Mar 2025 11:36:32 +0000 Subject: [PATCH 08/12] Fix formatting. 
--- sycl/unittests/helpers/UrMock.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index d950364af1a78..b0d82ccb00f78 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -155,8 +155,7 @@ inline ur_result_t mock_urPlatformGetInfo(void *pParams) { inline ur_result_t mock_urDeviceGetInfo(void *pParams) { auto params = reinterpret_cast(pParams); constexpr char MockDeviceName[] = "Mock device"; - constexpr char MockSupportedExtensions[] = - "cl_khr_fp64 cl_khr_il_program"; + constexpr char MockSupportedExtensions[] = "cl_khr_fp64 cl_khr_il_program"; switch (*params->ppropName) { case UR_DEVICE_INFO_TYPE: { // Act like any device is a GPU. From 8b55b79f2b15a3b2dd2618c51175429e05278b09 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 10 Mar 2025 14:28:35 +0000 Subject: [PATCH 09/12] Fix another bad conflict resolution. --- unified-runtime/source/adapters/cuda/device.cpp | 4 +--- unified-runtime/source/adapters/hip/device.cpp | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index ab604f5eb0059..c3e1ee75b55e2 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -634,9 +634,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(""); } case UR_DEVICE_INFO_EXTENSIONS: { - SupportedExtensions += "cl_intel_devicelib_assert "; - - return ReturnValue(SupportedExtensions.c_str()); + return ReturnValue("cl_intel_devicelib_assert"); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { // The minimum value for the FULL profile is 1 MB. 
diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index ef7c8c0473d58..36221e8ebabde 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -565,10 +565,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // DEVICELIB_ASSERT extension is set so fallback assert // postprocessing is NOP. HIP 4.3 docs indicate support for // native asserts are in progress - std::string SupportedExtensions = ""; - SupportedExtensions += "cl_intel_devicelib_assert "; - - return ReturnValue(SupportedExtensions.c_str()); + return ReturnValue("cl_intel_devicelib_assert"); } case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: { // The minimum value for the FULL profile is 1 MB. From e965b3e3bd4d1d249cca54ac8368163a6a9ebc64 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 19 Mar 2025 14:36:09 +0000 Subject: [PATCH 10/12] Adjust minimum flags in native cpu and link related issue. --- unified-runtime/source/adapters/native_cpu/device.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 247ab653adf21..0c4b932655f6b 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -194,13 +194,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SINGLE_FP_CONFIG: case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { // All fp types are supported, return minimum flags to indicate support. - // TODO: look at this in more detail. 
+ // TODO: these should be influenced by fp related flags, see + // https://github.com/intel/llvm/issues/17530 ur_device_fp_capability_flags_t SupportedFlags = - UR_DEVICE_FP_CAPABILITY_FLAG_DENORM | UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN | - UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST | - UR_DEVICE_FP_CAPABILITY_FLAG_FMA; - ; + UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; return ReturnValue(SupportedFlags); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: From f5bcab7ca1a685ab5fffb2fc491083a7e3c16aaa Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 18 Jul 2025 10:55:42 +0100 Subject: [PATCH 11/12] Fix hip build. --- unified-runtime/source/adapters/hip/device.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index a177a7093a5c1..cc699e0afab72 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -333,8 +333,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { hipDeviceProp_t Props; - detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) == - hipSuccess); + UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get())); if (Props.arch.hasDoubles) { ur_device_fp_capability_flags_t Config = From 797dd4cf34653c253b5fdfed4c001fce4ae18401 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 21 Jul 2025 11:41:40 +0100 Subject: [PATCH 12/12] Fix unit tests and report proper vec widths for hip + cuda. 
--- .../SYCL2020/DeviceGetInfoAspects.cpp | 4 ++-- sycl/unittests/helpers/UrMock.hpp | 10 +++++++++ .../source/adapters/cuda/device.cpp | 22 +++++++++++++++++++ .../source/adapters/hip/device.cpp | 4 ++-- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp b/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp index 0709e88c3fba1..790edfa0296bc 100644 --- a/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp +++ b/sycl/unittests/SYCL2020/DeviceGetInfoAspects.cpp @@ -32,8 +32,8 @@ TEST(DeviceGetInfo, SupportedDeviceAspects) { Dev.get_info(); // Tests to examine aspects of default mock device, as defined in - // helpers/UrMockAdapter.hpp so these tests all need to be kept in sync with - // changes to that file. + // helpers/UrMock.hpp so these tests all need to be kept in sync with changes + // to that file. EXPECT_TRUE(containsAspect(DeviceAspects, aspect::gpu)); EXPECT_TRUE(containsAspect(DeviceAspects, aspect::fp16)); EXPECT_TRUE(containsAspect(DeviceAspects, aspect::fp64)); diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 85462914daa61..6a3242677bae6 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -281,6 +281,16 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) { **params->ppPropSizeRet = sizeof(ur_device_fp_capability_flags_t); } return UR_RESULT_SUCCESS; + // SYCL requires these to be reported by devices supporting fp16/fp64 + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + if (*params->ppPropValue) { + *static_cast(*params->ppPropValue) = 1u; + } + if (*params->ppPropSizeRet) { + **params->ppPropSizeRet = sizeof(uint32_t); + } + return UR_RESULT_SUCCESS; default: { // In the default case we fill the return value with 0's. 
This may not be // valid for all device queries, but it will mean a consistent return value diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index 421a2bb0a225e..fdc2dbc5c47ea 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -139,6 +139,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(1u); } case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: { + int Major = 0; + int Minor = 0; + + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); + + if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) { + return ReturnValue(1u); + } return ReturnValue(0u); } case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: { @@ -160,6 +171,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(1u); } case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: { + int Major = 0; + int Minor = 0; + + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); + UR_CHECK_ERROR(cuDeviceGetAttribute( + &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get())); + + if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) { + return ReturnValue(1u); + } return ReturnValue(0u); } case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index cc699e0afab72..55dfe513f417b 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -147,7 +147,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(1u); } case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: { - return 
ReturnValue(0u); + return ReturnValue(1u); } case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: { return ReturnValue(1u); @@ -168,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(1u); } case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: { - return ReturnValue(0u); + return ReturnValue(1u); } case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { // Number of sub-groups = max block size / warp size + possible remainder