diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
index da497d8a97783..4ca693ee4401d 100644
--- a/sycl/source/detail/device_impl.cpp
+++ b/sycl/source/detail/device_impl.cpp
@@ -311,6 +311,18 @@ ur_native_handle_t device_impl::getNative() const {
   return Handle;
 }
 
+bool device_impl::isFp16Supported() const {
+  // If we don't get anything back from this we can assume the device doesn't
+  // support fp16.
+  return !get_info<info::device::half_fp_config>().empty();
+}
+
+bool device_impl::isFp64Supported() const {
+  // If we don't get anything back from this we can assume the device doesn't
+  // support fp64.
+  return !get_info<info::device::double_fp_config>().empty();
+}
+
 // On the first call this function queries for device timestamp
 // along with host synchronized timestamp and stores it in member variable
 // MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp
index 9c9a198a1f4d0..84b3ccd55be25 100644
--- a/sycl/source/detail/device_impl.hpp
+++ b/sycl/source/detail/device_impl.hpp
@@ -1222,8 +1222,8 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
     }
     CASE(emulated) { return false; }
     CASE(host_debuggable) { return false; }
-    CASE(fp16) { return has_extension("cl_khr_fp16"); }
-    CASE(fp64) { return has_extension("cl_khr_fp64"); }
+    CASE(fp16) { return isFp16Supported(); }
+    CASE(fp64) { return isFp64Supported(); }
     CASE(int64_base_atomics) {
       return has_extension("cl_khr_int64_base_atomics");
     }
@@ -2226,6 +2226,12 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
     return {};
   }
 
+  // Check if the device supports double precision floating point.
+  bool isFp64Supported() const;
+
+  // Check if the device supports half precision floating point.
+  bool isFp16Supported() const;
+
 private:
   ur_device_handle_t MDevice = 0;
   // This is used for getAdapter so should be above other properties.
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 9177bf6a5956c..2177fb03bce96 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1664,12 +1664,13 @@ getDeviceLibPrograms(context_impl &Context,
       {DeviceLibExt::cl_intel_devicelib_bfloat16, false}};
 
   // Disable all devicelib extensions requiring fp64 support if at least
-  // one underlying device doesn't support cl_khr_fp64.
-  const bool fp64Support = std::all_of(
-      Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) {
-        return Context.getPlatformImpl().getDeviceImpl(Device)->has_extension(
-            "cl_khr_fp64");
-      });
+  // one underlying device doesn't support doubles.
+  const bool fp64Support = std::all_of(Devices.begin(), Devices.end(),
+                                       [&Context](ur_device_handle_t Device) {
+                                         return Context.getPlatformImpl()
+                                             .getDeviceImpl(Device)
+                                             ->isFp64Supported();
+                                       });
 
   // Load a fallback library for an extension if the any device does not
   // support it.
diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp
index df44d5ea56a76..85462914daa61 100644
--- a/sycl/unittests/helpers/UrMock.hpp
+++ b/sycl/unittests/helpers/UrMock.hpp
@@ -155,8 +155,7 @@ inline ur_result_t mock_urPlatformGetInfo(void *pParams) {
 inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
   auto params = reinterpret_cast<ur_device_get_info_params_t *>(pParams);
   constexpr char MockDeviceName[] = "Mock device";
-  constexpr char MockSupportedExtensions[] =
-      "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program";
+  constexpr char MockSupportedExtensions[] = "cl_khr_fp64 cl_khr_il_program";
   switch (*params->ppropName) {
   case UR_DEVICE_INFO_TYPE: {
     // Act like any device is a GPU.
@@ -264,6 +263,24 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
       **params->ppPropSizeRet = 0;
     }
     return UR_RESULT_SUCCESS;
+  case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
+  case UR_DEVICE_INFO_HALF_FP_CONFIG:
+  case UR_DEVICE_INFO_DOUBLE_FP_CONFIG:
+    if (*params->ppPropValue) {
+      // This is the minimum requirement for a device reporting support for a
+      // given FP type.
+      ur_device_fp_capability_flags_t capabilities =
+          UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+          UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+          UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+      *static_cast<ur_device_fp_capability_flags_t *>(*params->ppPropValue) =
+          capabilities;
+    }
+    if (*params->ppPropSizeRet) {
+      **params->ppPropSizeRet = sizeof(ur_device_fp_capability_flags_t);
+    }
+    return UR_RESULT_SUCCESS;
   default: {
     // In the default case we fill the return value with 0's. This may not be
     // valid for all device queries, but it will mean a consistent return value
diff --git a/sycl/unittests/pipes/host_pipe_registration.cpp b/sycl/unittests/pipes/host_pipe_registration.cpp
index 6ba962c61bd08..8be3246494861 100644
--- a/sycl/unittests/pipes/host_pipe_registration.cpp
+++ b/sycl/unittests/pipes/host_pipe_registration.cpp
@@ -69,12 +69,11 @@ ur_result_t redefinedEnqueueWriteHostPipe(void *pParams) {
 ur_result_t after_urDeviceGetInfo(void *pParams) {
   auto params = *static_cast<ur_device_get_info_params_t *>(pParams);
   constexpr char MockSupportedExtensions[] =
-      "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program "
+      "cl_khr_fp64 cl_khr_il_program ur_exp_command_buffer "
       "cl_intel_program_scope_host_pipe";
   switch (*params.ppropName) {
   case UR_DEVICE_INFO_EXTENSIONS:
     if (*params.ppPropValue) {
-      std::ignore = *params.ppropSize;
       assert(*params.ppropSize >= sizeof(MockSupportedExtensions));
       std::memcpy(*params.ppPropValue, MockSupportedExtensions,
                   sizeof(MockSupportedExtensions));
diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp
index 6f3f450877412..421a2bb0a225e 100644
--- a/unified-runtime/source/adapters/cuda/device.cpp
+++ b/unified-runtime/source/adapters/cuda/device.cpp
@@ -434,8 +434,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(MemBaseAddrAlign);
   }
   case UR_DEVICE_INFO_HALF_FP_CONFIG: {
-    // TODO: is this config consistent across all NVIDIA GPUs?
-    return ReturnValue(0u);
+    int Major = 0;
+    int Minor = 0;
+
+    UR_CHECK_ERROR(cuDeviceGetAttribute(
+        &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
+    UR_CHECK_ERROR(cuDeviceGetAttribute(
+        &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
+
+    if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
+      // TODO: is this config consistent across all NVIDIA GPUs?
+      ur_device_fp_capability_flags_t Config =
+          UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+          UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
+          UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+      return ReturnValue(Config);
+    } else {
+      return ReturnValue(0u);
+    }
   }
   case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
     // TODO: is this config consistent across all NVIDIA GPUs?
@@ -608,21 +627,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(SS.str().c_str());
   }
   case UR_DEVICE_INFO_EXTENSIONS: {
-    std::string SupportedExtensions = "cl_khr_fp64 ";
-
-    int Major = 0;
-    int Minor = 0;
-
-    UR_CHECK_ERROR(cuDeviceGetAttribute(
-        &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
-    UR_CHECK_ERROR(cuDeviceGetAttribute(
-        &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
-
-    if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
-      SupportedExtensions += "cl_khr_fp16 ";
-    }
-
-    return ReturnValue(SupportedExtensions.c_str());
+    return ReturnValue("");
   }
   case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
     // The minimum value for the FULL profile is 1 MB.
diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp
index f8751031d4e0c..cc699e0afab72 100644
--- a/unified-runtime/source/adapters/hip/device.cpp
+++ b/unified-runtime/source/adapters/hip/device.cpp
@@ -311,29 +311,43 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(MemBaseAddrAlign);
   }
   case UR_DEVICE_INFO_HALF_FP_CONFIG: {
-    return ReturnValue(0u);
-  }
-  case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
     ur_device_fp_capability_flags_t Config =
         UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
         UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
-        UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
-        UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
+        UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
     return ReturnValue(Config);
   }
-  case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
+  case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
     ur_device_fp_capability_flags_t Config =
         UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
         UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
         UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
-        UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+        UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
+        UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
     return ReturnValue(Config);
   }
+  case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
+    hipDeviceProp_t Props;
+    UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get()));
+
+    if (Props.arch.hasDoubles) {
+      ur_device_fp_capability_flags_t Config =
+          UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
+          UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
+          UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
+          UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
+      return ReturnValue(Config);
+    } else {
+      return ReturnValue(0u);
+    }
+  }
   case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
     return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
   }
@@ -495,18 +509,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(S.str().c_str());
   }
   case UR_DEVICE_INFO_EXTENSIONS: {
-    std::string SupportedExtensions = "";
-
-    hipDeviceProp_t Props;
-    UR_CHECK_ERROR(hipGetDeviceProperties(&Props, hDevice->get()));
-
-    if (Props.arch.hasDoubles) {
-      SupportedExtensions += "cl_khr_fp64 ";
-    }
-
-    SupportedExtensions += "cl_khr_fp16 ";
-
-    return ReturnValue(SupportedExtensions.c_str());
+    return ReturnValue("");
   }
   case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
     // The minimum value for the FULL profile is 1 MB.
diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp
index 167c7cb856b10..1a4776400042e 100644
--- a/unified-runtime/source/adapters/level_zero/device.cpp
+++ b/unified-runtime/source/adapters/level_zero/device.cpp
@@ -311,8 +311,6 @@ ur_result_t urDeviceGetInfo(
     // for performance.
     // cl_intel_required_subgroup_size - Extension to allow programmers to
     // optionally specify the required subgroup size for a kernel function.
-    // cl_khr_fp16 - Optional half floating-point support.
-    // cl_khr_fp64 - Support for double floating-point precision.
     // cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
     // extensions that implement atomic operations on 64-bit signed and
     // unsigned integers to locations in __global and __local memory.
@@ -322,10 +320,6 @@ ur_result_t urDeviceGetInfo(
     // Hardcoding some extensions we know are supported by all Level Zero
     // devices.
     SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
-    if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
-      SupportedExtensions += ("cl_khr_fp16 ");
-    if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
-      SupportedExtensions += ("cl_khr_fp64 ");
     if (Device->ZeDeviceModuleProperties->flags &
         ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
       // int64AtomicsSupported indicates support for both.
diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp
index 369b4cd7ed013..389883ee9927d 100644
--- a/unified-runtime/source/adapters/native_cpu/device.cpp
+++ b/unified-runtime/source/adapters/native_cpu/device.cpp
@@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
     return ReturnValue(bool{1});
   case UR_DEVICE_INFO_EXTENSIONS:
-    // TODO : Populate return string accordingly - e.g. cl_khr_fp16,
-    // cl_khr_fp64, cl_khr_int64_base_atomics,
-    // cl_khr_int64_extended_atomics
-    return ReturnValue("cl_khr_fp16, cl_khr_fp64 ");
+    return ReturnValue("");
   case UR_DEVICE_INFO_VERSION:
     return ReturnValue("0.1");
   case UR_DEVICE_INFO_COMPILER_AVAILABLE:
@@ -193,19 +190,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
   case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
     // Default minimum values required by the SYCL specification.
     return ReturnValue(size_t{2048});
-  case UR_DEVICE_INFO_HALF_FP_CONFIG: {
-    // todo:
-    ur_device_fp_capability_flags_t HalfFPValue = 0;
-    return ReturnValue(HalfFPValue);
-  }
-  case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
-    // todo
-    ur_device_fp_capability_flags_t SingleFPValue = 0;
-    return ReturnValue(SingleFPValue);
-  }
+  case UR_DEVICE_INFO_HALF_FP_CONFIG:
+  case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
   case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
-    ur_device_fp_capability_flags_t DoubleFPValue = 0;
-    return ReturnValue(DoubleFPValue);
+    // All fp types are supported, return minimum flags to indicate support.
+    // TODO: these should be influenced by fp related flags, see
+    // https://github.com/intel/llvm/issues/17530
+    ur_device_fp_capability_flags_t SupportedFlags =
+        UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
+        UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST;
+    return ReturnValue(SupportedFlags);
   }
   case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
     return ReturnValue(uint32_t{3});