From b73da28de84db0462747a419f95bec9e9dcae4a5 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Sun, 16 Mar 2025 21:00:17 -0700 Subject: [PATCH 1/6] [UR] Add new device descriptors --- unified-runtime/include/ur_api.h | 38 ++++ unified-runtime/include/ur_print.h | 10 + unified-runtime/include/ur_print.hpp | 204 ++++++++++++++++++ unified-runtime/scripts/core/device.yml | 35 +++ .../source/adapters/cuda/device.cpp | 4 + .../source/adapters/hip/device.cpp | 4 + .../source/adapters/level_zero/common.cpp | 8 + .../source/adapters/level_zero/device.cpp | 168 +++++++++++---- .../source/adapters/native_cpu/device.cpp | 4 + .../source/adapters/opencl/device.cpp | 4 + unified-runtime/source/loader/loader.def.in | 1 + unified-runtime/source/loader/loader.map.in | 1 + unified-runtime/source/loader/ur_print.cpp | 9 + .../conformance/device/urDeviceGetInfo.cpp | 68 ++++++ .../testing/include/uur/optional_queries.h | 4 + unified-runtime/tools/urinfo/urinfo.hpp | 9 + 16 files changed, 534 insertions(+), 37 deletions(-) diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index ef5b0070d6169..a142083f2d9bb 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2225,6 +2225,16 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] return true if the device has a native assert /// implementation. UR_DEVICE_INFO_USE_NATIVE_ASSERT = 122, + /// [::ur_device_throttle_reasons_flags_t][optional-query] return current + /// clock throttle reasons. + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS = 123, + /// [int32_t][optional-query] return the current speed of fan as a + /// percentage of the maximum speed. + UR_DEVICE_INFO_FAN_SPEED = 124, + /// [int32_t][optional-query] return min power limit in milliwatts. + UR_DEVICE_INFO_MIN_POWER_LIMIT = 125, + /// [int32_t][optional-query] return min power limit in milliwatts. + UR_DEVICE_INFO_MAX_POWER_LIMIT = 126, /// [::ur_bool_t] Returns true if the device supports the use of /// command-buffers. UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, @@ -2869,6 +2879,34 @@ typedef enum ur_device_usm_access_capability_flag_t { /// @brief Bit Mask for validating ur_device_usm_access_capability_flags_t #define UR_DEVICE_USM_ACCESS_CAPABILITY_FLAGS_MASK 0xfffffff0 +/////////////////////////////////////////////////////////////////////////////// +/// @brief Clock throttle reasons +typedef uint32_t ur_device_throttle_reasons_flags_t; +typedef enum ur_device_throttle_reasons_flag_t { + /// The clock frequency is throttled due to hitting the power limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP = UR_BIT(0), + /// The clock frequency is throttled due to hitting the current limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT = UR_BIT(1), + /// The clock frequency is throttled due to hitting the thermal limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT = UR_BIT(2), + /// The clock frequency is throttled due to power supply assertion. + UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT = UR_BIT(3), + /// The clock frequency is throttled due to software supplied frequency + /// range. + UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE = UR_BIT(4), + /// The clock frequency is throttled because there is a sub block that has + /// a lower frequency when it receives clocks. + UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE = UR_BIT(5), + /// The clock frequency is throttled due to other reason. + UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER = UR_BIT(6), + /// @cond + UR_DEVICE_THROTTLE_REASONS_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_device_throttle_reasons_flag_t; +/// @brief Bit Mask for validating ur_device_throttle_reasons_flags_t +#define UR_DEVICE_THROTTLE_REASONS_FLAGS_MASK 0xffffff80 + #if !defined(__GNUC__) #pragma endregion #endif diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index af0bd693f3f50..5b670f567e828 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -332,6 +332,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceUsmAccessCapabilityFlags( enum ur_device_usm_access_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_throttle_reasons_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceThrottleReasonsFlags( + enum ur_device_throttle_reasons_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_context_flag_t enum /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 9dd6b16d2c7ee..3c27df3671037 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -102,6 +102,10 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t +printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -331,6 +335,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_memory_scope_capability_flag_t value); inline std::ostream & operator<<(std::ostream &os, enum ur_device_usm_access_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_throttle_reasons_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_context_flag_t value); inline std::ostream & operator<<(std::ostream &os, @@ -2933,6 +2939,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_USE_NATIVE_ASSERT: os << "UR_DEVICE_INFO_USE_NATIVE_ASSERT"; break; + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + os << "UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS"; + break; + case UR_DEVICE_INFO_FAN_SPEED: + os << "UR_DEVICE_INFO_FAN_SPEED"; + break; + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + os << "UR_DEVICE_INFO_MIN_POWER_LIMIT"; + break; + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + os << "UR_DEVICE_INFO_MAX_POWER_LIMIT"; + break; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; break; @@ -4618,6 +4636,60 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { + const ur_device_throttle_reasons_flags_t *tptr = + (const ur_device_throttle_reasons_flags_t *)ptr; + if (sizeof(ur_device_throttle_reasons_flags_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_device_throttle_reasons_flags_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + ur::details::printFlag(os, *tptr); + + os << ")"; + } break; + case UR_DEVICE_INFO_FAN_SPEED: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_MIN_POWER_LIMIT: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_MAX_POWER_LIMIT: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -5938,6 +6010,138 @@ printFlag(std::ostream &os, } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_throttle_reasons_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_throttle_reasons_flag_t value) { + switch (value) { + case UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_throttle_reasons_flag_t flag +template <> +inline ur_result_t +printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_context_flag_t type /// @returns /// std::ostream & diff --git a/unified-runtime/scripts/core/device.yml b/unified-runtime/scripts/core/device.yml index cad893fb733a0..edbac0e72166f 100644 --- a/unified-runtime/scripts/core/device.yml +++ b/unified-runtime/scripts/core/device.yml @@ -449,6 +449,14 @@ etors: desc: "[$x_bool_t] support the $xProgramSetSpecializationConstants entry point" - name: USE_NATIVE_ASSERT desc: "[$x_bool_t] return true if the device has a native assert implementation." + - name: CURRENT_CLOCK_THROTTLE_REASONS + desc: "[$x_device_throttle_reasons_flags_t][optional-query] return current clock throttle reasons." + - name: FAN_SPEED + desc: "[int32_t][optional-query] return the current speed of fan as a percentage of the maximum speed." + - name: MIN_POWER_LIMIT + desc: "[int32_t][optional-query] return min power limit in milliwatts." + - name: MAX_POWER_LIMIT + desc: "[int32_t][optional-query] return min power limit in milliwatts." --- #-------------------------------------------------------------------------- type: function desc: "Retrieves various information about device" @@ -929,3 +937,30 @@ etors: - name: ATOMIC_CONCURRENT_ACCESS desc: "Memory can be accessed atomically and concurrently" value: "$X_BIT(3)" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Clock throttle reasons" +class: $xDevice +name: $x_device_throttle_reasons_flags_t +etors: + - name: POWER_CAP + desc: "The clock frequency is throttled due to hitting the power limit." + value: "$X_BIT(0)" + - name: CURRENT_LIMIT + desc: "The clock frequency is throttled due to hitting the current limit." + value: "$X_BIT(1)" + - name: THERMAL_LIMIT + desc: "The clock frequency is throttled due to hitting the thermal limit." + value: "$X_BIT(2)" + - name: PSU_ALERT + desc: "The clock frequency is throttled due to power supply assertion." + value: "$X_BIT(3)" + - name: SW_RANGE + desc: "The clock frequency is throttled due to software supplied frequency range." + value: "$X_BIT(4)" + - name: HW_RANGE + desc: "The clock frequency is throttled because there is a sub block that has a lower frequency when it receives clocks." + value: "$X_BIT(5)" + - name: OTHER + desc: "The clock frequency is throttled due to other reason." + value: "$X_BIT(6)" diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index a4d33b989a2f8..17d26340a176c 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -1087,6 +1087,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case UR_DEVICE_INFO_IP_VERSION: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index dc62f6d9d0aac..bda4ef2409850 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -1038,6 +1038,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: case UR_DEVICE_INFO_IP_VERSION: case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index 37d893a348a31..94d864108e68e 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -325,6 +325,14 @@ template <> zes_structure_type_t getZesStructureType() { return ZES_STRUCTURE_TYPE_MEM_PROPERTIES; } +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; +} + +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_POWER_PROPERTIES; +} + #ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME template <> ze_structure_type_t diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index a806505fc9ee2..9ca5f6c0d1c2e 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -188,6 +188,39 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { return Device->ZeGlobalMemSize.get().value; } +// Return the Sysman device handle and correpsonding data for the given UR +// device. +static std::tuple +getZesDeviceData(ur_device_handle_t Device) { + bool SysManEnv = getenv_tobool("ZES_ENABLE_SYSMAN", false); + if ((Device->Platform->ZedeviceToZesDeviceMap.size() == 0) && !SysManEnv) { + logger::error("SysMan support is unavailable on this system. Please " + "check your level zero driver installation."); + return {nullptr, {}, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION}; + } + + zes_device_handle_t ZesDevice = Device->ZeDevice; + ur_zes_device_handle_data_t ZesDeviceData = {}; + // If legacy sysman is enabled thru the environment variable, then zesInit + // will fail, but sysman is still usable so go the legacy route. + if (!SysManEnv) { + auto It = Device->Platform->ZedeviceToZesDeviceMap.find(Device->ZeDevice); + if (It == Device->Platform->ZedeviceToZesDeviceMap.end()) { + // no matching device + return {nullptr, {}, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION}; + } else { + ZesDeviceData = + Device->Platform->ZedeviceToZesDeviceMap[Device->ZeDevice]; + ZesDevice = ZesDeviceData.ZesDevice; + } + } else { + ZesDeviceData.SubDevice = Device->isSubDevice(); + ZesDeviceData.SubDeviceId = Device->ZeDeviceProperties->subdeviceId; + } + + return {ZesDevice, ZesDeviceData, UR_RESULT_SUCCESS}; +} + ur_result_t urDeviceGetInfo( /// [in] handle of the device instance ur_device_handle_t Device, @@ -757,12 +790,6 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { - bool SysManEnv = getenv_tobool("ZES_ENABLE_SYSMAN", false); - if ((Device->Platform->ZedeviceToZesDeviceMap.size() == 0) && !SysManEnv) { - logger::error("SysMan support is unavailable on this system. Please " - "check your level zero driver installation."); - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } // Calculate the global memory size as the max limit that can be reported as // "free" memory for the user to allocate. uint64_t GlobalMemSize = calculateGlobalMemSize(Device); @@ -771,21 +798,9 @@ ur_result_t urDeviceGetInfo( uint64_t FreeMemory = 0; uint32_t MemCount = 0; - zes_device_handle_t ZesDevice = Device->ZeDevice; - struct ur_zes_device_handle_data_t ZesDeviceData = {}; - // If legacy sysman is enabled thru the environment variable, then zesInit - // will fail, but sysman is still usable so go the legacy route. - if (!SysManEnv) { - auto It = Device->Platform->ZedeviceToZesDeviceMap.find(Device->ZeDevice); - if (It == Device->Platform->ZedeviceToZesDeviceMap.end()) { - // no matching device - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } else { - ZesDeviceData = - Device->Platform->ZedeviceToZesDeviceMap[Device->ZeDevice]; - ZesDevice = ZesDeviceData.ZesDevice; - } - } + auto [ZesDevice, ZesDeviceData, Result] = getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; ZE2UR_CALL(zesDeviceEnumMemoryModules, (ZesDevice, &MemCount, nullptr)); if (MemCount != 0) { @@ -798,22 +813,11 @@ ur_result_t urDeviceGetInfo( // For root-device report memory from all memory modules since that // is what totally available in the default implicit scaling mode. // For sub-devices only report memory local to them. - if (SysManEnv) { - if (!Device->isSubDevice() || - Device->ZeDeviceProperties->subdeviceId == - ZesMemProperties.subdeviceId) { - - ZesStruct ZesMemState; - ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); - FreeMemory += ZesMemState.free; - } - } else { - if (ZesDeviceData.SubDeviceId == ZesMemProperties.subdeviceId || - !ZesDeviceData.SubDevice) { - ZesStruct ZesMemState; - ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); - FreeMemory += ZesMemState.free; - } + if (ZesDeviceData.SubDeviceId == ZesMemProperties.subdeviceId || + !ZesDeviceData.SubDevice) { + ZesStruct ZesMemState; + ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); + FreeMemory += ZesMemState.free; } } } @@ -1213,6 +1217,96 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP: return ReturnValue(true); + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { + ur_device_throttle_reasons_flags_t ThrottleReasons = 0; + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + uint32_t FreqCount = 0; + ZE2UR_CALL(zesDeviceEnumFrequencyDomains, (ZesDevice, &FreqCount, nullptr)); + if (FreqCount != 0) { + std::vector ZesFreqHandles(FreqCount); + ZE2UR_CALL(zesDeviceEnumFrequencyDomains, + (ZesDevice, &FreqCount, ZesFreqHandles.data())); + for (auto &ZesFreqHandle : ZesFreqHandles) { + ZesStruct FreqProperties; + ZE2UR_CALL(zesFrequencyGetProperties, (ZesFreqHandle, &FreqProperties)); + if (FreqProperties.type != ZES_FREQ_DOMAIN_GPU) { + continue; + } + zes_freq_state_t State; + zesFrequencyGetState(ZesFreqHandle, &State); + constexpr zes_freq_throttle_reason_flags_t ZeThrottleFlags[] = { + ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP, + ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT, + ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT, + ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT, + ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE, + ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE}; + + constexpr ur_device_throttle_reasons_flags_t UrThrottleFlags[] = { + UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP, + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT, + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT, + UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT, + UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE, + UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE}; + + for (size_t i = 0; + i < sizeof(ZeThrottleFlags) / sizeof(ZeThrottleFlags[0]); ++i) { + if (State.throttleReasons & ZeThrottleFlags[i]) { + ThrottleReasons |= UrThrottleFlags[i]; + State.throttleReasons &= ~ZeThrottleFlags[i]; + } + } + + if (State.throttleReasons) { + ThrottleReasons |= UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + } + } + } + return ReturnValue(ThrottleReasons); + } + case UR_DEVICE_INFO_FAN_SPEED: { + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + + uint32_t FanCount = 0; + ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, nullptr)); + if (FanCount == 0) + return ReturnValue(int32_t{0}); + + std::vector ZeFanHandles(FanCount); + ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, ZeFanHandles.data())); + int32_t Speed = -1; + for (auto Fan : ZeFanHandles) { + int32_t CurSpeed; + ZE2UR_CALL(zesFanGetState, (Fan, ZES_FAN_SPEED_UNITS_PERCENT, &CurSpeed)); + Speed = std::max(Speed, CurSpeed); + } + return ReturnValue(Speed); + } + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: { + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + + zes_pwr_handle_t ZesPwrHandle = nullptr; + ZE2UR_CALL(zesDeviceGetCardPowerDomain, (ZesDevice, &ZesPwrHandle)); + ZesStruct PowerProperties; + ZE2UR_CALL(zesPowerGetProperties, (ZesPwrHandle, &PowerProperties)); + + if (ParamName == UR_DEVICE_INFO_MIN_POWER_LIMIT) { + return ReturnValue(int32_t{PowerProperties.minLimit}); + } else { + return ReturnValue(int32_t{PowerProperties.maxLimit}); + } + } default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 397b30badbb20..2524d46008412 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -370,6 +370,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: case UR_DEVICE_INFO_IP_VERSION: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index b96ad24a4e791..b20fd73db5698 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1610,6 +1610,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: case UR_DEVICE_INFO_COMPONENT_DEVICES: case UR_DEVICE_INFO_COMPOSITE_DEVICE: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 039862f836e55..447749821fe03 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -259,6 +259,7 @@ EXPORTS urPrintDeviceReleaseParams urPrintDeviceRetainParams urPrintDeviceSelectBinaryParams + urPrintDeviceThrottleReasonsFlags urPrintDeviceType urPrintDeviceUsmAccessCapabilityFlags urPrintEnqueueCommandBufferExpParams diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index 9cba4122b8565..c8029693d6dbe 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -259,6 +259,7 @@ urPrintDeviceReleaseParams; urPrintDeviceRetainParams; urPrintDeviceSelectBinaryParams; + urPrintDeviceThrottleReasonsFlags; urPrintDeviceType; urPrintDeviceUsmAccessCapabilityFlags; urPrintEnqueueCommandBufferExpParams; diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index edbe0b9d4b192..8792717ac2198 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -284,6 +284,15 @@ ur_result_t urPrintDeviceUsmAccessCapabilityFlags( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintDeviceThrottleReasonsFlags(enum ur_device_throttle_reasons_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintContextFlags(enum ur_context_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { std::stringstream ss; diff --git a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp index 038543d01714a..2b53af6bee3e6 100644 --- a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp +++ b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp @@ -2577,6 +2577,74 @@ TEST_P(urDeviceGetInfoTest, SuccessUseNativeAssert) { property_value); } +TEST_P(urDeviceGetInfoTest, SuccessThrottleReasons) { + size_t property_size = 0; + const ur_device_info_t property_name = + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS; + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size)); + ASSERT_EQ(property_size, sizeof(ur_device_throttle_reasons_flag_t)); + + ur_device_throttle_reasons_flag_t property_value = + UR_DEVICE_THROTTLE_REASONS_FLAG_FORCE_UINT32; + ASSERT_SUCCESS(urDeviceGetInfo(device, property_name, property_size, + &property_value, nullptr)); + + ASSERT_EQ(property_value & UR_DEVICE_THROTTLE_REASONS_FLAGS_MASK, 0); +} + +TEST_P(urDeviceGetInfoTest, SuccessFanSpeed) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_FAN_SPEED; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + +TEST_P(urDeviceGetInfoTest, SuccessMaxPowerLimit) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_MAX_POWER_LIMIT; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + +TEST_P(urDeviceGetInfoTest, SuccessMinPowerLimit) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_MIN_POWER_LIMIT; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, diff --git a/unified-runtime/test/conformance/testing/include/uur/optional_queries.h b/unified-runtime/test/conformance/testing/include/uur/optional_queries.h index 88e4271605041..3f3bdb2953b4e 100644 --- a/unified-runtime/test/conformance/testing/include/uur/optional_queries.h +++ b/unified-runtime/test/conformance/testing/include/uur/optional_queries.h @@ -42,6 +42,10 @@ constexpr std::array optional_ur_device_info_t = { UR_DEVICE_INFO_IP_VERSION, UR_DEVICE_INFO_COMPONENT_DEVICES, UR_DEVICE_INFO_COMPOSITE_DEVICE, + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS, + UR_DEVICE_INFO_FAN_SPEED, + UR_DEVICE_INFO_MIN_POWER_LIMIT, + UR_DEVICE_INFO_MAX_POWER_LIMIT, }; template <> inline bool isQueryOptional(ur_device_info_t query) { diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index 90a1ea2350616..e230b986eb46c 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -334,6 +334,15 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_USE_NATIVE_ASSERT); std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_FAN_SPEED); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_MIN_POWER_LIMIT); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_MAX_POWER_LIMIT); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); std::cout << prefix; From 8aa9b9e848937b2de768da5af0e6a17618c6863c Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Sun, 16 Mar 2025 21:14:57 -0700 Subject: [PATCH 2/6] [SYCL] Add new descriptors to sycl_ext_intel_device_info extension --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 8 +- sycl/include/sycl/info/aspects.def | 3 + .../sycl/info/ext_intel_device_traits.def | 4 + sycl/include/sycl/info/info_desc.hpp | 13 +++ sycl/source/detail/device_impl.cpp | 15 +++ sycl/source/detail/device_info.hpp | 75 +++++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 8 ++ sycl/unittests/Extensions/CMakeLists.txt | 1 + sycl/unittests/Extensions/DeviceInfo.cpp | 101 ++++++++++++++++++ 9 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 sycl/unittests/Extensions/DeviceInfo.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 150206625f586..67c3395e90400 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -87,6 +87,9 @@ def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">; def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">; def AspectExt_intel_spill_memory_size : Aspect<"ext_intel_spill_memory_size">; +def AspectExt_intel_current_clock_throttle_reasons : Aspect<"ext_intel_current_clock_throttle_reasons">; +def AspectExt_intel_fan_speed : Aspect<"ext_intel_fan_speed">; +def AspectExt_intel_power_limits : Aspect<"ext_intel_power_limits">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -152,7 +155,10 @@ def : TargetInfo<"__TestAspectList", AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_atomic16, AspectExt_oneapi_virtual_functions, - AspectExt_intel_spill_memory_size], + AspectExt_intel_spill_memory_size, + AspectExt_intel_current_clock_throttle_reasons, + AspectExt_intel_fan_speed, + AspectExt_intel_power_limits], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index d039e4bc2fee5..acaa80df71b4f 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -73,3 +73,6 @@ __SYCL_ASPECT(ext_oneapi_bindless_images_sample_2d_usm, 79) __SYCL_ASPECT(ext_oneapi_atomic16, 80) __SYCL_ASPECT(ext_oneapi_virtual_functions, 81) __SYCL_ASPECT(ext_intel_spill_memory_size, 82) +__SYCL_ASPECT(ext_intel_current_clock_throttle_reasons, 83) +__SYCL_ASPECT(ext_intel_fan_speed, 84) +__SYCL_ASPECT(ext_intel_power_limits, 85) diff --git a/sycl/include/sycl/info/ext_intel_device_traits.def b/sycl/include/sycl/info/ext_intel_device_traits.def index 50b4e9eec952a..a9fe6e575882a 100644 --- a/sycl/include/sycl/info/ext_intel_device_traits.def +++ b/sycl/include/sycl/info/ext_intel_device_traits.def @@ -17,6 +17,10 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, uint32_t, UR_DEV __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, uint32_t, UR_DEVICE_INFO_MEMORY_BUS_WIDTH) __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, int32_t, UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) __SYCL_PARAM_TRAITS_SPEC(ext::intel::esimd, device, has_2d_block_io_support, bool, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, current_clock_throttle_reasons, std::vector, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, fan_speed, int32_t, UR_DEVICE_INFO_FAN_SPEED) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, min_power_limit, int32_t, UR_DEVICE_INFO_MIN_POWER_LIMIT) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_power_limit, int32_t, UR_DEVICE_INFO_MAX_POWER_LIMIT) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index aea8a8e40e675..eb556d7dd1735 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -259,6 +259,19 @@ template struct work_item_progress_capabilities; } // namespace ext::oneapi::experimental::info::device + +namespace ext::intel { +enum class throttle_reason { + power_cap, + current_limit, + thermal_limit, + psu_alert, + sw_range, + hw_range, + other +}; +} // namespace ext::intel + #include #include #include diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 447fbfa4e2b54..af7d21e865d61 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -476,6 +476,21 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_max_mem_bandwidth: // currently not supported return false; + case aspect::ext_intel_current_clock_throttle_reasons: + return getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS, 0, + nullptr, &return_size) == UR_RESULT_SUCCESS; + case aspect::ext_intel_fan_speed: + return getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_FAN_SPEED, 0, nullptr, &return_size) == + UR_RESULT_SUCCESS; + case aspect::ext_intel_power_limits: + return (getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_MIN_POWER_LIMIT, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS) && + (getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_MAX_POWER_LIMIT, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS); case aspect::ext_oneapi_srgb: return get_info(); case aspect::ext_oneapi_native_assert: diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 14d860afac9a4..959094d018d3e 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1637,6 +1637,81 @@ get_device_info( UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE); } +template <> +inline ext::intel::info::device::current_clock_throttle_reasons::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_current_clock_throttle_reasons)) + throw exception(make_error_code(errc::feature_not_supported), + "The device does not have the " + "ext_intel_current_clock_throttle_reasons aspect"); + + ur_device_throttle_reasons_flags_t UrThrottleReasons; + Dev->getAdapter()->call( + Dev->getHandleRef(), + UrInfoCode< + ext::intel::info::device::current_clock_throttle_reasons>::value, + sizeof(UrThrottleReasons), &UrThrottleReasons, nullptr); + std::vector ThrottleReasons; + constexpr std::pair + UR2SYCLMappings[] = {{UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP, + ext::intel::throttle_reason::power_cap}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT, + ext::intel::throttle_reason::current_limit}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT, + ext::intel::throttle_reason::thermal_limit}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT, + ext::intel::throttle_reason::psu_alert}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE, + ext::intel::throttle_reason::sw_range}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE, + ext::intel::throttle_reason::hw_range}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER, + ext::intel::throttle_reason::other}}; + + for (const auto &[UrFlag, SyclReason] : UR2SYCLMappings) { + if (UrThrottleReasons & UrFlag) { + ThrottleReasons.push_back(SyclReason); + } + } + return ThrottleReasons; +} + +template <> +inline ext::intel::info::device::fan_speed::return_type +get_device_info(const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_fan_speed)) + throw exception(make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_fan_speed aspect"); + using Param = ext::intel::info::device::fan_speed; + return get_device_info_impl::get(Dev); +} + +template <> +inline ext::intel::info::device::max_power_limit::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_power_limits)) + throw exception( + make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_power_limits aspect"); + using Param = ext::intel::info::device::max_power_limit; + return get_device_info_impl::get(Dev); +} + +template <> +inline ext::intel::info::device::min_power_limit::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_power_limits)) + throw exception( + make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_power_limits aspect"); + using Param = ext::intel::info::device::min_power_limit; + return get_device_info_impl::get(Dev); +} + // Returns the list of all progress guarantees that can be requested for // work_groups from the coordination level of root_group when using the device // given by Dev. First it calls getProgressGuarantee to get the strongest diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 564d64026ecc1..2e07c11ca538d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3682,6 +3682,8 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slice _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device12gpu_eu_countEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device15max_power_limitEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device15min_power_limitEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device16memory_bus_widthEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device17gpu_eu_simd_widthEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device17max_mem_bandwidthEEENT_11return_typeEv @@ -3690,8 +3692,10 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device21gpu_hw_th _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device23gpu_subslices_per_sliceEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25gpu_eu_count_per_subsliceEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25max_compute_queue_indicesEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device30current_clock_throttle_reasonsEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device4uuidEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9device_idEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9fan_speedEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device12architectureEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENT_11return_typeEv @@ -3792,6 +3796,8 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device10gpu_slicesEEENS0 _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device11free_memoryEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device11pci_addressEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device12gpu_eu_countEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device15max_power_limitEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device15min_power_limitEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device16memory_bus_widthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device17gpu_eu_simd_widthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device17max_mem_bandwidthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv @@ -3800,8 +3806,10 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device21gpu_hw_threads_p _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device23gpu_subslices_per_sliceEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25gpu_eu_count_per_subsliceEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25max_compute_queue_indicesEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device30current_clock_throttle_reasonsEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device4uuidEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9device_idEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9fan_speedEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device12architectureEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 0a4531eebed74..e1788eaa6e89e 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -21,6 +21,7 @@ add_sycl_unittest(ExtensionsTests OBJECT BFloat16.cpp LaunchQueries.cpp EventMode.cpp + DeviceInfo.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/DeviceInfo.cpp b/sycl/unittests/Extensions/DeviceInfo.cpp new file mode 100644 index 0000000000000..05fbe23397468 --- /dev/null +++ b/sycl/unittests/Extensions/DeviceInfo.cpp @@ -0,0 +1,101 @@ +//==------------------ DeviceInfo.cpp - device info query test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS) { + if (*params.ppPropValue) { + ur_device_throttle_reasons_flags_t *ThrottleReasons = + reinterpret_cast( + *params.ppPropValue); + *ThrottleReasons = UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP | + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT | + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + } + } else if (*params.ppropName == UR_DEVICE_INFO_FAN_SPEED) { + if (*params.ppPropValue) { + int32_t *FanSpeed = reinterpret_cast(*params.ppPropValue); + *FanSpeed = 75; + } + } else if (*params.ppropName == UR_DEVICE_INFO_MAX_POWER_LIMIT) { + if (*params.ppPropValue) { + int32_t *MaxPowerLimit = reinterpret_cast(*params.ppPropValue); + *MaxPowerLimit = 150; + } + } else if (*params.ppropName == UR_DEVICE_INFO_MIN_POWER_LIMIT) { + if (*params.ppPropValue) { + int32_t *MinPowerLimit = reinterpret_cast(*params.ppPropValue); + *MinPowerLimit = 50; + } + } + return UR_RESULT_SUCCESS; +} + +class DeviceInfoTests : public ::testing::Test { +public: + DeviceInfoTests() : Mock{}, Dev{sycl::platform().get_devices()[0]} {} + +protected: + void SetUp() override { + + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + } + + sycl::unittest::UrMock<> Mock; + sycl::device Dev; +}; + +TEST_F(DeviceInfoTests, CheckCurrentClockThrottleReasons) { + auto ThrottleReasons = + Dev.get_info(); + constexpr size_t expectedThrottleReasonsVecSize = 3; + EXPECT_EQ(ThrottleReasons.size(), expectedThrottleReasonsVecSize); + + auto HasThrottleReason = + [&](const std::vector &deviceThrottleReasons, + ext::intel::throttle_reason reasonToFind) -> bool { + return std::find(deviceThrottleReasons.begin(), deviceThrottleReasons.end(), + reasonToFind) != deviceThrottleReasons.end(); + }; + + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::power_cap)); + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::current_limit)); + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::thermal_limit)); + EXPECT_FALSE( + HasThrottleReason(ThrottleReasons, ext::intel::throttle_reason::other)); +} + +TEST_F(DeviceInfoTests, CheckFanSpeed) { + auto FanSpeed = Dev.get_info(); + EXPECT_EQ(FanSpeed, 75); +} + +TEST_F(DeviceInfoTests, CheckPowerLimits) { + auto MinPowerLimit = + Dev.get_info(); + EXPECT_EQ(MinPowerLimit, 50); + + auto MaxPowerLimit = + Dev.get_info(); + EXPECT_EQ(MaxPowerLimit, 150); +} + +} // namespace From 101cb5e1c0c48f36be85ef530f96c5a1d4d99745 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Mon, 17 Mar 2025 11:08:42 -0700 Subject: [PATCH 3/6] Update windows ABI dump --- sycl/test/abi/sycl_symbols_windows.dump | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index c55a840b78ada..3f8cab93c2626 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -65,10 +65,12 @@ ??$get_info@Ucomponent_devices@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info@Ucomposite_device@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AVdevice@23@XZ ??$get_info@Ucontext@queue@info@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVcontext@12@XZ +??$get_info@Ucurrent_clock_throttle_reasons@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4throttle_reason@intel@ext@_V1@sycl@@V?$allocator@W4throttle_reason@intel@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@Udevice@queue@info@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVdevice@12@XZ ??$get_info@Udevice_id@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Udevices@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uext_codeplay_num_regs@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z +??$get_info@Ufan_speed@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Ufree_memory@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Uglobal_work_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBA?AV?$range@$02@12@AEBVdevice@12@@Z ??$get_info@Ugpu_eu_count@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ @@ -87,11 +89,13 @@ ??$get_info@Umax_image_linear_width@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Umax_mem_bandwidth@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Umax_num_sub_groups@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z +??$get_info@Umax_power_limit@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Umax_registers_per_work_group@device@info@experimental@codeplay@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Umax_sub_group_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z ??$get_info@Umax_sub_group_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@AEBV?$range@$02@12@@Z ??$get_info@Umemory_bus_width@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Umemory_clock_rate@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ +??$get_info@Umin_power_limit@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Umipmap_max_anisotropy@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAMXZ ??$get_info@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Upci_address@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ @@ -132,6 +136,7 @@ ??$get_info_impl@Ucomponent_devices@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Ucomposite_device@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Ucontext@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBA?AVcontext@12@XZ +??$get_info_impl@Ucurrent_clock_throttle_reasons@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4throttle_reason@intel@ext@_V1@sycl@@V?$allocator@W4throttle_reason@intel@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Udevice_id@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Udevice_type@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AW4device_type@info@12@XZ ??$get_info_impl@Udouble_fp_config@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4fp_config@info@_V1@sycl@@V?$allocator@W4fp_config@info@_V1@sycl@@@std@@@std@@XZ @@ -156,6 +161,7 @@ ??$get_info_impl@Uext_oneapi_srgb@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ ??$get_info_impl@Uextensions@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@Vstring@detail@_V1@sycl@@V?$allocator@Vstring@detail@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Uextensions@platform@info@_V1@sycl@@@platform@_V1@sycl@@AEBA?AV?$vector@Vstring@detail@_V1@sycl@@V?$allocator@Vstring@detail@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@Ufan_speed@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Ufree_memory@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Ufunction_name@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uglobal_mem_cache_line_size@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ @@ -201,6 +207,7 @@ ??$get_info_impl@Umax_mem_bandwidth@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Umax_num_sub_groups@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_parameter_size@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ +??$get_info_impl@Umax_power_limit@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Umax_read_image_args@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_registers_per_work_group@device@info@experimental@codeplay@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_samplers@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ @@ -210,6 +217,7 @@ ??$get_info_impl@Umem_base_addr_align@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umemory_bus_width@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umemory_clock_rate@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Umin_power_limit@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Umipmap_max_anisotropy@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBAMXZ ??$get_info_impl@Uname@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uname@platform@info@_V1@sycl@@@platform@_V1@sycl@@AEBA?AVstring@detail@12@XZ From 023e7c3c55afbe14c1c36e9ad7253d5bc32bc1b1 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 18 Mar 2025 09:07:07 -0700 Subject: [PATCH 4/6] Improve L0 implementation --- .../source/adapters/level_zero/device.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index 5707ae8399fc7..b195c5b0c38ca 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -1221,6 +1221,11 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { ur_device_throttle_reasons_flags_t ThrottleReasons = 0; + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. + return ReturnValue(ThrottleReasons); + } [[maybe_unused]] auto [ZesDevice, Ignored, Result] = getZesDeviceData(Device); if (Result != UR_RESULT_SUCCESS) @@ -1278,8 +1283,15 @@ ur_result_t urDeviceGetInfo( uint32_t FanCount = 0; ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, nullptr)); + // If there are no fans, then report speed query as unsupported. if (FanCount == 0) + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. return ReturnValue(int32_t{0}); + } std::vector ZeFanHandles(FanCount); ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, ZeFanHandles.data())); @@ -1293,6 +1305,12 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_MIN_POWER_LIMIT: case UR_DEVICE_INFO_MAX_POWER_LIMIT: { + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. + return ReturnValue(int32_t{0}); + } + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = getZesDeviceData(Device); if (Result != UR_RESULT_SUCCESS) From 3cb3ccaff3649c1955ad74f892c3a21e411237a2 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 20 Mar 2025 08:28:05 -0700 Subject: [PATCH 5/6] Address review --- sycl/unittests/Extensions/DeviceInfo.cpp | 10 +++++----- unified-runtime/include/ur_api.h | 2 +- unified-runtime/scripts/core/device.yml | 2 +- unified-runtime/source/adapters/cuda/device.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/unittests/Extensions/DeviceInfo.cpp b/sycl/unittests/Extensions/DeviceInfo.cpp index 05fbe23397468..d5896ade01d84 100644 --- a/sycl/unittests/Extensions/DeviceInfo.cpp +++ b/sycl/unittests/Extensions/DeviceInfo.cpp @@ -31,16 +31,16 @@ ur_result_t redefinedDeviceGetInfo(void *pParams) { int32_t *FanSpeed = reinterpret_cast(*params.ppPropValue); *FanSpeed = 75; } - } else if (*params.ppropName == UR_DEVICE_INFO_MAX_POWER_LIMIT) { - if (*params.ppPropValue) { - int32_t *MaxPowerLimit = reinterpret_cast(*params.ppPropValue); - *MaxPowerLimit = 150; - } } else if (*params.ppropName == UR_DEVICE_INFO_MIN_POWER_LIMIT) { if (*params.ppPropValue) { int32_t *MinPowerLimit = reinterpret_cast(*params.ppPropValue); *MinPowerLimit = 50; } + } else if (*params.ppropName == UR_DEVICE_INFO_MAX_POWER_LIMIT) { + if (*params.ppPropValue) { + int32_t *MaxPowerLimit = reinterpret_cast(*params.ppPropValue); + *MaxPowerLimit = 150; + } } return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 15be97c33c13d..e0c4c30d059bd 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2237,7 +2237,7 @@ typedef enum ur_device_info_t { UR_DEVICE_INFO_FAN_SPEED = 124, /// [int32_t][optional-query] return min power limit in milliwatts. UR_DEVICE_INFO_MIN_POWER_LIMIT = 125, - /// [int32_t][optional-query] return min power limit in milliwatts. + /// [int32_t][optional-query] return max power limit in milliwatts. UR_DEVICE_INFO_MAX_POWER_LIMIT = 126, /// [::ur_bool_t] Returns true if the device supports the use of /// command-buffers. diff --git a/unified-runtime/scripts/core/device.yml b/unified-runtime/scripts/core/device.yml index 565ffb908cd21..20947c36cac53 100644 --- a/unified-runtime/scripts/core/device.yml +++ b/unified-runtime/scripts/core/device.yml @@ -456,7 +456,7 @@ etors: - name: MIN_POWER_LIMIT desc: "[int32_t][optional-query] return min power limit in milliwatts." - name: MAX_POWER_LIMIT - desc: "[int32_t][optional-query] return min power limit in milliwatts." + desc: "[int32_t][optional-query] return max power limit in milliwatts." --- #-------------------------------------------------------------------------- type: function desc: "Retrieves various information about device" diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index 0c8257d2793d6..96dc84555a9bd 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -1085,8 +1085,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IP_VERSION: case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: case UR_DEVICE_INFO_FAN_SPEED: - case UR_DEVICE_INFO_MAX_POWER_LIMIT: case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( From ebbe0777699d08d0875892b7f802cffcab88ace1 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 20 Mar 2025 09:21:13 -0700 Subject: [PATCH 6/6] change order --- unified-runtime/source/adapters/hip/device.cpp | 2 +- unified-runtime/source/adapters/native_cpu/device.cpp | 2 +- unified-runtime/source/adapters/opencl/device.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index f82994eedf939..98ab6184e341a 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -1041,8 +1041,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: case UR_DEVICE_INFO_FAN_SPEED: - case UR_DEVICE_INFO_MAX_POWER_LIMIT: case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index ad93b998bdf51..5d13a4a5cf2ff 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -370,8 +370,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_IP_VERSION: case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: case UR_DEVICE_INFO_FAN_SPEED: - case UR_DEVICE_INFO_MAX_POWER_LIMIT: case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index a2b2331ad0533..8ab3556b24c92 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1611,8 +1611,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_COMPOSITE_DEVICE: case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: case UR_DEVICE_INFO_FAN_SPEED: - case UR_DEVICE_INFO_MAX_POWER_LIMIT: case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: { return UR_RESULT_ERROR_INVALID_ENUMERATION;