diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 64bacd3964540..74c59fbc18377 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -88,6 +88,10 @@ def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">; def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">; def AspectExt_intel_spill_memory_size : Aspect<"ext_intel_spill_memory_size">; def AspectExt_oneapi_bindless_images_gather : Aspect<"ext_oneapi_bindless_images_gather">; +def AspectExt_intel_current_clock_throttle_reasons : Aspect<"ext_intel_current_clock_throttle_reasons">; +def AspectExt_intel_fan_speed : Aspect<"ext_intel_fan_speed">; +def AspectExt_intel_power_limits : Aspect<"ext_intel_power_limits">; + // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -154,7 +158,10 @@ def : TargetInfo<"__TestAspectList", AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_atomic16, AspectExt_oneapi_virtual_functions, - AspectExt_intel_spill_memory_size], + AspectExt_intel_spill_memory_size, + AspectExt_intel_current_clock_throttle_reasons, + AspectExt_intel_fan_speed, + AspectExt_intel_power_limits], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index cf6d072806c11..b1d5330498ade 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -74,3 +74,7 @@ __SYCL_ASPECT(ext_oneapi_atomic16, 80) __SYCL_ASPECT(ext_oneapi_virtual_functions, 81) __SYCL_ASPECT(ext_intel_spill_memory_size, 82) __SYCL_ASPECT(ext_oneapi_bindless_images_gather, 83) +__SYCL_ASPECT(ext_intel_current_clock_throttle_reasons, 84) +__SYCL_ASPECT(ext_intel_fan_speed, 85) +__SYCL_ASPECT(ext_intel_power_limits, 86) + diff --git a/sycl/include/sycl/info/ext_intel_device_traits.def b/sycl/include/sycl/info/ext_intel_device_traits.def index 50b4e9eec952a..a9fe6e575882a 100644 --- a/sycl/include/sycl/info/ext_intel_device_traits.def +++ b/sycl/include/sycl/info/ext_intel_device_traits.def @@ -17,6 +17,10 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, uint32_t, UR_DEV __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, uint32_t, UR_DEVICE_INFO_MEMORY_BUS_WIDTH) __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, int32_t, UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) __SYCL_PARAM_TRAITS_SPEC(ext::intel::esimd, device, has_2d_block_io_support, bool, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, current_clock_throttle_reasons, std::vector, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, fan_speed, int32_t, UR_DEVICE_INFO_FAN_SPEED) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, min_power_limit, int32_t, UR_DEVICE_INFO_MIN_POWER_LIMIT) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_power_limit, int32_t, UR_DEVICE_INFO_MAX_POWER_LIMIT) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index aea8a8e40e675..eb556d7dd1735 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -259,6 +259,19 @@ template struct work_item_progress_capabilities; } // namespace ext::oneapi::experimental::info::device + +namespace ext::intel { +enum class throttle_reason { + power_cap, + current_limit, + thermal_limit, + psu_alert, + sw_range, + hw_range, + other +}; +} // namespace ext::intel + #include #include #include diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index e6cb0c174cec2..5d3d4217e454b 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -476,6 +476,21 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_max_mem_bandwidth: // currently not supported return false; + case aspect::ext_intel_current_clock_throttle_reasons: + return getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS, 0, + nullptr, &return_size) == UR_RESULT_SUCCESS; + case aspect::ext_intel_fan_speed: + return getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_FAN_SPEED, 0, nullptr, &return_size) == + UR_RESULT_SUCCESS; + case aspect::ext_intel_power_limits: + return (getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_MIN_POWER_LIMIT, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS) && + (getAdapter()->call_nocheck( + MDevice, UR_DEVICE_INFO_MAX_POWER_LIMIT, 0, nullptr, + &return_size) == UR_RESULT_SUCCESS); case aspect::ext_oneapi_srgb: return get_info(); case aspect::ext_oneapi_native_assert: diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 14d860afac9a4..959094d018d3e 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1637,6 +1637,81 @@ get_device_info( UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE); } +template <> +inline ext::intel::info::device::current_clock_throttle_reasons::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_current_clock_throttle_reasons)) + throw exception(make_error_code(errc::feature_not_supported), + "The device does not have the " + "ext_intel_current_clock_throttle_reasons aspect"); + + ur_device_throttle_reasons_flags_t UrThrottleReasons; + Dev->getAdapter()->call( + Dev->getHandleRef(), + UrInfoCode< + ext::intel::info::device::current_clock_throttle_reasons>::value, + sizeof(UrThrottleReasons), &UrThrottleReasons, nullptr); + std::vector ThrottleReasons; + constexpr std::pair + UR2SYCLMappings[] = {{UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP, + ext::intel::throttle_reason::power_cap}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT, + ext::intel::throttle_reason::current_limit}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT, + ext::intel::throttle_reason::thermal_limit}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT, + ext::intel::throttle_reason::psu_alert}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE, + ext::intel::throttle_reason::sw_range}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE, + ext::intel::throttle_reason::hw_range}, + {UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER, + ext::intel::throttle_reason::other}}; + + for (const auto &[UrFlag, SyclReason] : UR2SYCLMappings) { + if (UrThrottleReasons & UrFlag) { + ThrottleReasons.push_back(SyclReason); + } + } + return ThrottleReasons; +} + +template <> +inline ext::intel::info::device::fan_speed::return_type +get_device_info(const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_fan_speed)) + throw exception(make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_fan_speed aspect"); + using Param = ext::intel::info::device::fan_speed; + return get_device_info_impl::get(Dev); +} + +template <> +inline ext::intel::info::device::max_power_limit::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_power_limits)) + throw exception( + make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_power_limits aspect"); + using Param = ext::intel::info::device::max_power_limit; + return get_device_info_impl::get(Dev); +} + +template <> +inline ext::intel::info::device::min_power_limit::return_type +get_device_info( + const DeviceImplPtr &Dev) { + if (!Dev->has(aspect::ext_intel_power_limits)) + throw exception( + make_error_code(errc::feature_not_supported), + "The device does not have the ext_intel_power_limits aspect"); + using Param = ext::intel::info::device::min_power_limit; + return get_device_info_impl::get(Dev); +} + // Returns the list of all progress guarantees that can be requested for // work_groups from the coordination level of root_group when using the device // given by Dev. First it calls getProgressGuarantee to get the strongest diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 63f95d2a5cae4..d16f2d2b7a131 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3684,6 +3684,8 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slice _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device12gpu_eu_countEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device15max_power_limitEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device15min_power_limitEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device16memory_bus_widthEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device17gpu_eu_simd_widthEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device17max_mem_bandwidthEEENT_11return_typeEv @@ -3692,8 +3694,10 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device21gpu_hw_th _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device23gpu_subslices_per_sliceEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25gpu_eu_count_per_subsliceEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25max_compute_queue_indicesEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device30current_clock_throttle_reasonsEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device4uuidEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9device_idEEENT_11return_typeEv +_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9fan_speedEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device12architectureEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENT_11return_typeEv @@ -3794,6 +3798,8 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device10gpu_slicesEEENS0 _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device11free_memoryEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device11pci_addressEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device12gpu_eu_countEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device15max_power_limitEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device15min_power_limitEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device16memory_bus_widthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device17gpu_eu_simd_widthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device17max_mem_bandwidthEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv @@ -3802,8 +3808,10 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device21gpu_hw_threads_p _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device23gpu_subslices_per_sliceEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25gpu_eu_count_per_subsliceEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25max_compute_queue_indicesEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device30current_clock_throttle_reasonsEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device4uuidEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9device_idEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv +_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9fan_speedEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device12architectureEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv _ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 37de88770e9cb..20eed583e5e4c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -65,10 +65,12 @@ ??$get_info@Ucomponent_devices@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info@Ucomposite_device@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AVdevice@23@XZ ??$get_info@Ucontext@queue@info@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVcontext@12@XZ +??$get_info@Ucurrent_clock_throttle_reasons@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@W4throttle_reason@intel@ext@_V1@sycl@@V?$allocator@W4throttle_reason@intel@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info@Udevice@queue@info@_V1@sycl@@@queue@_V1@sycl@@QEBA?AVdevice@12@XZ ??$get_info@Udevice_id@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Udevices@context@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uext_codeplay_num_regs@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z +??$get_info@Ufan_speed@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Ufree_memory@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Uglobal_work_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBA?AV?$range@$02@12@AEBVdevice@12@@Z ??$get_info@Ugpu_eu_count@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ @@ -87,11 +89,13 @@ ??$get_info@Umax_image_linear_width@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Umax_mem_bandwidth@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Umax_num_sub_groups@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z +??$get_info@Umax_power_limit@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Umax_registers_per_work_group@device@info@experimental@codeplay@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Umax_sub_group_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@@Z ??$get_info@Umax_sub_group_size@kernel_device_specific@info@_V1@sycl@@@kernel@_V1@sycl@@QEBAIAEBVdevice@12@AEBV?$range@$02@12@@Z ??$get_info@Umemory_bus_width@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Umemory_clock_rate@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ +??$get_info@Umin_power_limit@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ ??$get_info@Umipmap_max_anisotropy@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAMXZ ??$get_info@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_KXZ ??$get_info@Upci_address@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ @@ -132,6 +136,7 @@ ??$get_info_impl@Ucomponent_devices@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Ucomposite_device@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Ucontext@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBA?AVcontext@12@XZ +??$get_info_impl@Ucurrent_clock_throttle_reasons@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4throttle_reason@intel@ext@_V1@sycl@@V?$allocator@W4throttle_reason@intel@ext@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Udevice_id@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Udevice_type@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AW4device_type@info@12@XZ ??$get_info_impl@Udouble_fp_config@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4fp_config@info@_V1@sycl@@V?$allocator@W4fp_config@info@_V1@sycl@@@std@@@std@@XZ @@ -156,6 +161,7 @@ ??$get_info_impl@Uext_oneapi_srgb@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ ??$get_info_impl@Uextensions@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@Vstring@detail@_V1@sycl@@V?$allocator@Vstring@detail@_V1@sycl@@@std@@@std@@XZ ??$get_info_impl@Uextensions@platform@info@_V1@sycl@@@platform@_V1@sycl@@AEBA?AV?$vector@Vstring@detail@_V1@sycl@@V?$allocator@Vstring@detail@_V1@sycl@@@std@@@std@@XZ +??$get_info_impl@Ufan_speed@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Ufree_memory@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Ufunction_name@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uglobal_mem_cache_line_size@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ @@ -201,6 +207,7 @@ ??$get_info_impl@Umax_mem_bandwidth@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ ??$get_info_impl@Umax_num_sub_groups@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_parameter_size@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ +??$get_info_impl@Umax_power_limit@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Umax_read_image_args@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_registers_per_work_group@device@info@experimental@codeplay@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umax_samplers@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ @@ -210,6 +217,7 @@ ??$get_info_impl@Umem_base_addr_align@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umemory_bus_width@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Umemory_clock_rate@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Umin_power_limit@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAHXZ ??$get_info_impl@Umipmap_max_anisotropy@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBAMXZ ??$get_info_impl@Uname@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uname@platform@info@_V1@sycl@@@platform@_V1@sycl@@AEBA?AVstring@detail@12@XZ diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 0a4531eebed74..e1788eaa6e89e 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -21,6 +21,7 @@ add_sycl_unittest(ExtensionsTests OBJECT BFloat16.cpp LaunchQueries.cpp EventMode.cpp + DeviceInfo.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/DeviceInfo.cpp b/sycl/unittests/Extensions/DeviceInfo.cpp new file mode 100644 index 0000000000000..d5896ade01d84 --- /dev/null +++ b/sycl/unittests/Extensions/DeviceInfo.cpp @@ -0,0 +1,101 @@ +//==------------------ DeviceInfo.cpp - device info query test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS) { + if (*params.ppPropValue) { + ur_device_throttle_reasons_flags_t *ThrottleReasons = + reinterpret_cast( + *params.ppPropValue); + *ThrottleReasons = UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP | + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT | + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + } + } else if (*params.ppropName == UR_DEVICE_INFO_FAN_SPEED) { + if (*params.ppPropValue) { + int32_t *FanSpeed = reinterpret_cast(*params.ppPropValue); + *FanSpeed = 75; + } + } else if (*params.ppropName == UR_DEVICE_INFO_MIN_POWER_LIMIT) { + if (*params.ppPropValue) { + int32_t *MinPowerLimit = reinterpret_cast(*params.ppPropValue); + *MinPowerLimit = 50; + } + } else if (*params.ppropName == UR_DEVICE_INFO_MAX_POWER_LIMIT) { + if (*params.ppPropValue) { + int32_t *MaxPowerLimit = reinterpret_cast(*params.ppPropValue); + *MaxPowerLimit = 150; + } + } + return UR_RESULT_SUCCESS; +} + +class DeviceInfoTests : public ::testing::Test { +public: + DeviceInfoTests() : Mock{}, Dev{sycl::platform().get_devices()[0]} {} + +protected: + void SetUp() override { + + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + } + + sycl::unittest::UrMock<> Mock; + sycl::device Dev; +}; + +TEST_F(DeviceInfoTests, CheckCurrentClockThrottleReasons) { + auto ThrottleReasons = + Dev.get_info(); + constexpr size_t expectedThrottleReasonsVecSize = 3; + EXPECT_EQ(ThrottleReasons.size(), expectedThrottleReasonsVecSize); + + auto HasThrottleReason = + [&](const std::vector &deviceThrottleReasons, + ext::intel::throttle_reason reasonToFind) -> bool { + return std::find(deviceThrottleReasons.begin(), deviceThrottleReasons.end(), + reasonToFind) != deviceThrottleReasons.end(); + }; + + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::power_cap)); + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::current_limit)); + EXPECT_TRUE(HasThrottleReason(ThrottleReasons, + ext::intel::throttle_reason::thermal_limit)); + EXPECT_FALSE( + HasThrottleReason(ThrottleReasons, ext::intel::throttle_reason::other)); +} + +TEST_F(DeviceInfoTests, CheckFanSpeed) { + auto FanSpeed = Dev.get_info(); + EXPECT_EQ(FanSpeed, 75); +} + +TEST_F(DeviceInfoTests, CheckPowerLimits) { + auto MinPowerLimit = + Dev.get_info(); + EXPECT_EQ(MinPowerLimit, 50); + + auto MaxPowerLimit = + Dev.get_info(); + EXPECT_EQ(MaxPowerLimit, 150); +} + +} // namespace diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 4e403e6d30186..e0c4c30d059bd 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -2229,6 +2229,16 @@ typedef enum ur_device_info_t { /// [::ur_bool_t] return true if the device has a native assert /// implementation. UR_DEVICE_INFO_USE_NATIVE_ASSERT = 122, + /// [::ur_device_throttle_reasons_flags_t][optional-query] return current + /// clock throttle reasons. + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS = 123, + /// [int32_t][optional-query] return the current speed of fan as a + /// percentage of the maximum speed. + UR_DEVICE_INFO_FAN_SPEED = 124, + /// [int32_t][optional-query] return min power limit in milliwatts. + UR_DEVICE_INFO_MIN_POWER_LIMIT = 125, + /// [int32_t][optional-query] return max power limit in milliwatts. + UR_DEVICE_INFO_MAX_POWER_LIMIT = 126, /// [::ur_bool_t] Returns true if the device supports the use of /// command-buffers. UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, @@ -2879,6 +2889,34 @@ typedef enum ur_device_usm_access_capability_flag_t { /// @brief Bit Mask for validating ur_device_usm_access_capability_flags_t #define UR_DEVICE_USM_ACCESS_CAPABILITY_FLAGS_MASK 0xfffffff0 +/////////////////////////////////////////////////////////////////////////////// +/// @brief Clock throttle reasons +typedef uint32_t ur_device_throttle_reasons_flags_t; +typedef enum ur_device_throttle_reasons_flag_t { + /// The clock frequency is throttled due to hitting the power limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP = UR_BIT(0), + /// The clock frequency is throttled due to hitting the current limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT = UR_BIT(1), + /// The clock frequency is throttled due to hitting the thermal limit. + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT = UR_BIT(2), + /// The clock frequency is throttled due to power supply assertion. + UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT = UR_BIT(3), + /// The clock frequency is throttled due to software supplied frequency + /// range. + UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE = UR_BIT(4), + /// The clock frequency is throttled because there is a sub block that has + /// a lower frequency when it receives clocks. + UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE = UR_BIT(5), + /// The clock frequency is throttled due to other reason. + UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER = UR_BIT(6), + /// @cond + UR_DEVICE_THROTTLE_REASONS_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_device_throttle_reasons_flag_t; +/// @brief Bit Mask for validating ur_device_throttle_reasons_flags_t +#define UR_DEVICE_THROTTLE_REASONS_FLAGS_MASK 0xffffff80 + #if !defined(__GNUC__) #pragma endregion #endif diff --git a/unified-runtime/include/ur_print.h b/unified-runtime/include/ur_print.h index 01b589af9b3a3..520adad880b6d 100644 --- a/unified-runtime/include/ur_print.h +++ b/unified-runtime/include/ur_print.h @@ -332,6 +332,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceUsmAccessCapabilityFlags( enum ur_device_usm_access_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_throttle_reasons_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceThrottleReasonsFlags( + enum ur_device_throttle_reasons_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_context_flag_t enum /// @returns diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 8fa8022aad12e..7d015eafaa8f0 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -102,6 +102,10 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t +printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -331,6 +335,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_memory_scope_capability_flag_t value); inline std::ostream & operator<<(std::ostream &os, enum ur_device_usm_access_capability_flag_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_throttle_reasons_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_context_flag_t value); inline std::ostream & operator<<(std::ostream &os, @@ -2936,6 +2942,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_USE_NATIVE_ASSERT: os << "UR_DEVICE_INFO_USE_NATIVE_ASSERT"; break; + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + os << "UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS"; + break; + case UR_DEVICE_INFO_FAN_SPEED: + os << "UR_DEVICE_INFO_FAN_SPEED"; + break; + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + os << "UR_DEVICE_INFO_MIN_POWER_LIMIT"; + break; + case UR_DEVICE_INFO_MAX_POWER_LIMIT: + os << "UR_DEVICE_INFO_MAX_POWER_LIMIT"; + break; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; break; @@ -4614,6 +4632,60 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, os << ")"; } break; + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { + const ur_device_throttle_reasons_flags_t *tptr = + (const ur_device_throttle_reasons_flags_t *)ptr; + if (sizeof(ur_device_throttle_reasons_flags_t) > size) { + os << "invalid size (is: " << size + << ", expected: >=" << sizeof(ur_device_throttle_reasons_flags_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + ur::details::printFlag(os, *tptr); + + os << ")"; + } break; + case UR_DEVICE_INFO_FAN_SPEED: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_MIN_POWER_LIMIT: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_MAX_POWER_LIMIT: { + const int32_t *tptr = (const int32_t *)ptr; + if (sizeof(int32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(int32_t) + << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -5960,6 +6032,138 @@ printFlag(std::ostream &os, } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_throttle_reasons_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_throttle_reasons_flag_t value) { + switch (value) { + case UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE"; + break; + case UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER: + os << "UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_throttle_reasons_flag_t flag +template <> +inline ur_result_t +printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE; + } + + if ((val & UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER) == + (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER) { + val ^= (uint32_t)UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_context_flag_t type /// @returns /// std::ostream & diff --git a/unified-runtime/scripts/core/device.yml b/unified-runtime/scripts/core/device.yml index a3609f5aaedbe..20947c36cac53 100644 --- a/unified-runtime/scripts/core/device.yml +++ b/unified-runtime/scripts/core/device.yml @@ -449,6 +449,14 @@ etors: desc: "[$x_bool_t] support the $xProgramSetSpecializationConstants entry point" - name: USE_NATIVE_ASSERT desc: "[$x_bool_t] return true if the device has a native assert implementation." + - name: CURRENT_CLOCK_THROTTLE_REASONS + desc: "[$x_device_throttle_reasons_flags_t][optional-query] return current clock throttle reasons." + - name: FAN_SPEED + desc: "[int32_t][optional-query] return the current speed of fan as a percentage of the maximum speed." + - name: MIN_POWER_LIMIT + desc: "[int32_t][optional-query] return min power limit in milliwatts." + - name: MAX_POWER_LIMIT + desc: "[int32_t][optional-query] return max power limit in milliwatts." --- #-------------------------------------------------------------------------- type: function desc: "Retrieves various information about device" @@ -929,3 +937,30 @@ etors: - name: ATOMIC_CONCURRENT_ACCESS desc: "Memory can be accessed atomically and concurrently" value: "$X_BIT(3)" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Clock throttle reasons" +class: $xDevice +name: $x_device_throttle_reasons_flags_t +etors: + - name: POWER_CAP + desc: "The clock frequency is throttled due to hitting the power limit." + value: "$X_BIT(0)" + - name: CURRENT_LIMIT + desc: "The clock frequency is throttled due to hitting the current limit." + value: "$X_BIT(1)" + - name: THERMAL_LIMIT + desc: "The clock frequency is throttled due to hitting the thermal limit." + value: "$X_BIT(2)" + - name: PSU_ALERT + desc: "The clock frequency is throttled due to power supply assertion." + value: "$X_BIT(3)" + - name: SW_RANGE + desc: "The clock frequency is throttled due to software supplied frequency range." + value: "$X_BIT(4)" + - name: HW_RANGE + desc: "The clock frequency is throttled because there is a sub block that has a lower frequency when it receives clocks." + value: "$X_BIT(5)" + - name: OTHER + desc: "The clock frequency is throttled due to other reason." + value: "$X_BIT(6)" diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp index 3d413f1cdd715..96dc84555a9bd 100644 --- a/unified-runtime/source/adapters/cuda/device.cpp +++ b/unified-runtime/source/adapters/cuda/device.cpp @@ -1083,6 +1083,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case UR_DEVICE_INFO_IP_VERSION: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp index 182727a82d89b..98ab6184e341a 100644 --- a/unified-runtime/source/adapters/hip/device.cpp +++ b/unified-runtime/source/adapters/hip/device.cpp @@ -1039,6 +1039,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: case UR_DEVICE_INFO_IP_VERSION: case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index 37d893a348a31..94d864108e68e 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -325,6 +325,14 @@ template <> zes_structure_type_t getZesStructureType() { return ZES_STRUCTURE_TYPE_MEM_PROPERTIES; } +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_FREQ_PROPERTIES; +} + +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_POWER_PROPERTIES; +} + #ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME template <> ze_structure_type_t diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp index e789d6993dfd1..b195c5b0c38ca 100644 --- a/unified-runtime/source/adapters/level_zero/device.cpp +++ b/unified-runtime/source/adapters/level_zero/device.cpp @@ -188,6 +188,39 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { return Device->ZeGlobalMemSize.get().value; } +// Return the Sysman device handle and correpsonding data for the given UR +// device. +static std::tuple +getZesDeviceData(ur_device_handle_t Device) { + bool SysManEnv = getenv_tobool("ZES_ENABLE_SYSMAN", false); + if ((Device->Platform->ZedeviceToZesDeviceMap.size() == 0) && !SysManEnv) { + logger::error("SysMan support is unavailable on this system. Please " + "check your level zero driver installation."); + return {nullptr, {}, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION}; + } + + zes_device_handle_t ZesDevice = Device->ZeDevice; + ur_zes_device_handle_data_t ZesDeviceData = {}; + // If legacy sysman is enabled thru the environment variable, then zesInit + // will fail, but sysman is still usable so go the legacy route. + if (!SysManEnv) { + auto It = Device->Platform->ZedeviceToZesDeviceMap.find(Device->ZeDevice); + if (It == Device->Platform->ZedeviceToZesDeviceMap.end()) { + // no matching device + return {nullptr, {}, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION}; + } else { + ZesDeviceData = + Device->Platform->ZedeviceToZesDeviceMap[Device->ZeDevice]; + ZesDevice = ZesDeviceData.ZesDevice; + } + } else { + ZesDeviceData.SubDevice = Device->isSubDevice(); + ZesDeviceData.SubDeviceId = Device->ZeDeviceProperties->subdeviceId; + } + + return {ZesDevice, ZesDeviceData, UR_RESULT_SUCCESS}; +} + ur_result_t urDeviceGetInfo( /// [in] handle of the device instance ur_device_handle_t Device, @@ -757,12 +790,6 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { - bool SysManEnv = getenv_tobool("ZES_ENABLE_SYSMAN", false); - if ((Device->Platform->ZedeviceToZesDeviceMap.size() == 0) && !SysManEnv) { - logger::error("SysMan support is unavailable on this system. Please " - "check your level zero driver installation."); - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } // Calculate the global memory size as the max limit that can be reported as // "free" memory for the user to allocate. uint64_t GlobalMemSize = calculateGlobalMemSize(Device); @@ -771,21 +798,9 @@ ur_result_t urDeviceGetInfo( uint64_t FreeMemory = 0; uint32_t MemCount = 0; - zes_device_handle_t ZesDevice = Device->ZeDevice; - struct ur_zes_device_handle_data_t ZesDeviceData = {}; - // If legacy sysman is enabled thru the environment variable, then zesInit - // will fail, but sysman is still usable so go the legacy route. - if (!SysManEnv) { - auto It = Device->Platform->ZedeviceToZesDeviceMap.find(Device->ZeDevice); - if (It == Device->Platform->ZedeviceToZesDeviceMap.end()) { - // no matching device - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } else { - ZesDeviceData = - Device->Platform->ZedeviceToZesDeviceMap[Device->ZeDevice]; - ZesDevice = ZesDeviceData.ZesDevice; - } - } + auto [ZesDevice, ZesDeviceData, Result] = getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; ZE2UR_CALL(zesDeviceEnumMemoryModules, (ZesDevice, &MemCount, nullptr)); if (MemCount != 0) { @@ -798,22 +813,11 @@ ur_result_t urDeviceGetInfo( // For root-device report memory from all memory modules since that // is what totally available in the default implicit scaling mode. // For sub-devices only report memory local to them. - if (SysManEnv) { - if (!Device->isSubDevice() || - Device->ZeDeviceProperties->subdeviceId == - ZesMemProperties.subdeviceId) { - - ZesStruct ZesMemState; - ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); - FreeMemory += ZesMemState.free; - } - } else { - if (ZesDeviceData.SubDeviceId == ZesMemProperties.subdeviceId || - !ZesDeviceData.SubDevice) { - ZesStruct ZesMemState; - ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); - FreeMemory += ZesMemState.free; - } + if (ZesDeviceData.SubDeviceId == ZesMemProperties.subdeviceId || + !ZesDeviceData.SubDevice) { + ZesStruct ZesMemState; + ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); + FreeMemory += ZesMemState.free; } } } @@ -1215,6 +1219,114 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP: return ReturnValue(true); + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: { + ur_device_throttle_reasons_flags_t ThrottleReasons = 0; + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. + return ReturnValue(ThrottleReasons); + } + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + uint32_t FreqCount = 0; + ZE2UR_CALL(zesDeviceEnumFrequencyDomains, (ZesDevice, &FreqCount, nullptr)); + if (FreqCount != 0) { + std::vector ZesFreqHandles(FreqCount); + ZE2UR_CALL(zesDeviceEnumFrequencyDomains, + (ZesDevice, &FreqCount, ZesFreqHandles.data())); + for (auto &ZesFreqHandle : ZesFreqHandles) { + ZesStruct FreqProperties; + ZE2UR_CALL(zesFrequencyGetProperties, (ZesFreqHandle, &FreqProperties)); + if (FreqProperties.type != ZES_FREQ_DOMAIN_GPU) { + continue; + } + zes_freq_state_t State; + zesFrequencyGetState(ZesFreqHandle, &State); + constexpr zes_freq_throttle_reason_flags_t ZeThrottleFlags[] = { + ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP, + ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT, + ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT, + ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT, + ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE, + ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE}; + + constexpr ur_device_throttle_reasons_flags_t UrThrottleFlags[] = { + UR_DEVICE_THROTTLE_REASONS_FLAG_POWER_CAP, + UR_DEVICE_THROTTLE_REASONS_FLAG_CURRENT_LIMIT, + UR_DEVICE_THROTTLE_REASONS_FLAG_THERMAL_LIMIT, + UR_DEVICE_THROTTLE_REASONS_FLAG_PSU_ALERT, + UR_DEVICE_THROTTLE_REASONS_FLAG_SW_RANGE, + UR_DEVICE_THROTTLE_REASONS_FLAG_HW_RANGE}; + + for (size_t i = 0; + i < sizeof(ZeThrottleFlags) / sizeof(ZeThrottleFlags[0]); ++i) { + if (State.throttleReasons & ZeThrottleFlags[i]) { + ThrottleReasons |= UrThrottleFlags[i]; + State.throttleReasons &= ~ZeThrottleFlags[i]; + } + } + + if (State.throttleReasons) { + ThrottleReasons |= UR_DEVICE_THROTTLE_REASONS_FLAG_OTHER; + } + } + } + return ReturnValue(ThrottleReasons); + } + case UR_DEVICE_INFO_FAN_SPEED: { + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + + uint32_t FanCount = 0; + ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, nullptr)); + // If there are no fans, then report speed query as unsupported. + if (FanCount == 0) + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. + return ReturnValue(int32_t{0}); + } + + std::vector ZeFanHandles(FanCount); + ZE2UR_CALL(zesDeviceEnumFans, (ZesDevice, &FanCount, ZeFanHandles.data())); + int32_t Speed = -1; + for (auto Fan : ZeFanHandles) { + int32_t CurSpeed; + ZE2UR_CALL(zesFanGetState, (Fan, ZES_FAN_SPEED_UNITS_PERCENT, &CurSpeed)); + Speed = std::max(Speed, CurSpeed); + } + return ReturnValue(Speed); + } + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: { + if (!ParamValue) { + // If ParamValue is nullptr, then we are only interested in the size of + // the value. + return ReturnValue(int32_t{0}); + } + + [[maybe_unused]] auto [ZesDevice, Ignored, Result] = + getZesDeviceData(Device); + if (Result != UR_RESULT_SUCCESS) + return Result; + + zes_pwr_handle_t ZesPwrHandle = nullptr; + ZE2UR_CALL(zesDeviceGetCardPowerDomain, (ZesDevice, &ZesPwrHandle)); + ZesStruct PowerProperties; + ZE2UR_CALL(zesPowerGetProperties, (ZesPwrHandle, &PowerProperties)); + + if (ParamName == UR_DEVICE_INFO_MIN_POWER_LIMIT) { + return ReturnValue(int32_t{PowerProperties.minLimit}); + } else { + return ReturnValue(int32_t{PowerProperties.maxLimit}); + } + } default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp index 4e9d5d87da9d1..5d13a4a5cf2ff 100644 --- a/unified-runtime/source/adapters/native_cpu/device.cpp +++ b/unified-runtime/source/adapters/native_cpu/device.cpp @@ -368,6 +368,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: case UR_DEVICE_INFO_IP_VERSION: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: return ReturnValue( diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp index ff67f0339da70..8ab3556b24c92 100644 --- a/unified-runtime/source/adapters/opencl/device.cpp +++ b/unified-runtime/source/adapters/opencl/device.cpp @@ -1609,6 +1609,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: case UR_DEVICE_INFO_COMPONENT_DEVICES: case UR_DEVICE_INFO_COMPOSITE_DEVICE: + case UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS: + case UR_DEVICE_INFO_FAN_SPEED: + case UR_DEVICE_INFO_MIN_POWER_LIMIT: + case UR_DEVICE_INFO_MAX_POWER_LIMIT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/unified-runtime/source/loader/loader.def.in b/unified-runtime/source/loader/loader.def.in index 14efd8a16c762..f9e66a8f98990 100644 --- a/unified-runtime/source/loader/loader.def.in +++ b/unified-runtime/source/loader/loader.def.in @@ -263,6 +263,7 @@ EXPORTS urPrintDeviceReleaseParams urPrintDeviceRetainParams urPrintDeviceSelectBinaryParams + urPrintDeviceThrottleReasonsFlags urPrintDeviceType urPrintDeviceUsmAccessCapabilityFlags urPrintEnqueueCommandBufferExpParams diff --git a/unified-runtime/source/loader/loader.map.in b/unified-runtime/source/loader/loader.map.in index 1e9d6336ba75b..459fcc2bdba72 100644 --- a/unified-runtime/source/loader/loader.map.in +++ b/unified-runtime/source/loader/loader.map.in @@ -263,6 +263,7 @@ urPrintDeviceReleaseParams; urPrintDeviceRetainParams; urPrintDeviceSelectBinaryParams; + urPrintDeviceThrottleReasonsFlags; urPrintDeviceType; urPrintDeviceUsmAccessCapabilityFlags; urPrintEnqueueCommandBufferExpParams; diff --git a/unified-runtime/source/loader/ur_print.cpp b/unified-runtime/source/loader/ur_print.cpp index a5668b34a927f..15967bb5fe30c 100644 --- a/unified-runtime/source/loader/ur_print.cpp +++ b/unified-runtime/source/loader/ur_print.cpp @@ -284,6 +284,15 @@ ur_result_t urPrintDeviceUsmAccessCapabilityFlags( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintDeviceThrottleReasonsFlags(enum ur_device_throttle_reasons_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintContextFlags(enum ur_context_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { std::stringstream ss; diff --git a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp index 2a334738d3cc1..8bdac57f6f6ea 100644 --- a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp +++ b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp @@ -2560,6 +2560,74 @@ TEST_P(urDeviceGetInfoTest, SuccessUseNativeAssert) { property_value); } +TEST_P(urDeviceGetInfoTest, SuccessThrottleReasons) { + size_t property_size = 0; + const ur_device_info_t property_name = + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS; + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size)); + ASSERT_EQ(property_size, sizeof(ur_device_throttle_reasons_flag_t)); + + ur_device_throttle_reasons_flag_t property_value = + UR_DEVICE_THROTTLE_REASONS_FLAG_FORCE_UINT32; + ASSERT_SUCCESS(urDeviceGetInfo(device, property_name, property_size, + &property_value, nullptr)); + + ASSERT_EQ(property_value & UR_DEVICE_THROTTLE_REASONS_FLAGS_MASK, 0); +} + +TEST_P(urDeviceGetInfoTest, SuccessFanSpeed) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_FAN_SPEED; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + +TEST_P(urDeviceGetInfoTest, SuccessMaxPowerLimit) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_MAX_POWER_LIMIT; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + +TEST_P(urDeviceGetInfoTest, SuccessMinPowerLimit) { + size_t property_size = 0; + const ur_device_info_t property_name = UR_DEVICE_INFO_MIN_POWER_LIMIT; + + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urDeviceGetInfo(device, property_name, 0, nullptr, &property_size), + UR_DEVICE_INFO_COMPOSITE_DEVICE); + + ASSERT_EQ(property_size, sizeof(int32_t)); + + uint32_t property_value = 0; + ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name, + property_size, &property_value, + nullptr), + property_value); +} + TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) { ur_device_type_t device_type; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, diff --git a/unified-runtime/test/conformance/testing/include/uur/optional_queries.h b/unified-runtime/test/conformance/testing/include/uur/optional_queries.h index 88e4271605041..3f3bdb2953b4e 100644 --- a/unified-runtime/test/conformance/testing/include/uur/optional_queries.h +++ b/unified-runtime/test/conformance/testing/include/uur/optional_queries.h @@ -42,6 +42,10 @@ constexpr std::array optional_ur_device_info_t = { UR_DEVICE_INFO_IP_VERSION, UR_DEVICE_INFO_COMPONENT_DEVICES, UR_DEVICE_INFO_COMPOSITE_DEVICE, + UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS, + UR_DEVICE_INFO_FAN_SPEED, + UR_DEVICE_INFO_MIN_POWER_LIMIT, + UR_DEVICE_INFO_MAX_POWER_LIMIT, }; template <> inline bool isQueryOptional(ur_device_info_t query) { diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp index bde3e8069c8d7..b2e63b7c46896 100644 --- a/unified-runtime/tools/urinfo/urinfo.hpp +++ b/unified-runtime/tools/urinfo/urinfo.hpp @@ -332,6 +332,15 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_USE_NATIVE_ASSERT); std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_FAN_SPEED); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_MIN_POWER_LIMIT); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_MAX_POWER_LIMIT); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); std::cout << prefix;