diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index 22146debf3563..7b6c72d6746d3 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -214,6 +214,19 @@ enum class kernel_sub_group : cl_kernel_sub_group_info { compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL }; +enum class kernel_device_specific : cl_kernel_work_group_info { + global_work_size = CL_KERNEL_GLOBAL_WORK_SIZE, + work_group_size = CL_KERNEL_WORK_GROUP_SIZE, + compile_work_group_size = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + preferred_work_group_size_multiple = + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + private_mem_size = CL_KERNEL_PRIVATE_MEM_SIZE, + max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS, + compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS, + compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL +}; + // A.6 Program information desctiptors enum class program : cl_program_info { context = CL_PROGRAM_CONTEXT, @@ -242,6 +255,8 @@ enum class event_profiling : cl_profiling_info { // Provide an alias to the return type for each of the info parameters template class param_traits {}; +template struct compatibility_param_traits {}; + #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ template <> class param_traits { \ public: \ @@ -263,6 +278,7 @@ template class param_traits {}; #include +#include #include #include #include @@ -276,6 +292,24 @@ template class param_traits {}; #undef PARAM_TRAITS_SPEC #undef PARAM_TRAITS_SPEC_WITH_INPUT +#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ + template <> \ + struct compatibility_param_traits { \ + static constexpr auto value = kernel_device_specific::param; \ + }; + +#define PARAM_TRAITS_SPEC_WITH_INPUT(param_type, param, ret_type, in_type) \ + template <> \ + struct compatibility_param_traits { \ + static constexpr auto value = kernel_device_specific::param; \ + }; + +#include +#include + +#undef PARAM_TRAITS_SPEC +#undef PARAM_TRAITS_SPEC_WITH_INPUT + } // namespace info } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/info/kernel_device_specific_traits.def b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def new file mode 100644 index 0000000000000..01c0ac7b30a2f --- /dev/null +++ b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def @@ -0,0 +1,12 @@ +PARAM_TRAITS_SPEC(kernel_device_specific, compile_work_group_size, + cl::sycl::range<3>) +PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, cl::sycl::range<3>) +PARAM_TRAITS_SPEC(kernel_device_specific, + preferred_work_group_size_multiple, size_t) +PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, cl_ulong) +PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t) +PARAM_TRAITS_SPEC_WITH_INPUT(kernel_device_specific, max_sub_group_size, + uint32_t, cl::sycl::range<3>) +PARAM_TRAITS_SPEC(kernel_device_specific, max_num_sub_groups, uint32_t) +PARAM_TRAITS_SPEC(kernel_device_specific, compile_num_sub_groups, uint32_t) +PARAM_TRAITS_SPEC(kernel_device_specific, compile_sub_group_size, uint32_t) diff --git a/sycl/include/CL/sycl/kernel.hpp b/sycl/include/CL/sycl/kernel.hpp index 41af661ed2a22..c71a4cf510025 100644 --- a/sycl/include/CL/sycl/kernel.hpp +++ b/sycl/include/CL/sycl/kernel.hpp @@ -92,6 +92,27 @@ class __SYCL_EXPORT kernel { typename info::param_traits::return_type get_info() const; + /// Query device-specific information from the kernel object using the + /// info::kernel_device_specific descriptor. + /// + /// \param Device is a valid SYCL device to query info for. + /// \return depends on information being queried. + template + typename info::param_traits::return_type + get_info(const device &Device) const; + + /// Query device-specific information from a kernel using the + /// info::kernel_device_specific descriptor for a specific device and value. + /// + /// \param Device is a valid SYCL device. + /// \param Value depends on information being queried. + /// \return depends on information being queried. + template + typename info::param_traits::return_type + get_info(const device &Device, + typename info::param_traits::input_type Value) const; + /// Query work-group information from a kernel using the /// info::kernel_work_group descriptor for a specific device. /// @@ -107,8 +128,11 @@ class __SYCL_EXPORT kernel { /// \param Device is a valid SYCL device. /// \return depends on information being queried. template + // clang-format off typename info::param_traits::return_type + __SYCL_DEPRECATED("Use get_info with info::kernel_device_specific instead.") get_sub_group_info(const device &Device) const; + // clang-format on /// Query sub-group information from a kernel using the /// info::kernel_sub_group descriptor for a specific device and value. @@ -117,11 +141,13 @@ class __SYCL_EXPORT kernel { /// \param Value depends on information being queried. /// \return depends on information being queried. template + // clang-format off typename info::param_traits::return_type - get_sub_group_info( - const device &Device, - typename info::param_traits::input_type - Value) const; + __SYCL_DEPRECATED("Use get_info with info::kernel_device_specific instead.") + get_sub_group_info(const device &Device, + typename info::param_traits::input_type Value) const; + // clang-format on private: /// Constructs a SYCL kernel object from a valid kernel_impl instance. diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 052d9bdcfdc77..e5dc044cf9451 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -82,43 +82,59 @@ template <> program kernel_impl::get_info() const { return createSyclObjFromImpl(MProgramImpl); } -template -typename info::param_traits::return_type -kernel_impl::get_work_group_info(const device &Device) const { +template +typename info::param_traits::return_type +kernel_impl::get_info(const device &Device) const { if (is_host()) { - return get_kernel_work_group_info_host(Device); + return get_kernel_device_specific_info_host(Device); } - return get_kernel_work_group_info< - typename info::param_traits::return_type, + return get_kernel_device_specific_info< + typename info::param_traits::return_type, param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), getPlugin()); } -template -typename info::param_traits::return_type -kernel_impl::get_sub_group_info(const device &Device) const { +template +typename info::param_traits::return_type +kernel_impl::get_info( + const device &Device, + typename info::param_traits::input_type + Value) const { if (is_host()) { throw runtime_error("Sub-group feature is not supported on HOST device.", PI_INVALID_DEVICE); } - return get_kernel_sub_group_info::get( - this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), + return get_kernel_device_specific_info_with_input::get( + this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value, getPlugin()); } +template +typename info::param_traits::return_type +kernel_impl::get_work_group_info(const device &Device) const { + return get_info< + info::compatibility_param_traits::value>( + Device); +} + +template +typename info::param_traits::return_type +kernel_impl::get_sub_group_info(const device &Device) const { + return get_info< + info::compatibility_param_traits::value>( + Device); +} + template typename info::param_traits::return_type kernel_impl::get_sub_group_info( const device &Device, typename info::param_traits::input_type Value) const { - if (is_host()) { - throw runtime_error("Sub-group feature is not supported on HOST device.", - PI_INVALID_DEVICE); - } - return get_kernel_sub_group_info_with_input::get( - this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value, - getPlugin()); + return get_info< + info::compatibility_param_traits::value>( + Device, Value); } #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ @@ -128,6 +144,18 @@ kernel_impl::get_sub_group_info( #undef PARAM_TRAITS_SPEC +#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ + template ret_type kernel_impl::get_info( \ + const device &) const; +#define PARAM_TRAITS_SPEC_WITH_INPUT(param_type, param, ret_type, in_type) \ + template ret_type kernel_impl::get_info( \ + const device &, in_type) const; + +#include + +#undef PARAM_TRAITS_SPEC +#undef PARAM_TRAITS_SPEC_WITH_INPUT + #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ template ret_type kernel_impl::get_work_group_info( \ const device &) const; diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 7761f168670c4..8258fa3419484 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -103,6 +103,27 @@ class kernel_impl { typename info::param_traits::return_type get_info() const; + /// Query device-specific information from a kernel object using the + /// info::kernel_device_specific descriptor. + /// + /// \param Device is a valid SYCL device to query info for. + /// \return depends on information being queried. + template + typename info::param_traits::return_type + get_info(const device &Device) const; + + /// Query device-specific information from a kernel using the + /// info::kernel_device_specific descriptor for a specific device and value. + /// + /// \param Device is a valid SYCL device. + /// \param Value depends on information being queried. + /// \return depends on information being queried. + template + typename info::param_traits::return_type + get_info(const device &Device, + typename info::param_traits::input_type Value) const; + /// Query work-group information from a kernel using the /// info::kernel_work_group descriptor for a specific device. /// diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index d7514b1522b57..178bd273f33bb 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -51,22 +51,48 @@ template struct get_kernel_info { } }; -// OpenCL kernel work-group methods +// Device-specific methods -template -struct get_kernel_work_group_info { +template +struct IsWorkGroupInfo : std::false_type {}; + +template <> +struct IsWorkGroupInfo + : std::true_type {}; +template <> +struct IsWorkGroupInfo + : std::true_type {}; +template <> +struct IsWorkGroupInfo + : std::true_type {}; +template <> +struct IsWorkGroupInfo< + info::kernel_device_specific::preferred_work_group_size_multiple> + : std::true_type {}; +template <> +struct IsWorkGroupInfo + : std::true_type {}; + +template +struct get_kernel_device_specific_info { static T get(RT::PiKernel Kernel, RT::PiDevice Device, const plugin &Plugin) { T Result; - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call( - Kernel, Device, pi::cast(Param), sizeof(T), - &Result, nullptr); + if (IsWorkGroupInfo::value) { + // TODO catch an exception and put it to list of asynchronous exceptions + Plugin.call( + Kernel, Device, pi::cast(Param), sizeof(T), + &Result, nullptr); + } else { + Plugin.call( + Kernel, Device, pi_kernel_sub_group_info(Param), 0, nullptr, + sizeof(T), &Result, nullptr); + } return Result; } }; -template -struct get_kernel_work_group_info, Param> { +template +struct get_kernel_device_specific_info, Param> { static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device, const plugin &Plugin) { size_t Result[3]; @@ -78,63 +104,72 @@ struct get_kernel_work_group_info, Param> { } }; -template -inline typename info::param_traits::return_type -get_kernel_work_group_info_host(const cl::sycl::device &Device); +template +inline typename info::param_traits::return_type +get_kernel_device_specific_info_host(const cl::sycl::device &Device); template <> -inline cl::sycl::range<3> -get_kernel_work_group_info_host( - const cl::sycl::device &) { +inline cl::sycl::range<3> get_kernel_device_specific_info_host< + info::kernel_device_specific::global_work_size>(const cl::sycl::device &) { throw invalid_object_error("This instance of kernel is a host instance", PI_INVALID_KERNEL); } template <> -inline size_t -get_kernel_work_group_info_host( +inline size_t get_kernel_device_specific_info_host< + info::kernel_device_specific::work_group_size>( const cl::sycl::device &Dev) { return Dev.get_info(); } template <> -inline cl::sycl::range<3> get_kernel_work_group_info_host< - info::kernel_work_group::compile_work_group_size>( +inline cl::sycl::range<3> get_kernel_device_specific_info_host< + info::kernel_device_specific::compile_work_group_size>( const cl::sycl::device &) { return {0, 0, 0}; } template <> -inline size_t get_kernel_work_group_info_host< - info::kernel_work_group::preferred_work_group_size_multiple>( +inline size_t get_kernel_device_specific_info_host< + info::kernel_device_specific::preferred_work_group_size_multiple>( const cl::sycl::device &Dev) { - return get_kernel_work_group_info_host< - info::kernel_work_group::work_group_size>(Dev); + return get_kernel_device_specific_info_host< + info::kernel_device_specific::work_group_size>(Dev); } template <> -inline cl_ulong -get_kernel_work_group_info_host( - const cl::sycl::device &) { +inline cl_ulong get_kernel_device_specific_info_host< + info::kernel_device_specific::private_mem_size>(const cl::sycl::device &) { return 0; } -// The kernel sub-group methods -template struct get_kernel_sub_group_info { - static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device, - const plugin &Plugin) { - uint32_t Result; - // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call( - Kernel, Device, pi_kernel_sub_group_info(Param), 0, nullptr, - sizeof(uint32_t), &Result, nullptr); +template <> +inline uint32_t get_kernel_device_specific_info_host< + info::kernel_device_specific::max_num_sub_groups>( + const cl::sycl::device &) { + throw invalid_object_error("This instance of kernel is a host instance", + PI_INVALID_KERNEL); +} - return Result; - } -}; +template <> +inline uint32_t get_kernel_device_specific_info_host< + info::kernel_device_specific::compile_num_sub_groups>( + const cl::sycl::device &) { + throw invalid_object_error("This instance of kernel is a host instance", + PI_INVALID_KERNEL); +} + +template <> +inline uint32_t get_kernel_device_specific_info_host< + info::kernel_device_specific::compile_sub_group_size>( + const cl::sycl::device &) { + throw invalid_object_error("This instance of kernel is a host instance", + PI_INVALID_KERNEL); +} -template -struct get_kernel_sub_group_info_with_input { +template +struct get_kernel_device_specific_info_with_input { static uint32_t get(RT::PiKernel Kernel, RT::PiDevice Device, cl::sycl::range<3> In, const plugin &Plugin) { size_t Input[3] = {In[0], In[1], In[2]}; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d6dad42b68b34..22624546e78e9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1605,8 +1605,9 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, assert(NDR.NumWorkGroups[0] != 0 && NDR.LocalSize[0] == 0); // TODO might be good to cache this info together with the kernel info to // avoid get_kernel_work_group_info on every kernel run - range<3> WGSize = get_kernel_work_group_info< - range<3>, cl::sycl::info::kernel_work_group::compile_work_group_size>:: + range<3> WGSize = get_kernel_device_specific_info< + range<3>, + cl::sycl::info::kernel_device_specific::compile_work_group_size>:: get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); if (WGSize[0] == 0) { @@ -1615,8 +1616,8 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, get_device_info, cl::sycl::info::device::max_work_item_sizes>:: get(DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); - size_t WGSize1D = get_kernel_work_group_info< - size_t, cl::sycl::info::kernel_work_group::work_group_size>:: + size_t WGSize1D = get_kernel_device_specific_info< + size_t, cl::sycl::info::kernel_device_specific::work_group_size>:: get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); assert(MaxWGSizes[2] != 0); diff --git a/sycl/source/kernel.cpp b/sycl/source/kernel.cpp index b22be2a49852b..8be742dd2c7b5 100644 --- a/sycl/source/kernel.cpp +++ b/sycl/source/kernel.cpp @@ -45,6 +45,32 @@ kernel::get_info() const { #undef PARAM_TRAITS_SPEC +template +typename info::param_traits::return_type +kernel::get_info(const device &Dev) const { + return impl->get_info(Dev); +} + +template +typename info::param_traits::return_type +kernel::get_info(const device &Device, + typename info::param_traits::input_type Value) const { + return impl->get_info(Device, Value); +} + +#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ + template __SYCL_EXPORT ret_type kernel::get_info( \ + const device &) const; +#define PARAM_TRAITS_SPEC_WITH_INPUT(param_type, param, ret_type, in_type) \ + template __SYCL_EXPORT ret_type kernel::get_info( \ + const device &, in_type) const; + +#include + +#undef PARAM_TRAITS_SPEC +#undef PARAM_TRAITS_SPEC_WITH_INPUT + template typename info::param_traits::return_type kernel::get_work_group_info(const device &dev) const { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 323961f0585aa..0c9f6da10cdcc 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3720,6 +3720,9 @@ _ZN2cl4sycl6detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12con _ZN2cl4sycl6detail11SYCLMemObjTC1EP7_cl_memRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EE _ZN2cl4sycl6detail11SYCLMemObjTC2EP7_cl_memRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EE _ZN2cl4sycl6detail11buffer_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event +_ZN2cl4sycl6detail11stream_impl15accessGlobalBufERNS0_7handlerE +_ZN2cl4sycl6detail11stream_impl18accessGlobalOffsetERNS0_7handlerE +_ZN2cl4sycl6detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE _ZN2cl4sycl6detail11stream_impl5flushEv _ZN2cl4sycl6detail11stream_implC1EmmRNS0_7handlerE _ZN2cl4sycl6detail11stream_implC2EmmRNS0_7handlerE @@ -4043,6 +4046,15 @@ _ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4532EEENS _ZNK2cl4sycl6kernel19get_work_group_infoILNS0_4info17kernel_work_groupE4533EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel3getEv _ZNK2cl4sycl6kernel7is_hostEv +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE16650EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4528EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4529EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4531EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4532EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4533EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4496EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4497EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4498EEENS3_12param_traitsIS4_XT_EE11return_typeEv @@ -4161,6 +4173,3 @@ _ZNK2cl4sycl9exception11has_contextEv _ZNK2cl4sycl9exception4whatEv __sycl_register_lib __sycl_unregister_lib -_ZN2cl4sycl6detail11stream_impl15accessGlobalBufERNS0_7handlerE -_ZN2cl4sycl6detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE -_ZN2cl4sycl6detail11stream_impl18accessGlobalOffsetERNS0_7handlerE diff --git a/sycl/test/basic_tests/kernel_info.cpp b/sycl/test/basic_tests/kernel_info.cpp index 68a642b476e27..10bc4ff6f41c2 100644 --- a/sycl/test/basic_tests/kernel_info.cpp +++ b/sycl/test/basic_tests/kernel_info.cpp @@ -51,10 +51,21 @@ int main() { const size_t wgSize = krn.get_work_group_info(dev); assert(wgSize > 0); + const size_t wgSizeNew = + krn.get_info(dev); + assert(wgSizeNew > 0); + assert(wgSize == wgSizeNew); const size_t prefWGSizeMult = krn.get_work_group_info< info::kernel_work_group::preferred_work_group_size_multiple>(dev); assert(prefWGSizeMult > 0); + const size_t prefWGSizeMultNew = krn.get_info< + info::kernel_device_specific::preferred_work_group_size_multiple>(dev); + assert(prefWGSizeMultNew > 0); + assert(prefWGSizeMult == prefWGSizeMultNew); const cl_ulong prvMemSize = krn.get_work_group_info(dev); assert(prvMemSize == 0); + const cl_ulong prvMemSizeNew = + krn.get_info(dev); + assert(prvMemSizeNew == 0); } diff --git a/sycl/test/sub_group/info.cpp b/sycl/test/sub_group/info.cpp index 1f02a3fa65269..83cd72e431efc 100644 --- a/sycl/test/sub_group/info.cpp +++ b/sycl/test/sub_group/info.cpp @@ -63,6 +63,12 @@ int main() { bool Expected = std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); exit_if_not_equal(Expected, true, "max_sub_group_size"); + + Res = Kernel.get_info( + Device, r); + Expected = + std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); + exit_if_not_equal(Expected, true, "max_sub_group_size"); } } @@ -72,6 +78,12 @@ int main() { /* Sub-group size is not specified in kernel or IL*/ exit_if_not_equal(Res, 0, "compile_num_sub_groups"); + Res = Kernel.get_info( + Device); + + /* Sub-group size is not specified in kernel or IL*/ + exit_if_not_equal(Res, 0, "compile_num_sub_groups"); + // According to specification, this kernel query requires `cl_khr_subgroups` // or `cl_intel_subgroups` if ((Device.has_extension("cl_khr_subgroups") || @@ -82,6 +94,13 @@ int main() { /* Required sub-group size is not specified in kernel or IL*/ exit_if_not_equal(Res, 0, "compile_sub_group_size"); + + Res = + Kernel.get_info( + Device); + + /* Required sub-group size is not specified in kernel or IL*/ + exit_if_not_equal(Res, 0, "compile_sub_group_size"); } } catch (exception e) {