diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index c8d140cd5ed24..f7f10734a580c 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -50,6 +50,7 @@ enum class aspect { host_debuggable = 32, ext_intel_gpu_hw_threads_per_eu = 33, ext_oneapi_cuda_async_barrier = 34, + ext_oneapi_bfloat16 = 35, }; } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index ad2a929b23071..3bd0008cdd457 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -298,6 +298,8 @@ typedef enum { PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, + // Return true if bfloat16 data type is supported by device + PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16 = 0x1FFFF, PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS = 0x20000, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D = 0x20001, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, diff --git a/sycl/include/CL/sycl/info/device_traits.def b/sycl/include/CL/sycl/info/device_traits.def index 3e1692df2e943..34385674b29da 100644 --- a/sycl/include/CL/sycl/info/device_traits.def +++ b/sycl/include/CL/sycl/info/device_traits.def @@ -27,6 +27,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, std::vector) __SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_scope_capabilities, std::vector) +__SYCL_PARAM_TRAITS_SPEC(device, ext_oneapi_bfloat16, bool) __SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t) diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index f56f06247f51e..8b0bc8be5a229 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -174,7 +174,8 @@ enum class device : cl_device_info { ext_oneapi_max_work_groups_2d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D, ext_oneapi_max_work_groups_3d = PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D, atomic_memory_scope_capabilities = - PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES + PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + ext_oneapi_bfloat16 = PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16, }; enum class device_type : pi_uint64 { diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 19330f4e8114b..809b366908be2 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1235,6 +1235,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, capabilities); } + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: { + int major = 0; + cl::sycl::detail::pi::assertion( + cuDeviceGetAttribute(&major, + CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, + device->get()) == CUDA_SUCCESS); + + bool bfloat16 = (major >= 8) ? true : false; + return getInfo(param_value_size, param_value, param_value_size_ret, + bfloat16); + } case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { // NVIDIA devices only support one sub-group size (the warp size) int warpSize = 0; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index b0a9d58187494..f82d514f17c17 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1677,6 +1677,7 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: return PI_ERROR_INVALID_VALUE; default: diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 5f5b3d3993e63..360a93a947618 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2925,6 +2925,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: // currently not supported in level zero runtime return PI_ERROR_INVALID_VALUE; + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: + return PI_ERROR_INVALID_VALUE; // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 6580c312cfb96..e75a7d12c5a9f 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -230,6 +230,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } + case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: + return PI_ERROR_INVALID_VALUE; case PI_DEVICE_INFO_IMAGE_SRGB: { cl_bool result = true; std::memcpy(paramValue, &result, sizeof(cl_bool)); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 2232e9063ce99..212e9e5ba2ecc 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -276,6 +276,8 @@ bool device_impl::has(aspect Aspect) const { return has_extension("cl_khr_fp16"); case aspect::fp64: return has_extension("cl_khr_fp64"); + case aspect::ext_oneapi_bfloat16: + return get_info(); case aspect::int64_base_atomics: return has_extension("cl_khr_int64_base_atomics"); case aspect::int64_extended_atomics: diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 3543ae296a605..f2d57663f9a3b 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -264,6 +264,22 @@ struct get_device_info, } }; +// Specialization for bf16 +template <> struct get_device_info { + static bool get(RT::PiDevice dev, const plugin &Plugin) { + + bool result = false; + + RT::PiResult Err = Plugin.call_nocheck( + dev, pi::cast(info::device::ext_oneapi_bfloat16), + sizeof(result), &result, nullptr); + if (Err != PI_SUCCESS) { + return false; + } + return result; + } +}; + // Specialization for exec_capabilities, OpenCL returns a bitfield template <> struct get_device_info, @@ -769,6 +785,11 @@ get_device_info_host() { memory_scope::work_group, memory_scope::device, memory_scope::system}; } +template <> +inline bool get_device_info_host() { + return false; +} + template <> inline cl_uint get_device_info_host() { // current value is the required minimum diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 836e0af8d8523..89e4d8343f5e1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4300,6 +4300,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65811EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE69632EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131071EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device9getNativeEv _ZNK2cl4sycl6kernel11get_backendEv _ZNK2cl4sycl6kernel11get_contextEv