From fcb840f8f1f61b5de2fa452271156f13fcbf19a0 Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Sat, 15 Jun 2019 20:37:10 -0700 Subject: [PATCH 1/3] [SYCL] create PI (Plugin Interface) layer and redirect there platform/device stuff. Signed-off-by: Sergey V Maslov --- sycl/CMakeLists.txt | 7 +- .../CL/sycl/detail/aligned_allocator.hpp | 2 +- sycl/include/CL/sycl/detail/cnri.h | 120 --------- sycl/include/CL/sycl/detail/common.hpp | 25 +- sycl/include/CL/sycl/detail/device_host.hpp | 68 ----- sycl/include/CL/sycl/detail/device_impl.hpp | 156 ++++++++++- sycl/include/CL/sycl/detail/device_info.hpp | 217 ++++++++-------- sycl/include/CL/sycl/detail/device_opencl.hpp | 146 ----------- .../CL/sycl}/detail/force_device.hpp | 0 sycl/include/CL/sycl/detail/pi.h | 245 ++++++++++++++++++ sycl/include/CL/sycl/detail/pi.hpp | 77 ++++++ sycl/include/CL/sycl/detail/pi_opencl.hpp | 52 ++++ sycl/include/CL/sycl/detail/platform_host.hpp | 40 --- sycl/include/CL/sycl/detail/platform_impl.hpp | 52 +++- sycl/include/CL/sycl/detail/platform_info.hpp | 24 +- .../CL/sycl/detail/platform_opencl.hpp | 44 ---- .../program_manager/program_manager.hpp | 14 +- sycl/include/CL/sycl/device.hpp | 8 +- sycl/include/CL/sycl/info/info_desc.hpp | 26 +- sycl/include/CL/sycl/platform.hpp | 11 +- sycl/source/detail/cnri.cpp | 29 --- sycl/source/detail/device_impl.cpp | 91 +++++++ sycl/source/detail/device_info.cpp | 22 ++ sycl/source/detail/force_device.cpp | 2 +- sycl/source/detail/pi.cpp | 88 +++++++ sycl/source/detail/pi_opencl.cpp | 100 +++++++ sycl/source/detail/platform_host.cpp | 26 -- sycl/source/detail/platform_impl.cpp | 87 +++++++ sycl/source/detail/platform_opencl.cpp | 42 --- .../program_manager/program_manager.cpp | 75 +++--- sycl/source/device.cpp | 8 +- sycl/source/platform.cpp | 37 +-- 32 files changed, 1199 insertions(+), 742 deletions(-) mode change 100644 => 100755 sycl/include/CL/sycl/detail/aligned_allocator.hpp delete mode 100644 sycl/include/CL/sycl/detail/cnri.h delete mode 100644 sycl/include/CL/sycl/detail/device_host.hpp delete mode 100644 sycl/include/CL/sycl/detail/device_opencl.hpp rename sycl/{source => include/CL/sycl}/detail/force_device.hpp (100%) create mode 100644 sycl/include/CL/sycl/detail/pi.h create mode 100644 sycl/include/CL/sycl/detail/pi.hpp create mode 100644 sycl/include/CL/sycl/detail/pi_opencl.hpp delete mode 100644 sycl/include/CL/sycl/detail/platform_host.hpp delete mode 100644 sycl/include/CL/sycl/detail/platform_opencl.hpp mode change 100644 => 100755 sycl/include/CL/sycl/device.hpp mode change 100644 => 100755 sycl/include/CL/sycl/platform.hpp delete mode 100644 sycl/source/detail/cnri.cpp create mode 100644 sycl/source/detail/device_impl.cpp create mode 100644 sycl/source/detail/pi.cpp create mode 100644 sycl/source/detail/pi_opencl.cpp delete mode 100644 sycl/source/detail/platform_host.cpp create mode 100644 sycl/source/detail/platform_impl.cpp delete mode 100644 sycl/source/detail/platform_opencl.cpp diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 87160efee406c..5c9c67c90bac8 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -123,9 +123,11 @@ set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin/") add_library("${SYCLLibrary}" SHARED "${includeRootPath}/CL/sycl.hpp" "${sourceRootPath}/detail/builtins.cpp" - "${sourceRootPath}/detail/cnri.cpp" + "${sourceRootPath}/detail/pi.cpp" + "${sourceRootPath}/detail/pi_opencl.cpp" "${sourceRootPath}/detail/common.cpp" "${sourceRootPath}/detail/context_impl.cpp" + "${sourceRootPath}/detail/device_impl.cpp" "${sourceRootPath}/detail/device_info.cpp" "${sourceRootPath}/detail/event_impl.cpp" "${sourceRootPath}/detail/force_device.cpp" @@ -134,8 +136,7 @@ add_library("${SYCLLibrary}" SHARED "${sourceRootPath}/detail/kernel_impl.cpp" "${sourceRootPath}/detail/kernel_info.cpp" "${sourceRootPath}/detail/memory_manager.cpp" - "${sourceRootPath}/detail/platform_host.cpp" - "${sourceRootPath}/detail/platform_opencl.cpp" + "${sourceRootPath}/detail/platform_impl.cpp" "${sourceRootPath}/detail/platform_info.cpp" "${sourceRootPath}/detail/program_impl.cpp" "${sourceRootPath}/detail/program_manager/program_manager.cpp" diff --git a/sycl/include/CL/sycl/detail/aligned_allocator.hpp b/sycl/include/CL/sycl/detail/aligned_allocator.hpp old mode 100644 new mode 100755 index d99dd9dba166f..6beab481b0294 --- a/sycl/include/CL/sycl/detail/aligned_allocator.hpp +++ b/sycl/include/CL/sycl/detail/aligned_allocator.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include diff --git a/sycl/include/CL/sycl/detail/cnri.h b/sycl/include/CL/sycl/detail/cnri.h deleted file mode 100644 index 5dc0d79e4d121..0000000000000 --- a/sycl/include/CL/sycl/detail/cnri.h +++ /dev/null @@ -1,120 +0,0 @@ -//===-- cnri.h - SYCL common native runtime interface -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// This source is the definition of the SYCL Common Native Runtime Interface -/// (CNRI). It is the interface between the device-agnostic SYCL runtime layer -/// and underlying "native" runtimes such as OpenCL. - -#pragma once - -#include "CL/opencl.h" - -#include - -/// Target identification strings -#define CNRI_TGT_STR_UNKNOWN "" -#define CNRI_TGT_STR_SPIRV32 "spir" -#define CNRI_TGT_STR_SPIRV64 "spir64" - -/// Kinds of device images -enum cnri_device_image_format { - CNRI_IMG_NONE, // image format is not determined - CNRI_IMG_NATIVE, // image format is specific to a device - // portable image kinds go next - CNRI_IMG_SPIRV, // SPIR-V - CNRI_IMG_LLVMIR_BITCODE // LLVM bitcode -}; - -typedef void __tgt_offload_entry; - -// Device image descriptor version supported by this library. -#define CNRI_DEVICE_IMAGE_STRUCT_VERSION ((uint16_t)1) -#define SYCL_OFFLOAD_KIND ((uint8_t)4) - -/// This struct is a record of the device image information. If the Kind field -/// denotes a portable image kind (SPIRV or LLVMIR), the DeviceTargetSpec field -/// can still be specific and denote e.g. FPGA target. -/// It must match the __tgt_device_image structure generated by -/// the clang-offload-wrapper tool when their Version field match. -struct cnri_device_image { - /// version of this structure - for backward compatibility; - /// all modifications which change order/type/offsets of existing fields - /// should increment the version. - uint16_t Version; - /// the kind of offload model the image employs; must be 4 for SYCL - uint8_t Kind; - /// format of the image data - SPIRV, LLVMIR bitcode,... - uint8_t Format; - /// null-terminated string representation of the device's target architecture - const char *DeviceTargetSpec; - /// a null-terminated string; target- and compiler-specific options - /// which are suggested to use to "build" program at runtime - const char *BuildOptions; - /// Pointer to the manifest data start - const unsigned char *ManifestStart; - /// Pointer to the manifest data end - const unsigned char *ManifestEnd; - /// Pointer to the target code start - const unsigned char *ImageStart; - /// Pointer to the target code end - const unsigned char *ImageEnd; - /// the offload entry table (not used, for compatibility with OpenMP) - __tgt_offload_entry *EntriesBegin; - __tgt_offload_entry *EntriesEnd; -}; - -// Offload binary descriptor version supported by this library. -#define CNRI_BIN_DESC_STRUCT_VERSION ((uint16_t)1) - -/// This struct is a record of all the device code that may be offloaded. -/// It must match the __tgt_bin_desc structure generated by -/// the clang-offload-wrapper tool when their Version field match. -struct cnri_bin_desc { - /// version of this structure - for backward compatibility; - /// all modifications which change order/type/offsets of existing fields - /// should increment the version. - uint16_t Version; - /// Number of device binary images in this descriptor - uint16_t NumDeviceImages; - /// Device binary images data - cnri_device_image *DeviceImages; - /// the offload entry table (not used, for compatibility with OpenMP) - __tgt_offload_entry *HostEntriesBegin; - __tgt_offload_entry *HostEntriesEnd; -}; - -// TODO For now code below is a placeholder for future real implementation -typedef cl_context cnri_context; -typedef cl_event cnri_event; -typedef cl_program cnri_program; -typedef cl_kernel cnri_kernel; - -enum { CNRI_SUCCESS = CL_SUCCESS }; - -// redirections to OpenCL -#define cnriReleaseProgram clReleaseProgram -#define cnriRetainProgram clRetainProgram - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// CNRI unique APIs - -/// Selects the most appropriate device image based on runtime information and -/// the image characteristics -cl_int cnriSelectDeviceImage(cnri_context ctx, cnri_device_image **images, - cl_uint num_images, - cnri_device_image **selected_image); - -#ifdef __cplusplus -} -#endif // __cplusplus - -#define CHECK_CNRI_CODE(x) CHECK_OCL_CODE(x) -#define CHECK_CNRI_CODE_NO_EXC(x) CHECK_OCL_CODE_NO_EXC(x) diff --git a/sycl/include/CL/sycl/detail/common.hpp b/sycl/include/CL/sycl/detail/common.hpp index 4031e138f8d47..adb5b2beb28d0 100644 --- a/sycl/include/CL/sycl/detail/common.hpp +++ b/sycl/include/CL/sycl/detail/common.hpp @@ -15,9 +15,22 @@ #include #include #include - #include +// Select underlying runtime interface in compile-time (OpenCL or PI). +// Comment the define of the FORCE_SYCL_BE_OPENCL below to switch to PI. +// As such only one path (OpenCL today) is being regularily tested. +// +// TODO: we can just remove this when switch to PI completely. +// +#define FORCE_SYCL_BE_OPENCL + +#ifdef FORCE_SYCL_BE_OPENCL +#include +#else +#include +#endif + const char *stringifyErrorCode(cl_int error); #define OCL_CODE_TO_STR(code) \ @@ -78,6 +91,16 @@ const char *stringifyErrorCode(cl_int error); namespace cl { namespace sycl { namespace detail { + +// Select underlying runtime interface (RT) in compile-time (OpenCL or PI). +// As such only one path (OpenCL today) is being regularily tested. +// +#ifdef FORCE_SYCL_BE_OPENCL +using RT = cl::sycl::detail::opencl; +#else +using RT = cl::sycl::detail::pi; +#endif + // Helper function for extracting implementation from SYCL's interface objects. // Note! This function relies on the fact that all SYCL interface classes // contain "impl" field that points to implementation object. "impl" field diff --git a/sycl/include/CL/sycl/detail/device_host.hpp b/sycl/include/CL/sycl/detail/device_host.hpp deleted file mode 100644 index 237e9f6ea9a8c..0000000000000 --- a/sycl/include/CL/sycl/detail/device_host.hpp +++ /dev/null @@ -1,68 +0,0 @@ -//==--------------- device_host.hpp - SYCL host device --------------------== // -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include - -namespace cl { -namespace sycl { -namespace detail { -// TODO: 4.6.4 Partitioning into multiple SYCL devices -// TODO: 4.6.4.2 Device information descriptors -// TODO: Make code thread-safe -class device_host : public device_impl { -public: - device_host() = default; - cl_device_id get() const override { - throw invalid_object_error("This instance of device is a host instance"); - } - cl_device_id &getHandleRef() override { - throw invalid_object_error("This instance of device is a host instance"); - } - - bool is_host() const override { return true; } - - bool is_cpu() const override { return false; } - - bool is_gpu() const override { return false; } - - bool is_accelerator() const override { return false; } - - platform get_platform() const override { return platform(); } - - bool has_extension(const string_class &extension_name) const override { - // TODO: implement extension management; - return false; - } - - vector_class create_sub_devices(size_t nbSubDev) const { - // TODO: implement host device partitioning - throw runtime_error( - "Partitioning to subdevices of the host device is not implemented yet"); - } - - vector_class - create_sub_devices(const vector_class &counts) const { - // TODO: implement host device partitioning - throw runtime_error( - "Partitioning to subdevices of the host device is not implemented yet"); - } - - vector_class - create_sub_devices(info::partition_affinity_domain affinityDomain) const { - // TODO: implement host device partitioning - throw runtime_error( - "Partitioning to subdevices of the host device is not implemented yet"); - } -}; -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/include/CL/sycl/detail/device_impl.hpp b/sycl/include/CL/sycl/detail/device_impl.hpp index 0b3721908fee2..4f07f3495d2ba 100644 --- a/sycl/include/CL/sycl/detail/device_impl.hpp +++ b/sycl/include/CL/sycl/detail/device_impl.hpp @@ -33,7 +33,11 @@ class device_impl { // modification. Caller must ensure the returned object lives on stack only. // It can also be safely passed to the underlying native runtime API. // Warning. Returned reference will be invalid if device_impl was destroyed. + // + // TODO: change all uses of getHandleRef to get_handle, and remove the + // getHandleRef after that. virtual cl_device_id &getHandleRef() = 0; + virtual RT::pi_device get_handle() const = 0; virtual bool is_host() const = 0; @@ -62,9 +66,9 @@ class device_impl { if (is_host()) { return get_device_info_host(); } - return get_device_info_cl< + return get_device_info< typename info::param_traits::return_type, - param>::_(this->get()); + param>::_(this->get_handle()); } bool is_partition_supported(info::partition_property Prop) const { @@ -83,6 +87,154 @@ class device_impl { virtual bool has_extension(const string_class &extension_name) const = 0; }; + +// TODO: 4.6.4 Partitioning into multiple SYCL devices +// TODO: 4.6.4.2 Device information descriptors +// TODO: Make code thread-safe +class device_impl_pi : public device_impl { +public: + explicit device_impl_pi(RT::pi_device a_device) : m_device(a_device) { + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceGetInfo( + m_device, PI_DEVICE_INFO_TYPE, sizeof(RT::pi_device_type), &m_type, 0)); + + RT::pi_device parent; + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceGetInfo( + m_device, PI_DEVICE_INFO_PARENT, sizeof(RT::pi_device), &parent, 0)); + + m_isRootDevice = (nullptr == parent); + if (!m_isRootDevice) { + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceRetain(m_device)); + } + } + + ~device_impl_pi() { + if (!m_isRootDevice) { + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceRelease(m_device)); + } + } + + cl_device_id get() const override { + if (!m_isRootDevice) { + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceRetain(m_device)); + } + // TODO: check that device is an OpenCL interop one + return pi_cast(m_device); + } + + cl_device_id &getHandleRef() override { + // TODO: check that device is an OpenCL interop one before cast, or just + // remove when all the users are moved to get_handle. + return (cl_device_id&)(m_device); + } + RT::pi_device get_handle() const override { + return m_device; + } + + bool is_host() const override { return false; } + + bool is_cpu() const override { return (m_type == PI_DEVICE_TYPE_CPU); } + + bool is_gpu() const override { return (m_type == PI_DEVICE_TYPE_GPU); } + + bool is_accelerator() const override { + return (m_type == PI_DEVICE_TYPE_ACC); + } + + platform get_platform() const override { + RT::pi_platform plt; + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDeviceGetInfo( + m_device, PI_DEVICE_INFO_PLATFORM, sizeof(plt), &plt, 0)); + + // TODO: thisi possibly will violate common reference semantics, + // particularly, equality comparison may fail for two consecutive + // get_platform() on the same device, as it compares impl objects. + return createSyclObjFromImpl( + std::make_shared(plt)); + } + + bool has_extension(const string_class &extension_name) const override { + string_class all_extension_names = + get_device_info::_(m_device); + return (all_extension_names.find(extension_name) != std::string::npos); + } + + vector_class + create_sub_devices(const cl_device_partition_property *Properties, + size_t SubDevicesCount) const; + + vector_class + create_sub_devices(size_t ComputeUnits) const; + + vector_class + create_sub_devices(const vector_class &Counts) const; + + vector_class + create_sub_devices(info::partition_affinity_domain AffinityDomain) const; + +private: + RT::pi_device m_device = 0; + RT::pi_device_type m_type; + bool m_isRootDevice = false; +}; // class device_impl_pi + +// TODO: 4.6.4 Partitioning into multiple SYCL devices +// TODO: 4.6.4.2 Device information descriptors +// TODO: Make code thread-safe +class device_host : public device_impl { +public: + device_host() = default; + cl_device_id get() const override { + throw invalid_object_error("This instance of device is a host instance"); + } + cl_device_id &getHandleRef() override { + throw invalid_object_error("This instance of device is a host instance"); + } + RT::pi_device get_handle() const override { + pi_die("This instance of device is a host instance"); + } + + bool is_host() const override { return true; } + + bool is_cpu() const override { return false; } + + bool is_gpu() const override { return false; } + + bool is_accelerator() const override { return false; } + + platform get_platform() const override { return platform(); } + + bool has_extension(const string_class &extension_name) const override { + // TODO: implement extension management; + return false; + } + + vector_class create_sub_devices(size_t nbSubDev) const { + // TODO: implement host device partitioning + throw runtime_error( + "Partitioning to subdevices of the host device is not implemented yet"); + } + + vector_class + create_sub_devices(const vector_class &counts) const { + // TODO: implement host device partitioning + throw runtime_error( + "Partitioning to subdevices of the host device is not implemented yet"); + } + + vector_class + create_sub_devices(info::partition_affinity_domain affinityDomain) const { + // TODO: implement host device partitioning + throw runtime_error( + "Partitioning to subdevices of the host device is not implemented yet"); + } +}; // class device_host + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/detail/device_info.hpp b/sycl/include/CL/sycl/detail/device_info.hpp index 0c0e0f2313668..ccdffcc5de574 100644 --- a/sycl/include/CL/sycl/detail/device_info.hpp +++ b/sycl/include/CL/sycl/detail/device_info.hpp @@ -23,14 +23,11 @@ read_domain_bitfield(cl_device_affinity_domain bits); vector_class read_execution_bitfield(cl_device_exec_capabilities bits); -// Mapping expected SYCL return types to those returned by OpenCL calls -template struct sycl_to_ocl { using type = T; }; - -template <> struct sycl_to_ocl { using type = cl_bool; }; - -template <> struct sycl_to_ocl { using type = cl_device_id; }; - -template <> struct sycl_to_ocl { using type = cl_platform_id; }; +// Mapping expected SYCL return types to those returned by PI calls +template struct sycl_to_pi { using type = T; }; +template <> struct sycl_to_pi { using type = pi_bool; }; +template <> struct sycl_to_pi { using type = RT::pi_device; }; +template <> struct sycl_to_pi { using type = RT::pi_platform; }; // Mapping fp_config device info types to the values used to check fp support template struct check_fp_support {}; @@ -45,137 +42,147 @@ template <> struct check_fp_support { // Structs for emulating function template partial specialization // Default template for the general case -template struct get_device_info_cl { - static T _(cl_device_id dev) { - typename sycl_to_ocl::type result; - CHECK_OCL_CODE(clGetDeviceInfo(dev, (cl_device_info)param, sizeof(result), - &result, NULL)); +// TODO: get rid of remainig uses of OpenCL directly +// +template struct get_device_info { + static T _(RT::pi_device dev) { + typename sycl_to_pi::type result; + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(param), sizeof(result), &result, NULL)); return T(result); } }; -// Specialization for string return type, variable OpenCL return size -template struct get_device_info_cl { - static string_class _(cl_device_id dev) { +// Specialization for string return type, variable return size +template struct get_device_info { + static string_class _(RT::pi_device dev) { size_t resultSize; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)param, 0, NULL, &resultSize)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(param), 0, NULL, &resultSize)); if (resultSize == 0) { return string_class(); } unique_ptr_class result(new char[resultSize]); - CHECK_OCL_CODE(clGetDeviceInfo(dev, (cl_device_info)param, resultSize, - result.get(), NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(param), + resultSize, result.get(), NULL)); + return string_class(result.get()); } }; + +// Specialization for parent device +template +struct get_device_info { + static T _(RT::pi_device dev); +}; // Specialization for id return type -template struct get_device_info_cl, param> { - static id<3> _(cl_device_id dev) { +template struct get_device_info, param> { + static id<3> _(RT::pi_device dev) { size_t result[3]; - CHECK_OCL_CODE(clGetDeviceInfo(dev, (cl_device_info)param, sizeof(result), - &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(param), sizeof(result), &result, NULL)); return id<3>(result[0], result[1], result[2]); } }; // Specialization for fp_config types, checks the corresponding fp type support template -struct get_device_info_cl, param> { - static vector_class _(cl_device_id dev) { +struct get_device_info, param> { + static vector_class _(RT::pi_device dev) { // Check if fp type is supported - if (!get_device_info_cl< + if (!get_device_info< typename info::param_traits< info::device, check_fp_support::value>::return_type, check_fp_support::value>::_(dev)) { return {}; } cl_device_fp_config result; - CHECK_OCL_CODE(clGetDeviceInfo(dev, (cl_device_info)param, sizeof(result), - &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(param), sizeof(result), &result, NULL)); return read_fp_bitfield(result); } }; // Specialization for single_fp_config, no type support check required template <> -struct get_device_info_cl, +struct get_device_info, info::device::single_fp_config> { - static vector_class _(cl_device_id dev) { + static vector_class _(RT::pi_device dev) { cl_device_fp_config result; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::single_fp_config, - sizeof(result), &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::single_fp_config), + sizeof(result), &result, NULL)); return read_fp_bitfield(result); } }; // Specialization for queue_profiling, OpenCL returns a bitfield -template <> struct get_device_info_cl { - static bool _(cl_device_id dev) { +template <> struct get_device_info { + static bool _(RT::pi_device dev) { cl_command_queue_properties result; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::queue_profiling, - sizeof(result), &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::queue_profiling), + sizeof(result), &result, NULL)); return (result & CL_QUEUE_PROFILING_ENABLE); } }; // Specialization for exec_capabilities, OpenCL returns a bitfield template <> -struct get_device_info_cl, - info::device::execution_capabilities> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::execution_capabilities> { + static vector_class _(RT::pi_device dev) { cl_device_exec_capabilities result; - CHECK_OCL_CODE(clGetDeviceInfo( - dev, (cl_device_info)info::device::execution_capabilities, - sizeof(result), &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::execution_capabilities), + sizeof(result), &result, NULL)); return read_execution_bitfield(result); } }; // Specialization for built in kernels, splits the string returned by OpenCL template <> -struct get_device_info_cl, - info::device::built_in_kernels> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::built_in_kernels> { + static vector_class _(RT::pi_device dev) { string_class result = - get_device_info_cl::_( - dev); + get_device_info::_(dev); return split_string(result, ';'); } }; // Specialization for extensions, splits the string returned by OpenCL template <> -struct get_device_info_cl, - info::device::extensions> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::extensions> { + static vector_class _(RT::pi_device dev) { string_class result = - get_device_info_cl::_(dev); + get_device_info::_(dev); return split_string(result, ' '); } }; // Specialization for partition properties, variable OpenCL return size template <> -struct get_device_info_cl, - info::device::partition_properties> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::partition_properties> { + static vector_class _(RT::pi_device dev) { + auto info_partition = + pi_cast(info::device::partition_properties); + size_t resultSize; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::partition_properties, - 0, NULL, &resultSize)); + PI_CALL(RT::piDeviceGetInfo(dev, info_partition, 0, NULL, &resultSize)); + size_t arrayLength = resultSize / sizeof(cl_device_partition_property); if (arrayLength == 0) { return {}; } unique_ptr_class arrayResult( new cl_device_partition_property[arrayLength]); - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::partition_properties, - resultSize, arrayResult.get(), NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, info_partition, resultSize, arrayResult.get(), NULL)); vector_class result; for (size_t i = 0; i < arrayLength - 1; ++i) { @@ -187,13 +194,13 @@ struct get_device_info_cl, // Specialization for partition affinity domains, OpenCL returns a bitfield template <> -struct get_device_info_cl, - info::device::partition_affinity_domains> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::partition_affinity_domains> { + static vector_class _(RT::pi_device dev) { cl_device_affinity_domain result; - CHECK_OCL_CODE(clGetDeviceInfo( - dev, (cl_device_info)info::device::partition_affinity_domains, - sizeof(result), &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::partition_affinity_domains), + sizeof(result), &result, NULL)); return read_domain_bitfield(result); } }; @@ -201,20 +208,22 @@ struct get_device_info_cl, // Specialization for partition type affinity domain, OpenCL can return other // partition properties instead template <> -struct get_device_info_cl { - static info::partition_affinity_domain _(cl_device_id dev) { +struct get_device_info { + static info::partition_affinity_domain _(RT::pi_device dev) { size_t resultSize; - CHECK_OCL_CODE(clGetDeviceInfo( - dev, (cl_device_info)info::device::partition_type_affinity_domain, 0, - NULL, &resultSize)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast( + info::device::partition_type_affinity_domain), + 0, NULL, &resultSize)); if (resultSize != 1) { return info::partition_affinity_domain::not_applicable; } cl_device_partition_property result; - CHECK_OCL_CODE(clGetDeviceInfo( - dev, (cl_device_info)info::device::partition_type_affinity_domain, - sizeof(result), &result, NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast( + info::device::partition_type_affinity_domain), + sizeof(result), &result, NULL)); if (result == CL_DEVICE_AFFINITY_DOMAIN_NUMA || result == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE || result == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE || @@ -229,12 +238,12 @@ struct get_device_info_cl -struct get_device_info_cl { - static info::partition_property _(cl_device_id dev) { +struct get_device_info { + static info::partition_property _(RT::pi_device dev) { size_t resultSize; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, CL_DEVICE_PARTITION_TYPE, 0, NULL, &resultSize)); + PI_CALL(RT::piDeviceGetInfo( + dev, PI_DEVICE_INFO_PARTITION_TYPE, 0, NULL, &resultSize)); if (!resultSize) return info::partition_property::no_partition; @@ -242,42 +251,34 @@ struct get_device_info_cl arrayResult( new cl_device_partition_property[arrayLength]); - CHECK_OCL_CODE(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_TYPE, resultSize, - arrayResult.get(), NULL)); + PI_CALL(RT::piDeviceGetInfo( + dev, PI_DEVICE_INFO_PARTITION_TYPE, resultSize, arrayResult.get(), 0)); if (!arrayResult[0]) return info::partition_property::no_partition; return info::partition_property(arrayResult[0]); } }; - +/* // Specialization for parent device -template -struct get_device_info_cl { - static T _(cl_device_id dev) { - typename sycl_to_ocl::type result; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::parent_device, - sizeof(result), &result, NULL)); - if (result == nullptr) - throw invalid_object_error( - "No parent for device because it is not a subdevice"); - return T(result); - } +template <> +struct get_device_info { + static device _(RT::pi_device dev); }; - +*/ // Specialization for supported subgroup sizes template <> -struct get_device_info_cl, - info::device::sub_group_sizes> { - static vector_class _(cl_device_id dev) { +struct get_device_info, + info::device::sub_group_sizes> { + static vector_class _(RT::pi_device dev) { size_t resultSize = 0; - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::sub_group_sizes, - 0, nullptr, &resultSize)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::sub_group_sizes), + 0, nullptr, &resultSize)); + vector_class result(resultSize); - CHECK_OCL_CODE( - clGetDeviceInfo(dev, (cl_device_info)info::device::sub_group_sizes, - resultSize, result.data(), nullptr)); + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::sub_group_sizes), + resultSize, result.data(), nullptr)); return result; } }; diff --git a/sycl/include/CL/sycl/detail/device_opencl.hpp b/sycl/include/CL/sycl/detail/device_opencl.hpp deleted file mode 100644 index a31f41b005ec1..0000000000000 --- a/sycl/include/CL/sycl/detail/device_opencl.hpp +++ /dev/null @@ -1,146 +0,0 @@ -//==------------ device_opencl.hpp - SYCL OpenCL device --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include -#include -#include - -class device_selector; - -namespace cl { -namespace sycl { -namespace detail { -// TODO: 4.6.4 Partitioning into multiple SYCL devices -// TODO: 4.6.4.2 Device information descriptors -// TODO: Make code thread-safe -class device_opencl : public device_impl { -public: - /** Constructs a device class instance using cl device_id of the OpenCL - * device. */ - explicit device_opencl(cl_device_id deviceId) { - id = deviceId; - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE( - clGetDeviceInfo(id, CL_DEVICE_TYPE, sizeof(cl_device_type), &type, 0)); - cl_device_id parent; - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(clGetDeviceInfo(id, CL_DEVICE_PARENT_DEVICE, - sizeof(cl_device_id), &parent, nullptr)); - isRootDevice = (nullptr == parent); - if (!isRootDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(clRetainDevice(id)); - } - } - - ~device_opencl() { - if (!isRootDevice) { - // TODO replace CHECK_OCL_CODE_NO_EXC to CHECK_OCL_CODE and - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE_NO_EXC(clReleaseDevice(id)); - } - } - - cl_device_id get() const override { - if (!isRootDevice) { - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(clRetainDevice(id)); - } - return id; - } - - cl_device_id &getHandleRef() override{ - return id; - } - - bool is_host() const override { return false; } - - bool is_cpu() const override { return (type == CL_DEVICE_TYPE_CPU); } - - bool is_gpu() const override { return (type == CL_DEVICE_TYPE_GPU); } - - bool is_accelerator() const override { - return (type == CL_DEVICE_TYPE_ACCELERATOR); - } - - platform get_platform() const override { - cl_platform_id plt_id; - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE( - clGetDeviceInfo(id, CL_DEVICE_PLATFORM, sizeof(plt_id), &plt_id, 0)); - return platform(plt_id); - } - - bool has_extension(const string_class &extension_name) const override { - string_class all_extension_names = - get_device_info_cl::_(id); - return (all_extension_names.find(extension_name) != std::string::npos); - } - - vector_class - create_sub_devices(const cl_device_partition_property *Properties, - size_t SubDevicesCount) const { - vector_class SubDevices(SubDevicesCount); - cl_uint ReturnedSubDevices; - CHECK_OCL_CODE(clCreateSubDevices(id, Properties, SubDevicesCount, - SubDevices.data(), &ReturnedSubDevices)); - return vector_class(SubDevices.begin(), SubDevices.end()); - } - - vector_class create_sub_devices(size_t ComputeUnits) const { - if (!is_partition_supported(info::partition_property::partition_equally)) { - throw cl::sycl::feature_not_supported(); - } - size_t SubDevicesCount = - get_info() / ComputeUnits; - const cl_device_partition_property Properties[3] = { - CL_DEVICE_PARTITION_EQUALLY, (cl_device_partition_property)ComputeUnits, - 0}; - return create_sub_devices(Properties, SubDevicesCount); - } - - vector_class - create_sub_devices(const vector_class &Counts) const { - if (!is_partition_supported( - info::partition_property::partition_by_counts)) { - throw cl::sycl::feature_not_supported(); - } - static const cl_device_partition_property P[] = { - CL_DEVICE_PARTITION_BY_COUNTS, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, - 0}; - vector_class Properties(P, P + 3); - Properties.insert(Properties.begin() + 1, Counts.begin(), Counts.end()); - return create_sub_devices(Properties.data(), Counts.size()); - } - - vector_class - create_sub_devices(info::partition_affinity_domain AffinityDomain) const { - if (!is_partition_supported( - info::partition_property::partition_by_affinity_domain) || - !is_affinity_supported(AffinityDomain)) { - throw cl::sycl::feature_not_supported(); - } - const cl_device_partition_property Properties[3] = { - CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - (cl_device_partition_property)AffinityDomain, 0}; - size_t SubDevicesCount = - get_info(); - return create_sub_devices(Properties, SubDevicesCount); - } - -private: - cl_device_id id = 0; - cl_device_type type = 0; - bool isRootDevice = false; -}; -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/source/detail/force_device.hpp b/sycl/include/CL/sycl/detail/force_device.hpp similarity index 100% rename from sycl/source/detail/force_device.hpp rename to sycl/include/CL/sycl/detail/force_device.hpp diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h new file mode 100644 index 0000000000000..7e5d38538967e --- /dev/null +++ b/sycl/include/CL/sycl/detail/pi.h @@ -0,0 +1,245 @@ +//==---------- pi.h - Plugin Interface -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the definition of a generic offload Plugin Interface (PI), which is +// used by the SYCL implementation to connect to multiple device back-ends, +// e.g. to OpenCL. The interface is intentionally kept C-only for the +// purpose of having full flexibility and interoperability with different +// environments. +// +#ifndef _PI_H_ +#define _PI_H_ + +// TODO: we need a mapping of PI to OpenCL somewhere, and this can be done +// elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now +// done here, for efficiency and simplicity. +// +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef uint32_t pi_uint32; +typedef uint64_t pi_uint64; +typedef pi_uint32 pi_bool; + +// +// NOTE: prefer to map 1:1 to OpenCL so that no translation is needed +// for PI <-> OpenCL ways. The PI <-> to other BE translation is almost +// always needed anyway. +// +// TODO: populate PI enums. +// +typedef enum { + PI_SUCCESS = CL_SUCCESS +} _pi_result; + +typedef enum { + PI_PLATFORM_INFO_EXTENSIONS = CL_PLATFORM_EXTENSIONS, + PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, + PI_PLATFORM_INFO_PROFILE = CL_PLATFORM_PROFILE, + PI_PLATFORM_INFO_VENDOR = CL_PLATFORM_VENDOR, + PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, +} _pi_platform_info; + +// NOTE: this is made 64-bit to match the size of cl_device_type to +// make the translation to OpenCL transparent. +// +typedef enum : pi_uint64 { + PI_DEVICE_TYPE_CPU = CL_DEVICE_TYPE_CPU, + PI_DEVICE_TYPE_GPU = CL_DEVICE_TYPE_GPU, + PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR +} _pi_device_type; + +// TODO: populate and sync with cl::sycl::info::device +typedef enum { + PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, + PI_DEVICE_INFO_PARENT = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE +} _pi_device_info; + +typedef _pi_result pi_result; +typedef _pi_platform_info pi_platform_info; +typedef _pi_device_type pi_device_type; +typedef _pi_device_info pi_device_info; + +// Opaque data type for compatibility with OpenMP. +typedef void * _pi_offload_entry; + +/// Types of device binary. +typedef uint8_t pi_device_binary_type; +static const uint8_t PI_DEVICE_BINARY_TYPE_NONE = 0; // format is not determined +static const uint8_t PI_DEVICE_BINARY_TYPE_NATIVE = 1; // specific to a device +// portable binary types go next +static const uint8_t PI_DEVICE_BINARY_TYPE_SPIRV = 2; // SPIR-V +static const uint8_t PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3; // LLVM bitcode + +// Device binary descriptor version supported by this library. +static const uint16_t PI_DEVICE_BINARY_VERSION = 1; + +// The kind of offload model the binary employs; must be 4 for SYCL +static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; + +/// Target identification strings for +/// pi_device_binary_struct.DeviceTargetSpec +/// +#define PI_DEVICE_BINARY_TARGET_UNKNOWN "" +#define PI_DEVICE_BINARY_TARGET_SPIRV32 "spir" +#define PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64"; + +/// This struct is a record of the device binary information. If the Kind field +/// denotes a portable binary type (SPIRV or LLVMIR), the DeviceTargetSpec field +/// can still be specific and denote e.g. FPGA target. +/// It must match the __tgt_device_image structure generated by +/// the clang-offload-wrapper tool when their Version field match. +struct pi_device_binary_struct { + /// version of this structure - for backward compatibility; + /// all modifications which change order/type/offsets of existing fields + /// should increment the version. + uint16_t Version; + /// the type of offload model the binary employs; must be 4 for SYCL + uint8_t Kind; + /// format of the binary data - SPIRV, LLVMIR bitcode,... + uint8_t Format; + /// null-terminated string representation of the device's target architecture + const char *DeviceTargetSpec; + /// a null-terminated string; target- and compiler-specific options + /// which are suggested to use to "build" program at runtime + const char *BuildOptions; + /// Pointer to the manifest data start + const char *ManifestStart; + /// Pointer to the manifest data end + const char *ManifestEnd; + /// Pointer to the target code start + const unsigned char *BinaryStart; + /// Pointer to the target code end + const unsigned char *BinaryEnd; + /// the offload entry table (not used, for compatibility with OpenMP) + _pi_offload_entry EntriesBegin; + _pi_offload_entry EntriesEnd; +}; +typedef pi_device_binary_struct * pi_device_binary; + +// Offload binaries descriptor version supported by this library. +static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; + +/// This struct is a record of all the device code that may be offloaded. +/// It must match the __tgt_bin_desc structure generated by +/// the clang-offload-wrapper tool when their Version field match. +struct pi_device_binaries_struct { + /// version of this structure - for backward compatibility; + /// all modifications which change order/type/offsets of existing fields + /// should increment the version. + uint16_t Version; + /// Number of device binaries in this descriptor + uint16_t NumDeviceBinaries; + /// Device binaries data + pi_device_binary DeviceBinaries; + /// the offload entry table (not used, for compatibility with OpenMP) + _pi_offload_entry *HostEntriesBegin; + _pi_offload_entry *HostEntriesEnd; +}; +typedef pi_device_binaries_struct * pi_device_binaries; + +// Opaque types that make reading build log errors easier. +class _pi_platform; +class _pi_device; +class _pi_context; + +typedef _pi_platform * pi_platform; +typedef _pi_device * pi_device; +typedef _pi_context * pi_context; +// TODO: remove dependency on OpenCL +typedef cl_program pi_program; + +// +// Following section contains SYCL RT Plugin Interface (PI) methods +// having direct analogy in OpenCL, and needed for the core SYCL +// functionality. The convention is to prefix such interfaces with "pi". +// +// TODO: describe interfaces in Doxygen format +// + +// Platform +pi_result piPlatformsGet( + pi_uint32 num_entries, + pi_platform * platforms, + pi_uint32 * num_platforms); + +pi_result piPlatformGetInfo( + pi_platform platform, + pi_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +// Device +pi_result piDevicesGet( + pi_platform platform, + pi_device_type device_type, + pi_uint32 num_entries, + pi_device * devices, + pi_uint32 * num_devices); + +pi_result piDeviceGetInfo( + pi_device device, + pi_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +pi_result piDeviceRetain(pi_device device); + +pi_result piDeviceRelease(pi_device device); + +pi_result piDevicePartition( + pi_device device, + const cl_device_partition_property * properties, + pi_uint32 num_devices, + pi_device * out_devices, + pi_uint32 * out_num_devices); + +// +// The following section contains SYCL RT Plugin Interface (PI) methods +// having direct analogy in OpenCL, but only needed for greater +// interoperability with the OpenCL itself, and not to run core SYCL. +// The convention is to prefix such interfaces with "picl". +// +// TODO: describe interfaces in Doxygen format + +pi_program piclProgramCreateWithSource( // TODO: change to return pi_result + pi_context context, + pi_uint32 count, + const char ** strings, + const size_t * lengths, + pi_result * errcode); + + +// +// The following section contains SYCL RT Plugin Interface (PI) methods +// having *no* direct analogy in OpenCL standard. The convention is +// to prefix such interfaces with "piext". +// +// TODO: describe interfaces in Doxygen format + +/// Selects the most appropriate device ninary based on runtime information +/// and the IR characteristics. +/// +pi_result piextDeviceSelectBinary( + pi_device device, // TODO: does this need to be context? + pi_device_binary * binaries, + pi_uint32 num_binaries, + pi_device_binary * selected_binary); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // _PI_H_ diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp new file mode 100644 index 0000000000000..dd5e34b5b3dbb --- /dev/null +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -0,0 +1,77 @@ +//==---------- pi.hpp - Plugin Interface for SYCL RT -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// C++ wrapper of extern "C" PI interfaces +// +#pragma once + +#include + +namespace cl { +namespace sycl { +namespace detail { + +class pi { +public: + using pi_result = ::pi_result; + using pi_platform = ::pi_platform; + using pi_device = ::pi_device; + using pi_device_type = ::pi_device_type; + using pi_device_binary_type = ::pi_device_binary_type; + using pi_device_info = ::pi_device_info; + using pi_program = ::pi_program; + + // Convinience macro to have things look compact. + #define PI_API(pi_api) \ + static constexpr decltype(::pi_api) * pi_api = &::pi_api; + + // Platform + PI_API(piPlatformsGet) + PI_API(piPlatformGetInfo) + // Device + PI_API(piDevicesGet) + PI_API(piDeviceGetInfo) + PI_API(piDevicePartition) + PI_API(piDeviceRetain) + PI_API(piDeviceRelease) + // IR + PI_API(piextDeviceSelectBinary) +}; + +// Report error and no return (keeps compiler happy about no return statements). +[[noreturn]] void pi_die(const char *message); +void pi_assert(bool condition, const char *message = 0); + +#define STRINGIZE(x) STRINGIZE2(x) +#define STRINGIZE2(x) #x +#define PI_ASSERT(cond, msg) \ + pi_assert(condition, "assert @ " __FILE__ ":" STRINGIZE(__LINE__) msg); + +// This does the call, the trace and the check for no errors. +// TODO: remove dependency on CHECK_OCL_CODE. +// TODO: implement a more mature and controllable tracing of PI calls. +void pi_trace(const char *format, ...); +#define PI_CALL(pi_call) { \ + pi_trace("PI ---> %s\n", #pi_call); \ + auto __result = (pi_call); \ + pi_trace("PI <--- %d\n", __result); \ + CHECK_OCL_CODE(__result); \ +} + +// Want all the needed casts be explicit, do not define conversion operators. +template +To pi_cast(From value) { + // TODO: see if more sanity checks are possible. + pi_assert(sizeof(From) == sizeof(To)); + return (To)(value); +} + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/include/CL/sycl/detail/pi_opencl.hpp b/sycl/include/CL/sycl/detail/pi_opencl.hpp new file mode 100644 index 0000000000000..6c8d353afda82 --- /dev/null +++ b/sycl/include/CL/sycl/detail/pi_opencl.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +// +// TODO: there is no such functionality in OpenCL so call PI OpenCL +// plugin directly for now, the whole "opencl" class is temporary anyway. +// +extern "C" decltype(::piextDeviceSelectBinary) ocl_piextDeviceSelectBinary; +using cl_device_binary_type = ::pi_device_binary_type; + +// Mapping of PI interfaces to OpenCL at compile-time. +// This is the default config until the entire SYCL RT is transferred to PI. +// TODO: we can just remove this when default is change to PI. +// +class opencl { +public: + using pi_result = cl_int; + using pi_platform = cl_platform_id; + using pi_device = cl_device_id; + using pi_device_type = cl_device_type; + using pi_device_binary_type = cl_device_binary_type; + using pi_device_info = cl_device_info; + using pi_program = cl_program; + + // Convinience macro to have mapping look like a compact table. + #define PI_CL(pi_api, cl_api) \ + static constexpr decltype(cl_api) * pi_api = &cl_api; + + // Platform + PI_CL(piPlatformsGet, clGetPlatformIDs) + PI_CL(piPlatformGetInfo, clGetPlatformInfo) + // Device + PI_CL(piDevicesGet, clGetDeviceIDs) + PI_CL(piDeviceGetInfo, clGetDeviceInfo) + PI_CL(piDevicePartition, clCreateSubDevices) + PI_CL(piDeviceRetain, clRetainDevice) + PI_CL(piDeviceRelease, clReleaseDevice) + // IR + PI_CL(piextDeviceSelectBinary, ocl_piextDeviceSelectBinary) + + #undef PI_CL +}; + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/include/CL/sycl/detail/platform_host.hpp b/sycl/include/CL/sycl/detail/platform_host.hpp deleted file mode 100644 index efb612576e02e..0000000000000 --- a/sycl/include/CL/sycl/detail/platform_host.hpp +++ /dev/null @@ -1,40 +0,0 @@ -//==------------ platform_host.hpp - SYCL host platform --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once -#include - -// 4.6.2 Platform class for host platform -namespace cl { -namespace sycl { - -// Forward declaration -class device; - -namespace detail { -// TODO: implement extension management -// TODO: implement parameters treatment - -class platform_host : public platform_impl { -public: - vector_class get_devices( - info::device_type dev_type = info::device_type::all) const override; - - bool has_extension(const string_class &extension_name) const override { - return false; - } - - cl_platform_id get() const override { - throw invalid_object_error("This instance of platform is a host instance"); - } - - bool is_host() const override { return true; } -}; // class platform_host -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/include/CL/sycl/detail/platform_impl.hpp b/sycl/include/CL/sycl/detail/platform_impl.hpp index 8d1831bea6323..3525fddce93e1 100644 --- a/sycl/include/CL/sycl/detail/platform_impl.hpp +++ b/sycl/include/CL/sycl/detail/platform_impl.hpp @@ -8,8 +8,9 @@ #pragma once #include -#include +#include #include +#include #include // 4.6.2 Platform class @@ -39,7 +40,7 @@ class platform_impl { if (is_host()) { return get_platform_info_host(); } - return get_platform_info_cl< + return get_platform_info< typename info::param_traits::return_type, param>::_(this->get()); } @@ -49,7 +50,52 @@ class platform_impl { virtual cl_platform_id get() const = 0; virtual ~platform_impl() = default; -}; // class platform_impl +}; + +// TODO: merge platform_impl_pi, platform_impl_host and platform_impl? +class platform_impl_pi : public platform_impl { +public: + platform_impl_pi(RT::pi_platform a_platform) : m_platform(a_platform) {} + + vector_class get_devices( + info::device_type deviceType = info::device_type::all) const override; + + bool has_extension(const string_class &extension_name) const override { + string_class all_extension_names = + get_platform_info::_(m_platform); + return (all_extension_names.find(extension_name) != std::string::npos); + } + + cl_platform_id get() const override { + return pi_cast(m_platform); } + + bool is_host() const override { return false; } + + static vector_class get_platforms(); + +private: + RT::pi_platform m_platform = 0; +}; // class platform_opencl + +// TODO: implement extension management +// TODO: implement parameters treatment +// TODO: merge platform_impl_pi, platform_impl_host and platform_impl? +class platform_impl_host : public platform_impl { +public: + vector_class get_devices( + info::device_type dev_type = info::device_type::all) const override; + + bool has_extension(const string_class &extension_name) const override { + return false; + } + + cl_platform_id get() const override { + throw invalid_object_error("This instance of platform is a host instance"); + } + + bool is_host() const override { return true; } +}; // class platform_host + } // namespace detail } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/platform_info.hpp b/sycl/include/CL/sycl/detail/platform_info.hpp index 4e2336015df63..c56a7458fc0cb 100644 --- a/sycl/include/CL/sycl/detail/platform_info.hpp +++ b/sycl/include/CL/sycl/detail/platform_info.hpp @@ -15,33 +15,33 @@ namespace cl { namespace sycl { namespace detail { -// OpenCL platform information methods -template struct get_platform_info_cl {}; +// The platform information methods +template struct get_platform_info {}; template -struct get_platform_info_cl { - static string_class _(cl_platform_id plt) { +struct get_platform_info { + static string_class _(RT::pi_platform plt) { size_t resultSize; // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE( - clGetPlatformInfo(plt, cl_platform_info(param), 0, NULL, &resultSize)); + PI_CALL(RT::piPlatformGetInfo( + plt, pi_cast(param), 0, 0, &resultSize)); if (resultSize == 0) { return ""; } unique_ptr_class result(new char[resultSize]); // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(clGetPlatformInfo(plt, cl_platform_info(param), resultSize, - result.get(), NULL)); + PI_CALL(RT::piPlatformGetInfo( + plt, pi_cast(param), resultSize, result.get(), 0)); return result.get(); } }; template <> -struct get_platform_info_cl, - info::platform::extensions> { - static vector_class _(cl_platform_id plt) { +struct get_platform_info, + info::platform::extensions> { + static vector_class _(RT::pi_platform plt) { string_class result = - get_platform_info_cl::_(plt); + get_platform_info::_(plt); return split_string(result, ' '); } }; diff --git a/sycl/include/CL/sycl/detail/platform_opencl.hpp b/sycl/include/CL/sycl/detail/platform_opencl.hpp deleted file mode 100644 index a88fd4552d0ad..0000000000000 --- a/sycl/include/CL/sycl/detail/platform_opencl.hpp +++ /dev/null @@ -1,44 +0,0 @@ -//==-------- platform_opencl.hpp - SYCL OpenCL platform --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once -#include - -// 4.6.2 Platform class for opencl platform -namespace cl { -namespace sycl { - -// Forward declaration -class device_selector; -class device; - -namespace detail { -// TODO: implement parameters treatment -class platform_opencl : public platform_impl { -public: - platform_opencl(cl_platform_id platform_id) : id(platform_id) {} - - vector_class get_devices( - info::device_type deviceType = info::device_type::all) const override; - - bool has_extension(const string_class &extension_name) const override { - string_class all_extension_names = - get_platform_info_cl::_(id); - return (all_extension_names.find(extension_name) != std::string::npos); - } - - cl_platform_id get() const override { return id; } - - bool is_host() const override { return false; } - -private: - cl_platform_id id = 0; -}; // class platform_opencl -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp index 20e82731fadcf..0980a7eb3999c 100644 --- a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp +++ b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include #include #include @@ -20,12 +20,12 @@ /// Executed as a part of current module's (.exe, .dll) static initialization. /// Registers device executable images with the runtime. -extern "C" void __tgt_register_lib(cnri_bin_desc *desc); +extern "C" void __tgt_register_lib(pi_device_binaries desc); /// Executed as a part of current module's (.exe, .dll) static /// de-initialization. /// Unregisters device executable images with the runtime. -extern "C" void __tgt_unregister_lib(cnri_bin_desc *desc); +extern "C" void __tgt_unregister_lib(pi_device_binaries desc); // +++ } @@ -34,7 +34,7 @@ namespace sycl { class context; namespace detail { -using DeviceImage = cnri_device_image; +using DeviceImage = pi_device_binary_struct; // Custom deleter for the DeviceImage. Must only be called for "orphan" images // allocated by the runtime. Those Images which are part of binaries must not @@ -57,13 +57,13 @@ class ProgramManager { const string_class &KernelName); cl_program getClProgramFromClKernel(cl_kernel ClKernel); - void addImages(cnri_bin_desc *DeviceImages); + void addImages(pi_device_binaries DeviceImages); void debugDumpBinaryImages() const; void debugDumpBinaryImage(const DeviceImage *Img) const; private: - cnri_program loadProgram(OSModuleHandle M, const context &Context, - DeviceImage **I = nullptr); + RT::pi_program loadProgram(OSModuleHandle M, const context &Context, + DeviceImage **I = nullptr); void build(cl_program &ClProgram, const string_class &Options = "", std::vector ClDevices = std::vector()); diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp old mode 100644 new mode 100755 index 35b16f25db1ee..cfb459666aafe --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -98,8 +98,12 @@ class device { private: std::shared_ptr impl; - template - friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); + device(std::shared_ptr impl) : impl(impl) {} + + template + friend decltype(T::impl) detail::getSyclObjImpl(const T &SyclObject); + template + friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); }; } // namespace sycl diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index e1f7df2fe150f..ee92ce0f7cb1c 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -18,16 +18,17 @@ class program; class device; class platform; +// TODO: stop using OpenCL directly, use PI. namespace info { // Information descriptors // A.1 Platform information descriptors -enum class platform : cl_platform_info { - profile = CL_PLATFORM_PROFILE, - version = CL_PLATFORM_VERSION, - name = CL_PLATFORM_NAME, - vendor = CL_PLATFORM_VENDOR, - extensions = CL_PLATFORM_EXTENSIONS +enum class platform { + profile = PI_PLATFORM_INFO_PROFILE, + version = PI_PLATFORM_INFO_VERSION, + name = PI_PLATFORM_INFO_NAME, + vendor = PI_PLATFORM_INFO_VENDOR, + extensions = PI_PLATFORM_INFO_EXTENSIONS, }; // A.2 Context information desctiptors @@ -122,14 +123,15 @@ enum class device : cl_device_info { partition_type_property }; -enum class device_type : cl_device_type { - cpu = CL_DEVICE_TYPE_CPU, - gpu = CL_DEVICE_TYPE_GPU, - accelerator = CL_DEVICE_TYPE_ACCELERATOR, - custom = CL_DEVICE_TYPE_CUSTOM, +enum class device_type : pi_uint64 { + cpu = PI_DEVICE_TYPE_CPU, + gpu = PI_DEVICE_TYPE_GPU, + accelerator = PI_DEVICE_TYPE_ACC, + // TODO: figure out if we need all the below in PI + custom = CL_DEVICE_TYPE_CUSTOM, automatic, host, - all = CL_DEVICE_TYPE_ALL + all = CL_DEVICE_TYPE_ALL }; enum class partition_property : cl_device_partition_property { diff --git a/sycl/include/CL/sycl/platform.hpp b/sycl/include/CL/sycl/platform.hpp old mode 100644 new mode 100755 index 306cdba3e8737..6f015e73a1f4d --- a/sycl/include/CL/sycl/platform.hpp +++ b/sycl/include/CL/sycl/platform.hpp @@ -9,13 +9,13 @@ #pragma once #include #include +#include #include // 4.6.2 Platform class #include #include namespace cl { namespace sycl { - // TODO: make code thread-safe // Forward declaration @@ -63,8 +63,13 @@ class platform { private: std::shared_ptr impl; - template - friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); + platform(std::shared_ptr impl) : impl(impl) {} + + template + friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); + template + friend decltype(T::impl) detail::getSyclObjImpl(const T &SyclObject); + }; // class platform } // namespace sycl } // namespace cl diff --git a/sycl/source/detail/cnri.cpp b/sycl/source/detail/cnri.cpp deleted file mode 100644 index 0d30943c562b2..0000000000000 --- a/sycl/source/detail/cnri.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- cnri.cpp - SYCL common native runtime interface impl-----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "CL/sycl/detail/cnri.h" -#include - -#include -#include - -cl_int cnriSelectDeviceImage(cnri_context ctx, cnri_device_image **images, - cl_uint num_images, - cnri_device_image **selected_image) { - // TODO dummy implementation. - // Real implementaion will use the same mechanism OpenCL ICD dispatcher - // uses. Somthing like: - // CNRI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, CNRI_INVALID_CONTEXT); - // return context->dispatch->cnriSelectDeviceImage( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by CNRI - // plugin for platform/device the ctx was created for. - - *selected_image = num_images > 0 ? images[0] : nullptr; - return CNRI_SUCCESS; -} diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp new file mode 100644 index 0000000000000..67dedf2c6c049 --- /dev/null +++ b/sycl/source/detail/device_impl.cpp @@ -0,0 +1,91 @@ +//==----------------- device_impl.hpp - SYCL device ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +vector_class +device_impl_pi::create_sub_devices( + const cl_device_partition_property *Properties, + size_t SubDevicesCount) const { + + vector_class SubDevices(SubDevicesCount); + pi_uint32 ReturnedSubDevices = 0; + PI_CALL(RT::piDevicePartition(m_device, Properties, SubDevicesCount, + SubDevices.data(), &ReturnedSubDevices)); + // TODO: check that returned number of sub-devices matches what was + // requested, otherwise this walk below is wrong. + // + // TODO: Need to describe the subdevice model. Some sub_device management + // may be necessary. What happens if create_sub_devices is called multiple + // times with the same arguments? + // + vector_class res; + std::for_each(SubDevices.begin(), SubDevices.end(), + [&res](const RT::pi_device &a_pi_device) { + device sycl_device = + detail::createSyclObjFromImpl( + std::make_shared(a_pi_device)); + res.push_back(sycl_device); + }); + return res; +} + +vector_class +device_impl_pi::create_sub_devices(size_t ComputeUnits) const { + + if (!is_partition_supported(info::partition_property::partition_equally)) { + throw cl::sycl::feature_not_supported(); + } + size_t SubDevicesCount = + get_info() / ComputeUnits; + const cl_device_partition_property Properties[3] = { + CL_DEVICE_PARTITION_EQUALLY, (cl_device_partition_property)ComputeUnits, + 0}; + return create_sub_devices(Properties, SubDevicesCount); +} + +vector_class +device_impl_pi::create_sub_devices(const vector_class &Counts) const { + + if (!is_partition_supported( + info::partition_property::partition_by_counts)) { + throw cl::sycl::feature_not_supported(); + } + static const cl_device_partition_property P[] = { + CL_DEVICE_PARTITION_BY_COUNTS, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, + 0}; + vector_class Properties(P, P + 3); + Properties.insert(Properties.begin() + 1, Counts.begin(), Counts.end()); + return create_sub_devices(Properties.data(), Counts.size()); +} + +vector_class +device_impl_pi::create_sub_devices( + info::partition_affinity_domain AffinityDomain) const { + + if (!is_partition_supported( + info::partition_property::partition_by_affinity_domain) || + !is_affinity_supported(AffinityDomain)) { + throw cl::sycl::feature_not_supported(); + } + const cl_device_partition_property Properties[3] = { + CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + (cl_device_partition_property)AffinityDomain, 0}; + size_t SubDevicesCount = + get_info(); + return create_sub_devices(Properties, SubDevicesCount); +} + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/device_info.cpp b/sycl/source/detail/device_info.cpp index 869049a665058..2a51a7ae83dc8 100644 --- a/sycl/source/detail/device_info.cpp +++ b/sycl/source/detail/device_info.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include +#include #include #include #include @@ -22,6 +23,27 @@ namespace cl { namespace sycl { namespace detail { +// Specialization for parent device +template <> +device get_device_info::_( + RT::pi_device dev) { + + typename sycl_to_pi::type result; + PI_CALL(RT::piDeviceGetInfo( + dev, pi_cast(info::device::parent_device), + sizeof(result), &result, NULL)); + if (result == nullptr) + throw invalid_object_error( + "No parent for device because it is not a subdevice"); + + return createSyclObjFromImpl( + std::make_shared(result)); +} + +//template struct get_device_info; + +void get_device_info_BAD() {} + vector_class read_fp_bitfield(cl_device_fp_config bits) { vector_class result; if (bits & CL_FP_DENORM) diff --git a/sycl/source/detail/force_device.cpp b/sycl/source/detail/force_device.cpp index e138ad02febf9..c70326c9420ef 100644 --- a/sycl/source/detail/force_device.cpp +++ b/sycl/source/detail/force_device.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include +#include #include #include -#include "force_device.hpp" namespace cl { namespace sycl { diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp new file mode 100644 index 0000000000000..3f345fd9ce618 --- /dev/null +++ b/sycl/source/detail/pi.cpp @@ -0,0 +1,88 @@ +#include +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +// For selection of SYCL RT back-end, now manually through the "SYCL_BE" +// environment variable. +// +enum pi_backend { + SYCL_BE_PI_OPENCL, + SYCL_BE_PI_OTHER +}; + +// Check for manually selected BE at run-time. +bool pi_use_backend(pi_backend be) { + static const pi_backend use = + std::map{ + { "PI_OPENCL", SYCL_BE_PI_OPENCL }, + { "PI_OTHER", SYCL_BE_PI_OTHER } + // Any other value would yeild 0 -> PI_OPENCL (current default) + }[std::getenv("SYCL_BE")]; + return be == use; +} + +// Report error and no return (keeps compiler from printing warnings). +// TODO: Probably change that to throw a catchable exception, +// but for now it is useful to see every failure. +// +[[noreturn]] void pi_die(const char *message) { + fprintf(stderr, "pi_die(%s)\n", message ? message : ""); + std::terminate(); +} + +void pi_assert(bool condition, const char *message) { + if (!condition) + pi_die(message); +} + +void pi_trace(const char *format, ...) { + static bool do_trace = std::getenv("SYCL_BE_TRACE"); + if (!do_trace) + return; + + va_list args; + va_start(args, format); + vprintf(format, args); +} + +extern "C" { +// TODO: change this pseudo-dispatch to plugins (ICD-like?) +// Currently this is using the low-level "ifunc" machinery to +// re-direct (with no overhead) the PI call to the underlying +// PI plugin requested by SYCL_BE environment variable (today +// only OpenCL, other would just die). +// +void __resolve_die() { + pi_die("Unknown SYCL_BE"); +} + +#define PI_DISPATCH(api) \ +decltype(api) ocl_##api; \ +static void *__resolve_##api(void) { \ + return (pi_use_backend(SYCL_BE_PI_OPENCL) ? \ + (void*)ocl_##api : (void*)__resolve_die); \ +} \ +decltype(api) api __attribute__((ifunc ("__resolve_" #api))); + +// Platform +PI_DISPATCH(piPlatformsGet) +PI_DISPATCH(piPlatformGetInfo) +// Device +PI_DISPATCH(piDevicesGet) +PI_DISPATCH(piDeviceRetain) +PI_DISPATCH(piDeviceRelease) +PI_DISPATCH(piDeviceGetInfo) +PI_DISPATCH(piDevicePartition) +// IR +PI_DISPATCH(piextDeviceSelectBinary) + +} // extern "C" + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/detail/pi_opencl.cpp b/sycl/source/detail/pi_opencl.cpp new file mode 100644 index 0000000000000..7892d8ba8025b --- /dev/null +++ b/sycl/source/detail/pi_opencl.cpp @@ -0,0 +1,100 @@ +#include +#include "CL/opencl.h" + +namespace cl { +namespace sycl { +namespace detail { + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Convinience macro makes source code search easier +#define OCL(pi_api) ocl_##pi_api + +// Example of a PI interface that does not map exactly to an OpenCL one. +pi_result OCL(piPlatformsGet)(pi_uint32 num_entries, + pi_platform * platforms, + pi_uint32 * num_platforms) { + cl_int result = + clGetPlatformIDs(pi_cast (num_entries), + pi_cast (platforms), + pi_cast (num_platforms)); + + // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms + if (result == CL_PLATFORM_NOT_FOUND_KHR) { + pi_assert(num_platforms != 0); + *num_platforms = 0; + result = CL_SUCCESS; + } + return pi_cast(result); +} + + +// Example of a PI interface that does not map exactly to an OpenCL one. +pi_result OCL(piDevicesGet)(pi_platform platform, + pi_device_type device_type, + pi_uint32 num_entries, + pi_device * devices, + pi_uint32 * num_devices) { + cl_int result = + clGetDeviceIDs(pi_cast (platform), + pi_cast (device_type), + pi_cast (num_entries), + pi_cast (devices), + pi_cast (num_devices)); + + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices + if (result == CL_DEVICE_NOT_FOUND) { + pi_assert(num_devices != 0); + *num_devices = 0; + result = CL_SUCCESS; + } + return pi_cast(result); +} + +pi_result OCL(piextDeviceSelectBinary)( + pi_device device, // TODO: does this need to be context? + pi_device_binary * images, + pi_uint32 num_images, + pi_device_binary * selected_image) { + + // TODO dummy implementation. + // Real implementaion will use the same mechanism OpenCL ICD dispatcher + // uses. Somthing like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + *selected_image = num_images > 0 ? images[0] : nullptr; + return PI_SUCCESS; +} + +// TODO: implement portable call forwarding (ifunc is a GNU extension). +// TODO: reuse same PI -> OCL mapping in pi_opencl.hpp, or maybe just +// wait until that one is completely removed. +// +#define PI_ALIAS(pi_api, ocl_api) \ +static void *__resolve_##pi_api(void) { \ + return (void*) (ocl_api); \ +} \ +decltype(ocl_api) OCL(pi_api) __attribute__((ifunc ("__resolve_" #pi_api))); + +// Platform +PI_ALIAS(piPlatformGetInfo, clGetPlatformInfo) +// Device +PI_ALIAS(piDeviceRetain, clRetainDevice) +PI_ALIAS(piDeviceRelease, clReleaseDevice) +PI_ALIAS(piDevicePartition, clCreateSubDevices) +PI_ALIAS(piDeviceGetInfo, clGetDeviceInfo) + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/detail/platform_host.cpp b/sycl/source/detail/platform_host.cpp deleted file mode 100644 index 63257a96df4ab..0000000000000 --- a/sycl/source/detail/platform_host.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//==----------- platform_host.cpp -----------------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -namespace cl { -namespace sycl { -namespace detail { - -vector_class -platform_host::get_devices(info::device_type dev_type) const { - vector_class res; - if (dev_type == info::device_type::host || dev_type == info::device_type::all) - res.resize(1); // default device construct creates host device - return res; -} - -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp new file mode 100644 index 0000000000000..68044190a2e02 --- /dev/null +++ b/sycl/source/detail/platform_impl.cpp @@ -0,0 +1,87 @@ +//==----------- platform_host.cpp -----------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +vector_class +platform_impl_pi::get_platforms() { + vector_class platforms; + + pi_uint32 num_platforms = 0; + PI_CALL(RT::piPlatformsGet(0, 0, &num_platforms)); + info::device_type forced_type = detail::get_forced_type(); + + if (num_platforms) { + vector_class pi_platforms(num_platforms); + PI_CALL(RT::piPlatformsGet(num_platforms, pi_platforms.data(), 0)); + + for (pi_uint32 i = 0; i < num_platforms; i++) { + + platform plt = + detail::createSyclObjFromImpl( + std::make_shared(pi_platforms[i])); + + // Skip platforms which do not contain requested device types + if (!plt.get_devices(forced_type).empty()) + platforms.push_back(plt); + } + } + return platforms; +} + +vector_class +platform_impl_host::get_devices(info::device_type dev_type) const { + vector_class res; + if (dev_type == info::device_type::host || dev_type == info::device_type::all) + res.resize(1); // default device construct creates host device + return res; +} + +vector_class +platform_impl_pi::get_devices(info::device_type deviceType) const { + vector_class res; + if (deviceType == info::device_type::host) + return res; + + pi_uint32 num_devices; + auto err = RT::piDevicesGet( + m_platform, pi_cast(deviceType), 0, 0, &num_devices); + + // TODO: remove this check when switched to PI as it would just return + // zero in num_devices. + if (err == CL_DEVICE_NOT_FOUND) { + return res; + } + // TODO catch an exception and put it to list of asynchronous exceptions + // TODO: remove dependency on CHECK_OCL_CODE + CHECK_OCL_CODE(err); + + vector_class pi_devices(num_devices); + // TODO catch an exception and put it to list of asynchronous exceptions + PI_CALL(RT::piDevicesGet( + m_platform, pi_cast(deviceType), num_devices, + pi_devices.data(), 0)); + + std::for_each(pi_devices.begin(), pi_devices.end(), + [&res](const RT::pi_device &a_pi_device) { + device sycl_device = + detail::createSyclObjFromImpl( + std::make_shared(a_pi_device)); + res.push_back(sycl_device); + }); + return res; +} + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/platform_opencl.cpp b/sycl/source/detail/platform_opencl.cpp deleted file mode 100644 index f6cb39cdf52da..0000000000000 --- a/sycl/source/detail/platform_opencl.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//==----------- platform_opencl.cpp -----------------------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -namespace cl { -namespace sycl { -namespace detail { - -vector_class -platform_opencl::get_devices(info::device_type deviceType) const { - vector_class res; - if (deviceType == info::device_type::host) - return res; - cl_uint num_devices; - auto err = clGetDeviceIDs(id, (cl_device_type)deviceType, 0, 0, &num_devices); - if (err == CL_DEVICE_NOT_FOUND) { - return res; - } - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(err); - vector_class device_ids(num_devices); - // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE(clGetDeviceIDs(id, (cl_device_type)deviceType, num_devices, - device_ids.data(), 0)); - vector_class devices = - vector_class(device_ids.data(), device_ids.data() + num_devices); - res.insert(res.end(), devices.begin(), devices.end()); - return res; -} - -} // namespace detail -} // namespace sycl -} // namespace cl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 6425737b795b1..cf56db55f6d0c 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -167,11 +167,11 @@ operator()(const std::pair &LHS, reinterpret_cast(RHS.second); } -void ProgramManager::addImages(cnri_bin_desc *DeviceImages) { +void ProgramManager::addImages(pi_device_binaries DeviceBinary) { std::lock_guard Guard(Sync::getGlobalLock()); - for (int I = 0; I < DeviceImages->NumDeviceImages; I++) { - cnri_device_image *Img = &(DeviceImages->DeviceImages[I]); + for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { + pi_device_binary Img = &(DeviceBinary->DeviceBinaries[I]); OSModuleHandle M = OSUtil::getOSModuleHandle(Img); auto &Imgs = m_DeviceImages[M]; @@ -192,7 +192,7 @@ void ProgramManager::debugDumpBinaryImage(const DeviceImage *Img) const { std::cerr << " Options : " << (Img->BuildOptions ? Img->BuildOptions : "NULL") << "\n"; std::cerr << " Bin size : " - << ((intptr_t)Img->ImageEnd - (intptr_t)Img->ImageStart) << "\n"; + << ((intptr_t)Img->BinaryEnd - (intptr_t)Img->BinaryStart) << "\n"; } void ProgramManager::debugDumpBinaryImages() const { @@ -206,14 +206,14 @@ void ProgramManager::debugDumpBinaryImages() const { struct ImageDeleter { void operator()(DeviceImage *I) { - delete[] I->ImageStart; + delete[] I->BinaryStart; delete I; } }; -cnri_program ProgramManager::loadProgram(OSModuleHandle M, - const context &Context, - DeviceImage **I) { +RT::pi_program ProgramManager::loadProgram(OSModuleHandle M, + const context &Context, + DeviceImage **I) { std::lock_guard Guard(Sync::getGlobalLock()); if (DbgProgMgr > 0) { @@ -248,15 +248,15 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, std::string(" failed")); } Img = new DeviceImage(); - Img->Version = CNRI_DEVICE_IMAGE_STRUCT_VERSION; - Img->Kind = SYCL_OFFLOAD_KIND; - Img->Format = CNRI_IMG_NONE; - Img->DeviceTargetSpec = CNRI_TGT_STR_UNKNOWN; + Img->Version = PI_DEVICE_BINARY_VERSION; + Img->Kind = PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL; + Img->Format = PI_DEVICE_BINARY_TYPE_NONE; + Img->DeviceTargetSpec = PI_DEVICE_BINARY_TARGET_UNKNOWN; Img->BuildOptions = ""; Img->ManifestStart = nullptr; Img->ManifestEnd = nullptr; - Img->ImageStart = Data; - Img->ImageEnd = Data + Size; + Img->BinaryStart = Data; + Img->BinaryEnd = Data + Size; Img->EntriesBegin = nullptr; Img->EntriesEnd = nullptr; @@ -275,12 +275,10 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, throw runtime_error("No device program image found"); } std::vector *Imgs = (ImgIt->second).get(); - const cnri_context &Ctx = getRawSyclObjImpl(Context)->getHandleRef(); - if (cnriSelectDeviceImage(Ctx, Imgs->data(), (cl_uint)Imgs->size(), &Img) != - CNRI_SUCCESS) { - throw device_error("cnriSelectDeviceImage failed"); - } + PI_CALL(RT::piextDeviceSelectBinary( + 0, Imgs->data(), (cl_uint)Imgs->size(), &Img)); + if (DbgProgMgr > 0) { std::cerr << "available device images:\n"; debugDumpBinaryImages(); @@ -289,26 +287,25 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, } } // perform minimal sanity checks on the device image and the descriptor - if (Img->ImageEnd < Img->ImageStart) { + if (Img->BinaryEnd < Img->BinaryStart) { throw runtime_error("Malformed device program image descriptor"); } - if (Img->ImageEnd == Img->ImageStart) { + if (Img->BinaryEnd == Img->BinaryStart) { throw runtime_error("Invalid device program image: size is zero"); } - size_t ImgSize = static_cast(Img->ImageEnd - Img->ImageStart); - cnri_device_image_format Format = - static_cast(Img->Format); + size_t ImgSize = static_cast(Img->BinaryEnd - Img->BinaryStart); + auto Format = pi_cast(Img->Format); // Determine the format of the image if not set already - if (Format == CNRI_IMG_NONE) { + if (Format == PI_DEVICE_BINARY_TYPE_NONE) { struct { - cnri_device_image_format Fmt; + pi_device_binary_type Fmt; const uint32_t Magic; - } Fmts[] = {{CNRI_IMG_SPIRV, 0x07230203}, - {CNRI_IMG_LLVMIR_BITCODE, 0xDEC04342}}; + } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203}, + {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342}}; if (ImgSize >= sizeof(Fmts[0].Magic)) { std::remove_const::type Hdr = 0; - std::copy(Img->ImageStart, Img->ImageStart + sizeof(Hdr), + std::copy(Img->BinaryStart, Img->BinaryStart + sizeof(Hdr), reinterpret_cast(&Hdr)); for (const auto &Fmt : Fmts) { @@ -341,9 +338,9 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, Fname += Img->DeviceTargetSpec; std::string Ext; - if (Format == CNRI_IMG_SPIRV) { + if (Format == PI_DEVICE_BINARY_TYPE_SPIRV) { Ext = ".spv"; - } else if (Format == CNRI_IMG_LLVMIR_BITCODE) { + } else if (Format == PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE) { Ext = ".bc"; } else { Ext = ".bin"; @@ -355,15 +352,15 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, if (!F.is_open()) { throw runtime_error(std::string("Can not write ") + Fname); } - F.write(reinterpret_cast(Img->ImageStart), ImgSize); + F.write(reinterpret_cast(Img->BinaryStart), ImgSize); F.close(); } // Load the selected image - const cnri_context &Ctx = getRawSyclObjImpl(Context)->getHandleRef(); - cnri_program Res = nullptr; - Res = Format == CNRI_IMG_SPIRV - ? createSpirvProgram(Ctx, Img->ImageStart, ImgSize) - : createBinaryProgram(Ctx, Img->ImageStart, ImgSize); + const cl_context &Ctx = getRawSyclObjImpl(Context)->getHandleRef(); + RT::pi_program Res = nullptr; + Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV + ? createSpirvProgram(Ctx, Img->BinaryStart, ImgSize) + : createBinaryProgram(Ctx, Img->BinaryStart, ImgSize); if (I) *I = Img; @@ -376,11 +373,11 @@ cnri_program ProgramManager::loadProgram(OSModuleHandle M, } // namespace sycl } // namespace cl -extern "C" void __tgt_register_lib(cnri_bin_desc *desc) { +extern "C" void __tgt_register_lib(pi_device_binaries desc) { cl::sycl::detail::ProgramManager::getInstance().addImages(desc); } // Executed as a part of current module's (.exe, .dll) static initialization -extern "C" void __tgt_unregister_lib(cnri_bin_desc *desc) { +extern "C" void __tgt_unregister_lib(pi_device_binaries desc) { // TODO implement the function } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index e847616f930c7..d56f021baadb1 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -6,11 +6,10 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include +#include #include #include -#include "detail/force_device.hpp" namespace cl { namespace sycl { @@ -27,7 +26,8 @@ void force_type(info::device_type &t, const info::device_type &ft) { device::device() : impl(std::make_shared()) {} device::device(cl_device_id deviceId) - : impl(std::make_shared(deviceId)) {} + : impl(std::make_shared( + detail::pi_cast(deviceId))) {} device::device(const device_selector &deviceSelector) { *this = deviceSelector.select_device(); diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 34815cfedc8e4..027d8033e28fe 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -6,20 +6,20 @@ // //===----------------------------------------------------------------------===// -#include -#include +#include #include #include #include -#include "detail/force_device.hpp" +#include namespace cl { namespace sycl { -platform::platform() : impl(std::make_shared()) {} +platform::platform() : impl(std::make_shared()) {} platform::platform(cl_platform_id platform_id) - : impl(std::make_shared(platform_id)) {} + : impl(std::make_shared( + detail::pi_cast(platform_id))) {} platform::platform(const device_selector &dev_selector) { *this = dev_selector.select_device().get_platform(); @@ -30,33 +30,12 @@ vector_class platform::get_devices(info::device_type dev_type) const { } vector_class platform::get_platforms() { - static vector_class platforms; - if (!platforms.empty()) { - return platforms; - } - - cl_uint num_platforms = 0; - info::device_type forced_type = detail::get_forced_type(); - - auto error = clGetPlatformIDs(0, 0, &num_platforms); - if (error != CL_PLATFORM_NOT_FOUND_KHR) - CHECK_OCL_CODE(error); // Skip check if no OpenCL available - if (num_platforms) { - vector_class platform_ids(num_platforms); - error = clGetPlatformIDs(num_platforms, platform_ids.data(), 0); - CHECK_OCL_CODE(error); - - for (cl_uint i = 0; i < num_platforms; i++) { - platform plt(platform_ids[i]); - - // Skip platforms which do not contain requested device types - if (!plt.get_devices(forced_type).empty()) - platforms.push_back(plt); - } - } + vector_class platforms = + detail::platform_impl_pi::get_platforms(); // Add host device platform if required + info::device_type forced_type = detail::get_forced_type(); if (detail::match_types(forced_type, info::device_type::host)) platforms.push_back(platform()); From 62ada2c3ae3b5e371331acdbcc724c4c2d547971 Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Tue, 18 Jun 2019 07:52:54 -0700 Subject: [PATCH 2/3] [SYCL] fixed typos, use less C Signed-off-by: Sergey V Maslov --- sycl/include/CL/sycl/detail/device_impl.hpp | 2 +- sycl/include/CL/sycl/detail/device_info.hpp | 4 +- sycl/include/CL/sycl/detail/pi.h | 6 +- sycl/include/CL/sycl/detail/pi.hpp | 156 +++++++-------- sycl/include/CL/sycl/detail/pi_opencl.hpp | 104 +++++----- sycl/source/detail/device_impl.cpp | 182 +++++++++--------- sycl/source/detail/pi.cpp | 178 ++++++++--------- sycl/source/detail/pi_opencl.cpp | 200 ++++++++++---------- sycl/source/detail/platform_impl.cpp | 6 +- 9 files changed, 421 insertions(+), 417 deletions(-) diff --git a/sycl/include/CL/sycl/detail/device_impl.hpp b/sycl/include/CL/sycl/detail/device_impl.hpp index 4f07f3495d2ba..33bdef7fdf768 100644 --- a/sycl/include/CL/sycl/detail/device_impl.hpp +++ b/sycl/include/CL/sycl/detail/device_impl.hpp @@ -181,7 +181,7 @@ class device_impl_pi : public device_impl { RT::pi_device m_device = 0; RT::pi_device_type m_type; bool m_isRootDevice = false; -}; // class device_impl_pi +}; // class device_impl_pi // TODO: 4.6.4 Partitioning into multiple SYCL devices // TODO: 4.6.4.2 Device information descriptors diff --git a/sycl/include/CL/sycl/detail/device_info.hpp b/sycl/include/CL/sycl/detail/device_info.hpp index ccdffcc5de574..b3b6c6d73629c 100644 --- a/sycl/include/CL/sycl/detail/device_info.hpp +++ b/sycl/include/CL/sycl/detail/device_info.hpp @@ -42,7 +42,7 @@ template <> struct check_fp_support { // Structs for emulating function template partial specialization // Default template for the general case -// TODO: get rid of remainig uses of OpenCL directly +// TODO: get rid of remaining uses of OpenCL directly // template struct get_device_info { static T _(RT::pi_device dev) { @@ -70,7 +70,7 @@ template struct get_device_info { return string_class(result.get()); } }; - + // Specialization for parent device template struct get_device_info { diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 7e5d38538967e..e570d25fd0e45 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -125,7 +125,7 @@ struct pi_device_binary_struct { _pi_offload_entry EntriesBegin; _pi_offload_entry EntriesEnd; }; -typedef pi_device_binary_struct * pi_device_binary; +typedef pi_device_binary_struct * pi_device_binary; // Offload binaries descriptor version supported by this library. static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; @@ -146,7 +146,7 @@ struct pi_device_binaries_struct { _pi_offload_entry *HostEntriesBegin; _pi_offload_entry *HostEntriesEnd; }; -typedef pi_device_binaries_struct * pi_device_binaries; +typedef pi_device_binaries_struct * pi_device_binaries; // Opaque types that make reading build log errors easier. class _pi_platform; @@ -162,7 +162,7 @@ typedef cl_program pi_program; // // Following section contains SYCL RT Plugin Interface (PI) methods // having direct analogy in OpenCL, and needed for the core SYCL -// functionality. The convention is to prefix such interfaces with "pi". +// functionality. The convention is to prefix such interfaces with "pi". // // TODO: describe interfaces in Doxygen format // diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index dd5e34b5b3dbb..12e997319c65b 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -1,77 +1,79 @@ -//==---------- pi.hpp - Plugin Interface for SYCL RT -----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// C++ wrapper of extern "C" PI interfaces -// -#pragma once - -#include - -namespace cl { -namespace sycl { -namespace detail { - -class pi { -public: - using pi_result = ::pi_result; - using pi_platform = ::pi_platform; - using pi_device = ::pi_device; - using pi_device_type = ::pi_device_type; - using pi_device_binary_type = ::pi_device_binary_type; - using pi_device_info = ::pi_device_info; - using pi_program = ::pi_program; - - // Convinience macro to have things look compact. - #define PI_API(pi_api) \ - static constexpr decltype(::pi_api) * pi_api = &::pi_api; - - // Platform - PI_API(piPlatformsGet) - PI_API(piPlatformGetInfo) - // Device - PI_API(piDevicesGet) - PI_API(piDeviceGetInfo) - PI_API(piDevicePartition) - PI_API(piDeviceRetain) - PI_API(piDeviceRelease) - // IR - PI_API(piextDeviceSelectBinary) -}; - -// Report error and no return (keeps compiler happy about no return statements). -[[noreturn]] void pi_die(const char *message); -void pi_assert(bool condition, const char *message = 0); - -#define STRINGIZE(x) STRINGIZE2(x) -#define STRINGIZE2(x) #x -#define PI_ASSERT(cond, msg) \ - pi_assert(condition, "assert @ " __FILE__ ":" STRINGIZE(__LINE__) msg); - -// This does the call, the trace and the check for no errors. -// TODO: remove dependency on CHECK_OCL_CODE. -// TODO: implement a more mature and controllable tracing of PI calls. -void pi_trace(const char *format, ...); -#define PI_CALL(pi_call) { \ - pi_trace("PI ---> %s\n", #pi_call); \ - auto __result = (pi_call); \ - pi_trace("PI <--- %d\n", __result); \ - CHECK_OCL_CODE(__result); \ -} - -// Want all the needed casts be explicit, do not define conversion operators. -template -To pi_cast(From value) { - // TODO: see if more sanity checks are possible. - pi_assert(sizeof(From) == sizeof(To)); - return (To)(value); -} - -} // namespace detail -} // namespace sycl -} // namespace cl - +//==---------- pi.hpp - Plugin Interface for SYCL RT -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// C++ wrapper of extern "C" PI interfaces +// +#pragma once + +#include + +namespace cl { +namespace sycl { +namespace detail { + +class pi { +public: + using pi_result = ::pi_result; + using pi_platform = ::pi_platform; + using pi_device = ::pi_device; + using pi_device_type = ::pi_device_type; + using pi_device_binary_type = ::pi_device_binary_type; + using pi_device_info = ::pi_device_info; + using pi_program = ::pi_program; + + // Convinience macro to have things look compact. + #define _PI_API(pi_api) \ + static constexpr decltype(::pi_api) * pi_api = &::pi_api; + + // Platform + _PI_API(piPlatformsGet) + _PI_API(piPlatformGetInfo) + // Device + _PI_API(piDevicesGet) + _PI_API(piDeviceGetInfo) + _PI_API(piDevicePartition) + _PI_API(piDeviceRetain) + _PI_API(piDeviceRelease) + // IR + _PI_API(piextDeviceSelectBinary) + + #undef _PI_API +}; + +// Report error and no return (keeps compiler happy about no return statements). +[[noreturn]] void pi_die(const char *message); +void pi_assert(bool condition, const char *message = 0); + +#define _PI_STRINGIZE(x) _PI_STRINGIZE2(x) +#define _PI_STRINGIZE2(x) #x +#define PI_ASSERT(cond, msg) \ + pi_assert(condition, "assert @ " __FILE__ ":" _PI_STRINGIZE(__LINE__) msg); + +// This does the call, the trace and the check for no errors. +// TODO: remove dependency on CHECK_OCL_CODE. +// TODO: implement a more mature and controllable tracing of PI calls. +void pi_trace(const char *format, ...); +#define PI_CALL(pi_call) { \ + pi_trace("PI ---> %s\n", #pi_call); \ + auto __result = (pi_call); \ + pi_trace("PI <--- %d\n", __result); \ + CHECK_OCL_CODE(__result); \ +} + +// Want all the needed casts be explicit, do not define conversion operators. +template +To pi_cast(From value) { + // TODO: see if more sanity checks are possible. + pi_assert(sizeof(From) == sizeof(To)); + return (To)(value); +} + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/include/CL/sycl/detail/pi_opencl.hpp b/sycl/include/CL/sycl/detail/pi_opencl.hpp index 6c8d353afda82..be040f5aea4cb 100644 --- a/sycl/include/CL/sycl/detail/pi_opencl.hpp +++ b/sycl/include/CL/sycl/detail/pi_opencl.hpp @@ -1,52 +1,52 @@ -#pragma once - -#include -#include - -namespace cl { -namespace sycl { -namespace detail { - -// -// TODO: there is no such functionality in OpenCL so call PI OpenCL -// plugin directly for now, the whole "opencl" class is temporary anyway. -// -extern "C" decltype(::piextDeviceSelectBinary) ocl_piextDeviceSelectBinary; -using cl_device_binary_type = ::pi_device_binary_type; - -// Mapping of PI interfaces to OpenCL at compile-time. -// This is the default config until the entire SYCL RT is transferred to PI. -// TODO: we can just remove this when default is change to PI. -// -class opencl { -public: - using pi_result = cl_int; - using pi_platform = cl_platform_id; - using pi_device = cl_device_id; - using pi_device_type = cl_device_type; - using pi_device_binary_type = cl_device_binary_type; - using pi_device_info = cl_device_info; - using pi_program = cl_program; - - // Convinience macro to have mapping look like a compact table. - #define PI_CL(pi_api, cl_api) \ - static constexpr decltype(cl_api) * pi_api = &cl_api; - - // Platform - PI_CL(piPlatformsGet, clGetPlatformIDs) - PI_CL(piPlatformGetInfo, clGetPlatformInfo) - // Device - PI_CL(piDevicesGet, clGetDeviceIDs) - PI_CL(piDeviceGetInfo, clGetDeviceInfo) - PI_CL(piDevicePartition, clCreateSubDevices) - PI_CL(piDeviceRetain, clRetainDevice) - PI_CL(piDeviceRelease, clReleaseDevice) - // IR - PI_CL(piextDeviceSelectBinary, ocl_piextDeviceSelectBinary) - - #undef PI_CL -}; - -} // namespace detail -} // namespace sycl -} // namespace cl +#pragma once + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +// +// TODO: there is no such functionality in OpenCL so call PI OpenCL +// plugin directly for now, the whole "opencl" class is temporary anyway. +// +extern "C" decltype(::piextDeviceSelectBinary) ocl_piextDeviceSelectBinary; +using cl_device_binary_type = ::pi_device_binary_type; + +// Mapping of PI interfaces to OpenCL at compile-time. +// This is the default config until the entire SYCL RT is transferred to PI. +// TODO: we can just remove this when default is change to PI. +// +class opencl { +public: + using pi_result = cl_int; + using pi_platform = cl_platform_id; + using pi_device = cl_device_id; + using pi_device_type = cl_device_type; + using pi_device_binary_type = cl_device_binary_type; + using pi_device_info = cl_device_info; + using pi_program = cl_program; + + // Convinience macro to have mapping look like a compact table. + #define PI_CL(pi_api, cl_api) \ + static constexpr decltype(cl_api) * pi_api = &cl_api; + + // Platform + PI_CL(piPlatformsGet, clGetPlatformIDs) + PI_CL(piPlatformGetInfo, clGetPlatformInfo) + // Device + PI_CL(piDevicesGet, clGetDeviceIDs) + PI_CL(piDeviceGetInfo, clGetDeviceInfo) + PI_CL(piDevicePartition, clCreateSubDevices) + PI_CL(piDeviceRetain, clRetainDevice) + PI_CL(piDeviceRelease, clReleaseDevice) + // IR + PI_CL(piextDeviceSelectBinary, ocl_piextDeviceSelectBinary) + + #undef PI_CL +}; + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 67dedf2c6c049..29136f4c4f820 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -1,91 +1,91 @@ -//==----------------- device_impl.hpp - SYCL device ------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -namespace cl { -namespace sycl { -namespace detail { - -vector_class -device_impl_pi::create_sub_devices( - const cl_device_partition_property *Properties, - size_t SubDevicesCount) const { - - vector_class SubDevices(SubDevicesCount); - pi_uint32 ReturnedSubDevices = 0; - PI_CALL(RT::piDevicePartition(m_device, Properties, SubDevicesCount, - SubDevices.data(), &ReturnedSubDevices)); - // TODO: check that returned number of sub-devices matches what was - // requested, otherwise this walk below is wrong. - // - // TODO: Need to describe the subdevice model. Some sub_device management - // may be necessary. What happens if create_sub_devices is called multiple - // times with the same arguments? - // - vector_class res; - std::for_each(SubDevices.begin(), SubDevices.end(), - [&res](const RT::pi_device &a_pi_device) { - device sycl_device = - detail::createSyclObjFromImpl( - std::make_shared(a_pi_device)); - res.push_back(sycl_device); - }); - return res; -} - -vector_class -device_impl_pi::create_sub_devices(size_t ComputeUnits) const { - - if (!is_partition_supported(info::partition_property::partition_equally)) { - throw cl::sycl::feature_not_supported(); - } - size_t SubDevicesCount = - get_info() / ComputeUnits; - const cl_device_partition_property Properties[3] = { - CL_DEVICE_PARTITION_EQUALLY, (cl_device_partition_property)ComputeUnits, - 0}; - return create_sub_devices(Properties, SubDevicesCount); -} - -vector_class -device_impl_pi::create_sub_devices(const vector_class &Counts) const { - - if (!is_partition_supported( - info::partition_property::partition_by_counts)) { - throw cl::sycl::feature_not_supported(); - } - static const cl_device_partition_property P[] = { - CL_DEVICE_PARTITION_BY_COUNTS, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, - 0}; - vector_class Properties(P, P + 3); - Properties.insert(Properties.begin() + 1, Counts.begin(), Counts.end()); - return create_sub_devices(Properties.data(), Counts.size()); -} - -vector_class -device_impl_pi::create_sub_devices( - info::partition_affinity_domain AffinityDomain) const { - - if (!is_partition_supported( - info::partition_property::partition_by_affinity_domain) || - !is_affinity_supported(AffinityDomain)) { - throw cl::sycl::feature_not_supported(); - } - const cl_device_partition_property Properties[3] = { - CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - (cl_device_partition_property)AffinityDomain, 0}; - size_t SubDevicesCount = - get_info(); - return create_sub_devices(Properties, SubDevicesCount); -} - -} // namespace detail -} // namespace sycl -} // namespace cl +//==----------------- device_impl.hpp - SYCL device ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +vector_class +device_impl_pi::create_sub_devices( + const cl_device_partition_property *Properties, + size_t SubDevicesCount) const { + + vector_class SubDevices(SubDevicesCount); + pi_uint32 ReturnedSubDevices = 0; + PI_CALL(RT::piDevicePartition(m_device, Properties, SubDevicesCount, + SubDevices.data(), &ReturnedSubDevices)); + // TODO: check that returned number of sub-devices matches what was + // requested, otherwise this walk below is wrong. + // + // TODO: Need to describe the subdevice model. Some sub_device management + // may be necessary. What happens if create_sub_devices is called multiple + // times with the same arguments? + // + vector_class res; + std::for_each(SubDevices.begin(), SubDevices.end(), + [&res](const RT::pi_device &a_pi_device) { + device sycl_device = + detail::createSyclObjFromImpl( + std::make_shared(a_pi_device)); + res.push_back(sycl_device); + }); + return res; +} + +vector_class +device_impl_pi::create_sub_devices(size_t ComputeUnits) const { + + if (!is_partition_supported(info::partition_property::partition_equally)) { + throw cl::sycl::feature_not_supported(); + } + size_t SubDevicesCount = + get_info() / ComputeUnits; + const cl_device_partition_property Properties[3] = { + CL_DEVICE_PARTITION_EQUALLY, (cl_device_partition_property)ComputeUnits, + 0}; + return create_sub_devices(Properties, SubDevicesCount); +} + +vector_class +device_impl_pi::create_sub_devices(const vector_class &Counts) const { + + if (!is_partition_supported( + info::partition_property::partition_by_counts)) { + throw cl::sycl::feature_not_supported(); + } + static const cl_device_partition_property P[] = { + CL_DEVICE_PARTITION_BY_COUNTS, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, + 0}; + vector_class Properties(P, P + 3); + Properties.insert(Properties.begin() + 1, Counts.begin(), Counts.end()); + return create_sub_devices(Properties.data(), Counts.size()); +} + +vector_class +device_impl_pi::create_sub_devices( + info::partition_affinity_domain AffinityDomain) const { + + if (!is_partition_supported( + info::partition_property::partition_by_affinity_domain) || + !is_affinity_supported(AffinityDomain)) { + throw cl::sycl::feature_not_supported(); + } + const cl_device_partition_property Properties[3] = { + CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + (cl_device_partition_property)AffinityDomain, 0}; + size_t SubDevicesCount = + get_info(); + return create_sub_devices(Properties, SubDevicesCount); +} + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 3f345fd9ce618..8d9f8e5a24e5e 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -1,88 +1,90 @@ -#include -#include -#include - -namespace cl { -namespace sycl { -namespace detail { - -// For selection of SYCL RT back-end, now manually through the "SYCL_BE" -// environment variable. -// -enum pi_backend { - SYCL_BE_PI_OPENCL, - SYCL_BE_PI_OTHER -}; - -// Check for manually selected BE at run-time. -bool pi_use_backend(pi_backend be) { - static const pi_backend use = - std::map{ - { "PI_OPENCL", SYCL_BE_PI_OPENCL }, - { "PI_OTHER", SYCL_BE_PI_OTHER } - // Any other value would yeild 0 -> PI_OPENCL (current default) - }[std::getenv("SYCL_BE")]; - return be == use; -} - -// Report error and no return (keeps compiler from printing warnings). -// TODO: Probably change that to throw a catchable exception, -// but for now it is useful to see every failure. -// -[[noreturn]] void pi_die(const char *message) { - fprintf(stderr, "pi_die(%s)\n", message ? message : ""); - std::terminate(); -} - -void pi_assert(bool condition, const char *message) { - if (!condition) - pi_die(message); -} - -void pi_trace(const char *format, ...) { - static bool do_trace = std::getenv("SYCL_BE_TRACE"); - if (!do_trace) - return; - - va_list args; - va_start(args, format); - vprintf(format, args); -} - -extern "C" { -// TODO: change this pseudo-dispatch to plugins (ICD-like?) -// Currently this is using the low-level "ifunc" machinery to -// re-direct (with no overhead) the PI call to the underlying -// PI plugin requested by SYCL_BE environment variable (today -// only OpenCL, other would just die). -// -void __resolve_die() { - pi_die("Unknown SYCL_BE"); -} - -#define PI_DISPATCH(api) \ -decltype(api) ocl_##api; \ -static void *__resolve_##api(void) { \ - return (pi_use_backend(SYCL_BE_PI_OPENCL) ? \ - (void*)ocl_##api : (void*)__resolve_die); \ -} \ -decltype(api) api __attribute__((ifunc ("__resolve_" #api))); - -// Platform -PI_DISPATCH(piPlatformsGet) -PI_DISPATCH(piPlatformGetInfo) -// Device -PI_DISPATCH(piDevicesGet) -PI_DISPATCH(piDeviceRetain) -PI_DISPATCH(piDeviceRelease) -PI_DISPATCH(piDeviceGetInfo) -PI_DISPATCH(piDevicePartition) -// IR -PI_DISPATCH(piextDeviceSelectBinary) - -} // extern "C" - -} // namespace detail -} // namespace sycl -} // namespace cl - +#include +#include +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +// For selection of SYCL RT back-end, now manually through the "SYCL_BE" +// environment variable. +// +enum pi_backend { + SYCL_BE_PI_OPENCL, + SYCL_BE_PI_OTHER +}; + +// Check for manually selected BE at run-time. +bool pi_use_backend(pi_backend be) { + static const pi_backend use = + std::map{ + { "PI_OPENCL", SYCL_BE_PI_OPENCL }, + { "PI_OTHER", SYCL_BE_PI_OTHER } + // Any other value would yield 0 -> PI_OPENCL (current default) + }[std::getenv("SYCL_BE")]; + return be == use; +} + +// Report error and no return (keeps compiler from printing warnings). +// TODO: Probably change that to throw a catchable exception, +// but for now it is useful to see every failure. +// +[[noreturn]] void pi_die(const char *message) { + std::cerr << "pi_die: " << message << std::endl; + std::terminate(); +} + +void pi_assert(bool condition, const char *message) { + if (!condition) + pi_die(message); +} + +// TODO: implement a more mature and controllable tracing of PI calls. +void pi_trace(const char *format, ...) { + static bool do_trace = std::getenv("SYCL_BE_TRACE"); + if (!do_trace) + return; + + va_list args; + va_start(args, format); + vprintf(format, args); +} + +extern "C" { +// TODO: change this pseudo-dispatch to plugins (ICD-like?) +// Currently this is using the low-level "ifunc" machinery to +// re-direct (with no overhead) the PI call to the underlying +// PI plugin requested by SYCL_BE environment variable (today +// only OpenCL, other would just die). +// +void __resolve_die() { + pi_die("Unknown SYCL_BE"); +} + +#define PI_DISPATCH(api) \ +decltype(api) ocl_##api; \ +static void *__resolve_##api(void) { \ + return (pi_use_backend(SYCL_BE_PI_OPENCL) ? \ + (void*)ocl_##api : (void*)__resolve_die); \ +} \ +decltype(api) api __attribute__((ifunc ("__resolve_" #api))); + +// Platform +PI_DISPATCH(piPlatformsGet) +PI_DISPATCH(piPlatformGetInfo) +// Device +PI_DISPATCH(piDevicesGet) +PI_DISPATCH(piDeviceRetain) +PI_DISPATCH(piDeviceRelease) +PI_DISPATCH(piDeviceGetInfo) +PI_DISPATCH(piDevicePartition) +// IR +PI_DISPATCH(piextDeviceSelectBinary) + +} // extern "C" + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/detail/pi_opencl.cpp b/sycl/source/detail/pi_opencl.cpp index 7892d8ba8025b..734d3e02fb819 100644 --- a/sycl/source/detail/pi_opencl.cpp +++ b/sycl/source/detail/pi_opencl.cpp @@ -1,100 +1,100 @@ -#include -#include "CL/opencl.h" - -namespace cl { -namespace sycl { -namespace detail { - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Convinience macro makes source code search easier -#define OCL(pi_api) ocl_##pi_api - -// Example of a PI interface that does not map exactly to an OpenCL one. -pi_result OCL(piPlatformsGet)(pi_uint32 num_entries, - pi_platform * platforms, - pi_uint32 * num_platforms) { - cl_int result = - clGetPlatformIDs(pi_cast (num_entries), - pi_cast (platforms), - pi_cast (num_platforms)); - - // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms - if (result == CL_PLATFORM_NOT_FOUND_KHR) { - pi_assert(num_platforms != 0); - *num_platforms = 0; - result = CL_SUCCESS; - } - return pi_cast(result); -} - - -// Example of a PI interface that does not map exactly to an OpenCL one. -pi_result OCL(piDevicesGet)(pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, - pi_device * devices, - pi_uint32 * num_devices) { - cl_int result = - clGetDeviceIDs(pi_cast (platform), - pi_cast (device_type), - pi_cast (num_entries), - pi_cast (devices), - pi_cast (num_devices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (result == CL_DEVICE_NOT_FOUND) { - pi_assert(num_devices != 0); - *num_devices = 0; - result = CL_SUCCESS; - } - return pi_cast(result); -} - -pi_result OCL(piextDeviceSelectBinary)( - pi_device device, // TODO: does this need to be context? - pi_device_binary * images, - pi_uint32 num_images, - pi_device_binary * selected_image) { - - // TODO dummy implementation. - // Real implementaion will use the same mechanism OpenCL ICD dispatcher - // uses. Somthing like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - *selected_image = num_images > 0 ? images[0] : nullptr; - return PI_SUCCESS; -} - -// TODO: implement portable call forwarding (ifunc is a GNU extension). -// TODO: reuse same PI -> OCL mapping in pi_opencl.hpp, or maybe just -// wait until that one is completely removed. -// -#define PI_ALIAS(pi_api, ocl_api) \ -static void *__resolve_##pi_api(void) { \ - return (void*) (ocl_api); \ -} \ -decltype(ocl_api) OCL(pi_api) __attribute__((ifunc ("__resolve_" #pi_api))); - -// Platform -PI_ALIAS(piPlatformGetInfo, clGetPlatformInfo) -// Device -PI_ALIAS(piDeviceRetain, clRetainDevice) -PI_ALIAS(piDeviceRelease, clReleaseDevice) -PI_ALIAS(piDevicePartition, clCreateSubDevices) -PI_ALIAS(piDeviceGetInfo, clGetDeviceInfo) - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -} // namespace detail -} // namespace sycl -} // namespace cl - +#include +#include "CL/opencl.h" + +namespace cl { +namespace sycl { +namespace detail { + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Convinience macro makes source code search easier +#define OCL(pi_api) ocl_##pi_api + +// Example of a PI interface that does not map exactly to an OpenCL one. +pi_result OCL(piPlatformsGet)(pi_uint32 num_entries, + pi_platform * platforms, + pi_uint32 * num_platforms) { + cl_int result = + clGetPlatformIDs(pi_cast (num_entries), + pi_cast (platforms), + pi_cast (num_platforms)); + + // Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms + if (result == CL_PLATFORM_NOT_FOUND_KHR) { + pi_assert(num_platforms != 0); + *num_platforms = 0; + result = CL_SUCCESS; + } + return pi_cast(result); +} + + +// Example of a PI interface that does not map exactly to an OpenCL one. +pi_result OCL(piDevicesGet)(pi_platform platform, + pi_device_type device_type, + pi_uint32 num_entries, + pi_device * devices, + pi_uint32 * num_devices) { + cl_int result = + clGetDeviceIDs(pi_cast (platform), + pi_cast (device_type), + pi_cast (num_entries), + pi_cast (devices), + pi_cast (num_devices)); + + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices + if (result == CL_DEVICE_NOT_FOUND) { + pi_assert(num_devices != 0); + *num_devices = 0; + result = CL_SUCCESS; + } + return pi_cast(result); +} + +pi_result OCL(piextDeviceSelectBinary)( + pi_device device, // TODO: does this need to be context? + pi_device_binary * images, + pi_uint32 num_images, + pi_device_binary * selected_image) { + + // TODO dummy implementation. + // Real implementaion will use the same mechanism OpenCL ICD dispatcher + // uses. Somthing like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + *selected_image = num_images > 0 ? images[0] : nullptr; + return PI_SUCCESS; +} + +// TODO: implement portable call forwarding (ifunc is a GNU extension). +// TODO: reuse same PI -> OCL mapping in pi_opencl.hpp, or maybe just +// wait until that one is completely removed. +// +#define PI_ALIAS(pi_api, ocl_api) \ +static void *__resolve_##pi_api(void) { \ + return (void*) (ocl_api); \ +} \ +decltype(ocl_api) OCL(pi_api) __attribute__((ifunc ("__resolve_" #pi_api))); + +// Platform +PI_ALIAS(piPlatformGetInfo, clGetPlatformInfo) +// Device +PI_ALIAS(piDeviceRetain, clRetainDevice) +PI_ALIAS(piDeviceRelease, clReleaseDevice) +PI_ALIAS(piDevicePartition, clCreateSubDevices) +PI_ALIAS(piDeviceGetInfo, clGetDeviceInfo) + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +} // namespace detail +} // namespace sycl +} // namespace cl + diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 68044190a2e02..c135aaffc1a7c 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -20,17 +20,17 @@ platform_impl_pi::get_platforms() { pi_uint32 num_platforms = 0; PI_CALL(RT::piPlatformsGet(0, 0, &num_platforms)); info::device_type forced_type = detail::get_forced_type(); - + if (num_platforms) { vector_class pi_platforms(num_platforms); PI_CALL(RT::piPlatformsGet(num_platforms, pi_platforms.data(), 0)); - + for (pi_uint32 i = 0; i < num_platforms; i++) { platform plt = detail::createSyclObjFromImpl( std::make_shared(pi_platforms[i])); - + // Skip platforms which do not contain requested device types if (!plt.get_devices(forced_type).empty()) platforms.push_back(plt); From 15a03a7ba12b5c77541e94671eaf9ca00523c2b3 Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Fri, 21 Jun 2019 12:29:07 -0700 Subject: [PATCH 3/3] [SYCL] address Alexey's comments Signed-off-by: Sergey V Maslov --- .../include/CL/sycl/detail/aligned_allocator.hpp | 0 sycl/include/CL/sycl/detail/device_impl.hpp | 16 ++++++++-------- sycl/include/CL/sycl/detail/device_info.hpp | 7 ------- sycl/include/CL/sycl/detail/image_impl.hpp | 2 +- sycl/include/CL/sycl/detail/pi.h | 10 +++++++++- sycl/include/CL/sycl/detail/pi_opencl.hpp | 7 +++++++ sycl/include/CL/sycl/device.hpp | 0 sycl/include/CL/sycl/platform.hpp | 0 sycl/source/detail/device_impl.cpp | 2 +- sycl/source/detail/device_info.cpp | 4 ---- sycl/source/detail/pi.cpp | 7 +++++++ sycl/source/detail/pi_opencl.cpp | 7 +++++++ sycl/source/detail/platform_impl.cpp | 6 +++--- 13 files changed, 43 insertions(+), 25 deletions(-) mode change 100755 => 100644 sycl/include/CL/sycl/detail/aligned_allocator.hpp mode change 100755 => 100644 sycl/include/CL/sycl/device.hpp mode change 100755 => 100644 sycl/include/CL/sycl/platform.hpp diff --git a/sycl/include/CL/sycl/detail/aligned_allocator.hpp b/sycl/include/CL/sycl/detail/aligned_allocator.hpp old mode 100755 new mode 100644 diff --git a/sycl/include/CL/sycl/detail/device_impl.hpp b/sycl/include/CL/sycl/detail/device_impl.hpp index 33bdef7fdf768..84c31ef8afbd5 100644 --- a/sycl/include/CL/sycl/detail/device_impl.hpp +++ b/sycl/include/CL/sycl/detail/device_impl.hpp @@ -113,7 +113,7 @@ class device_impl_pi : public device_impl { ~device_impl_pi() { if (!m_isRootDevice) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(RT::piDeviceRelease(m_device)); + CHECK_OCL_CODE_NO_EXC(RT::piDeviceRelease(m_device)); } } @@ -151,7 +151,7 @@ class device_impl_pi : public device_impl { PI_CALL(RT::piDeviceGetInfo( m_device, PI_DEVICE_INFO_PLATFORM, sizeof(plt), &plt, 0)); - // TODO: thisi possibly will violate common reference semantics, + // TODO: this possibly will violate common reference semantics, // particularly, equality comparison may fail for two consecutive // get_platform() on the same device, as it compares impl objects. return createSyclObjFromImpl( @@ -169,13 +169,13 @@ class device_impl_pi : public device_impl { size_t SubDevicesCount) const; vector_class - create_sub_devices(size_t ComputeUnits) const; + create_sub_devices(size_t ComputeUnits) const override; vector_class - create_sub_devices(const vector_class &Counts) const; + create_sub_devices(const vector_class &Counts) const override; vector_class - create_sub_devices(info::partition_affinity_domain AffinityDomain) const; + create_sub_devices(info::partition_affinity_domain AffinityDomain) const override; private: RT::pi_device m_device = 0; @@ -214,21 +214,21 @@ class device_host : public device_impl { return false; } - vector_class create_sub_devices(size_t nbSubDev) const { + vector_class create_sub_devices(size_t nbSubDev) const override { // TODO: implement host device partitioning throw runtime_error( "Partitioning to subdevices of the host device is not implemented yet"); } vector_class - create_sub_devices(const vector_class &counts) const { + create_sub_devices(const vector_class &counts) const override { // TODO: implement host device partitioning throw runtime_error( "Partitioning to subdevices of the host device is not implemented yet"); } vector_class - create_sub_devices(info::partition_affinity_domain affinityDomain) const { + create_sub_devices(info::partition_affinity_domain affinityDomain) const override { // TODO: implement host device partitioning throw runtime_error( "Partitioning to subdevices of the host device is not implemented yet"); diff --git a/sycl/include/CL/sycl/detail/device_info.hpp b/sycl/include/CL/sycl/detail/device_info.hpp index b3b6c6d73629c..0b639c4818c57 100644 --- a/sycl/include/CL/sycl/detail/device_info.hpp +++ b/sycl/include/CL/sycl/detail/device_info.hpp @@ -258,13 +258,6 @@ struct get_device_info -struct get_device_info { - static device _(RT::pi_device dev); -}; -*/ // Specialization for supported subgroup sizes template <> struct get_device_info, diff --git a/sycl/include/CL/sycl/detail/image_impl.hpp b/sycl/include/CL/sycl/detail/image_impl.hpp index 51a11639081f2..29abaa9358abd 100644 --- a/sycl/include/CL/sycl/detail/image_impl.hpp +++ b/sycl/include/CL/sycl/detail/image_impl.hpp @@ -279,7 +279,7 @@ class image_impl : public SYCLMemObjT { // TODO: Implement this function. void *allocateMem(ContextImplPtr Context, bool InitFromUserData, - cnri_event &OutEventToWait) override { + cl_event &OutEventToWait) override { if (true) throw cl::sycl::feature_not_supported( "MemoryAllocation Function Not Implemented for image class"); diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e570d25fd0e45..8c62f9b3ce60f 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -15,6 +15,14 @@ #ifndef _PI_H_ #define _PI_H_ +// Every single change in PI API should be accamponied with the minor +// version increase (+1). In the cases where backward compatibility is not +// maintained there should be a (+1) change to the major version in +// addition to the increase of the minor. +// +#define _PI_H_VERSION_MAJOR 1 +#define _PI_H_VERSION_MINOR 1 + // TODO: we need a mapping of PI to OpenCL somewhere, and this can be done // elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now // done here, for efficiency and simplicity. @@ -229,7 +237,7 @@ pi_program piclProgramCreateWithSource( // TODO: change to return pi_result // // TODO: describe interfaces in Doxygen format -/// Selects the most appropriate device ninary based on runtime information +/// Selects the most appropriate device binary based on runtime information /// and the IR characteristics. /// pi_result piextDeviceSelectBinary( diff --git a/sycl/include/CL/sycl/detail/pi_opencl.hpp b/sycl/include/CL/sycl/detail/pi_opencl.hpp index be040f5aea4cb..4b418921221e0 100644 --- a/sycl/include/CL/sycl/detail/pi_opencl.hpp +++ b/sycl/include/CL/sycl/detail/pi_opencl.hpp @@ -1,3 +1,10 @@ +//==---------- pi_opencl.hpp - OpenCL Plugin for SYCL RT -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #pragma once #include diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp old mode 100755 new mode 100644 diff --git a/sycl/include/CL/sycl/platform.hpp b/sycl/include/CL/sycl/platform.hpp old mode 100755 new mode 100644 diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 29136f4c4f820..56aac780c0c41 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -1,4 +1,4 @@ -//==----------------- device_impl.hpp - SYCL device ------------------------==// +//==----------------- device_impl.cpp - SYCL device ------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/source/detail/device_info.cpp b/sycl/source/detail/device_info.cpp index 2a51a7ae83dc8..b846defefceca 100644 --- a/sycl/source/detail/device_info.cpp +++ b/sycl/source/detail/device_info.cpp @@ -40,10 +40,6 @@ device get_device_info::_( std::make_shared(result)); } -//template struct get_device_info; - -void get_device_info_BAD() {} - vector_class read_fp_bitfield(cl_device_fp_config bits) { vector_class result; if (bits & CL_FP_DENORM) diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 8d9f8e5a24e5e..c131c239511cf 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -1,3 +1,10 @@ +//==---------- pi.cpp - Plugin Interface for SYCL RT -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include #include #include diff --git a/sycl/source/detail/pi_opencl.cpp b/sycl/source/detail/pi_opencl.cpp index 734d3e02fb819..92c0c36930185 100644 --- a/sycl/source/detail/pi_opencl.cpp +++ b/sycl/source/detail/pi_opencl.cpp @@ -1,3 +1,10 @@ +//==---------- pi_opencl.cpp - OpenCL Plugin -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include #include "CL/opencl.h" diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index c135aaffc1a7c..26162f11ea718 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -1,4 +1,4 @@ -//==----------- platform_host.cpp -----------------------------------------------==// +//==----------- platform_impl.cpp ------------------------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -59,9 +59,9 @@ platform_impl_pi::get_devices(info::device_type deviceType) const { // TODO: remove this check when switched to PI as it would just return // zero in num_devices. - if (err == CL_DEVICE_NOT_FOUND) { + if (err == CL_DEVICE_NOT_FOUND) return res; - } + // TODO catch an exception and put it to list of asynchronous exceptions // TODO: remove dependency on CHECK_OCL_CODE CHECK_OCL_CODE(err);