diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index 19697002b612e..3e96f1b960bbc 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -195,8 +195,10 @@ class buffer { : Range{0} { size_t BufSize = 0; - PI_CALL(piMemGetInfo)(detail::pi::cast(MemObject), - CL_MEM_SIZE, sizeof(size_t), &BufSize, nullptr); + const detail::plugin &Plugin = detail::getSyclObjImpl(SyclContext)->getPlugin(); + Plugin.call( + detail::pi::cast(MemObject), CL_MEM_SIZE, + sizeof(size_t), &BufSize, nullptr); Range[0] = BufSize / sizeof(T); impl = std::make_shared( diff --git a/sycl/include/CL/sycl/detail/context_impl.hpp b/sycl/include/CL/sycl/detail/context_impl.hpp index 7a2ddccf7f091..ebe5d09037285 100644 --- a/sycl/include/CL/sycl/detail/context_impl.hpp +++ b/sycl/include/CL/sycl/detail/context_impl.hpp @@ -59,7 +59,10 @@ class context_impl { /// /// @param PiContext is an instance of a valid plug-in context handle. /// @param AsyncHandler is an instance of async_handler. - context_impl(RT::PiContext PiContext, async_handler AsyncHandler); + /// @param &Plugin is the reference to the underlying Plugin that this context + /// is associated with. + context_impl(RT::PiContext PiContext, async_handler AsyncHandler, + const plugin &Plugin); ~context_impl(); @@ -78,6 +81,12 @@ class context_impl { /// @return an instance of SYCL async_handler. const async_handler &get_async_handler() const; + /// @return the Plugin associated with the platform of this context. + const plugin &getPlugin() const { return MPlatform->getPlugin(); } + + /// @return the PlatformImpl associated with this context. + PlatformImplPtr getPlatformImpl() const { return MPlatform; } + /// Queries this context for information. /// /// The return type depends on information being queried. diff --git a/sycl/include/CL/sycl/detail/context_info.hpp b/sycl/include/CL/sycl/detail/context_info.hpp index 0f55b177ba638..f9d73b1f2642e 100644 --- a/sycl/include/CL/sycl/detail/context_info.hpp +++ b/sycl/include/CL/sycl/detail/context_info.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include __SYCL_INLINE namespace cl { @@ -19,11 +20,12 @@ template struct get_context_info { using RetType = typename info::param_traits::return_type; - static RetType get(RT::PiContext ctx) { + static RetType get(RT::PiContext ctx, const plugin &Plugin) { RetType Result = 0; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextGetInfo)(ctx, pi::cast(param), - sizeof(Result), &Result, nullptr); + Plugin.call(ctx, + pi::cast(param), + sizeof(Result), &Result, nullptr); return Result; } }; diff --git a/sycl/include/CL/sycl/detail/device_impl.hpp b/sycl/include/CL/sycl/detail/device_impl.hpp index 30e758777f461..c10c91ea19bd9 100644 --- a/sycl/include/CL/sycl/detail/device_impl.hpp +++ b/sycl/include/CL/sycl/detail/device_impl.hpp @@ -23,16 +23,21 @@ namespace detail { // Forward declaration class platform_impl; -class platform_impl_pi; +using PlatformImplPtr = std::shared_ptr; // TODO: Make code thread-safe class device_impl { public: /// Constructs a SYCL device instance as a host device. device_impl(); + /// Constructs a SYCL device instance using the provided /// PI device instance. - explicit device_impl(RT::PiDevice Device); + explicit device_impl(RT::PiDevice Device, PlatformImplPtr Platform); + + /// Constructs a SYCL device instance using the provided + /// PI device instance. + explicit device_impl(RT::PiDevice Device, const plugin &Plugin); ~device_impl(); @@ -100,6 +105,9 @@ class device_impl { /// @return The associated SYCL platform. platform get_platform() const; + /// @return the associated plugin with this device. + const plugin &getPlugin() const { return MPlatform->getPlugin(); } + /// Check SYCL extension support by device /// /// @param ExtensionName is a name of queried extension. @@ -165,7 +173,7 @@ class device_impl { } return get_device_info< typename info::param_traits::return_type, - param>::get(this->getHandleRef()); + param>::get(this->getHandleRef(), this->getPlugin()); } /// Check if affinity partitioning by specified domain is supported by device @@ -176,10 +184,13 @@ class device_impl { is_affinity_supported(info::partition_affinity_domain AffinityDomain) const; private: + explicit device_impl(RT::PiDevice Device, PlatformImplPtr Platform, + const plugin &Plugin); RT::PiDevice MDevice = 0; RT::PiDeviceType MType; bool MIsRootDevice = false; bool MIsHostDevice; + PlatformImplPtr MPlatform; }; // class device_impl } // namespace detail diff --git a/sycl/include/CL/sycl/detail/device_info.hpp b/sycl/include/CL/sycl/detail/device_info.hpp index 28e45f22c91aa..c591f7feb24f8 100644 --- a/sycl/include/CL/sycl/detail/device_info.hpp +++ b/sycl/include/CL/sycl/detail/device_info.hpp @@ -48,37 +48,43 @@ template <> struct check_fp_support { // TODO: get rid of remaining uses of OpenCL directly // template struct get_device_info { - static T get(RT::PiDevice dev) { + static T get(RT::PiDevice dev, const plugin &Plugin) { typename sycl_to_pi::type result; - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), - sizeof(result), &result, nullptr); + Plugin.call(dev, + pi::cast(param), + sizeof(result), &result, nullptr); return T(result); } }; // Specialization for platform template struct get_device_info { - static platform get(RT::PiDevice dev) { + static platform get(RT::PiDevice dev, const plugin &Plugin) { typename sycl_to_pi::type result; - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), - sizeof(result), &result, nullptr); + Plugin.call(dev, + pi::cast(param), + sizeof(result), &result, nullptr); + // TODO: Change PiDevice to device_impl. + // Use the Plugin from the device_impl class after plugin details + // are added to the class. return createSyclObjFromImpl( - std::make_shared(result)); + std::make_shared(result, RT::GlobalPlugin)); } }; // Specialization for string return type, variable return size template struct get_device_info { - static string_class get(RT::PiDevice dev) { + static string_class get(RT::PiDevice dev, const plugin &Plugin) { size_t resultSize; - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), 0, nullptr, - &resultSize); + Plugin.call( + dev, pi::cast(param), 0, nullptr, &resultSize); if (resultSize == 0) { return string_class(); } unique_ptr_class result(new char[resultSize]); - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), resultSize, - result.get(), nullptr); + Plugin.call(dev, + pi::cast(param), + resultSize, result.get(), nullptr); return string_class(result.get()); } @@ -86,15 +92,16 @@ template struct get_device_info { // Specialization for parent device template struct get_device_info { - static T get(RT::PiDevice dev); + static T get(RT::PiDevice dev, const plugin &Plugin); }; // Specialization for id return type template struct get_device_info, param> { - static id<3> get(RT::PiDevice dev) { + static id<3> get(RT::PiDevice dev, const plugin &Plugin) { size_t result[3]; - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), - sizeof(result), &result, nullptr); + Plugin.call(dev, + pi::cast(param), + sizeof(result), &result, nullptr); return id<3>(result[0], result[1], result[2]); } }; @@ -102,17 +109,19 @@ template struct get_device_info, param> { // Specialization for fp_config types, checks the corresponding fp type support template struct get_device_info, param> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, + const plugin &Plugin) { // Check if fp type is supported if (!get_device_info< typename info::param_traits< info::device, check_fp_support::value>::return_type, - check_fp_support::value>::get(dev)) { + check_fp_support::value>::get(dev, Plugin)) { return {}; } cl_device_fp_config result; - PI_CALL(piDeviceGetInfo)(dev, pi::cast(param), - sizeof(result), &result, nullptr); + Plugin.call(dev, + pi::cast(param), + sizeof(result), &result, nullptr); return read_fp_bitfield(result); } }; @@ -121,9 +130,10 @@ struct get_device_info, param> { template <> struct get_device_info, info::device::single_fp_config> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, + const plugin &Plugin) { cl_device_fp_config result; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::single_fp_config), sizeof(result), &result, nullptr); return read_fp_bitfield(result); @@ -132,9 +142,9 @@ struct get_device_info, // Specialization for queue_profiling, OpenCL returns a bitfield template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { cl_command_queue_properties result; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::queue_profiling), sizeof(result), &result, nullptr); return (result & CL_QUEUE_PROFILING_ENABLE); @@ -145,9 +155,10 @@ template <> struct get_device_info { template <> struct get_device_info, info::device::execution_capabilities> { - static vector_class get(RT::PiDevice dev) { + static vector_class + get(RT::PiDevice dev, const plugin &Plugin) { cl_device_exec_capabilities result; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::execution_capabilities), sizeof(result), &result, nullptr); return read_execution_bitfield(result); @@ -158,9 +169,11 @@ struct get_device_info, template <> struct get_device_info, info::device::built_in_kernels> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, + const plugin &Plugin) { string_class result = - get_device_info::get(dev); + get_device_info::get( + dev, Plugin); return split_string(result, ';'); } }; @@ -168,9 +181,11 @@ struct get_device_info, // Specialization for extensions, splits the string returned by OpenCL template <> struct get_device_info, info::device::extensions> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, + const plugin &Plugin) { string_class result = - get_device_info::get(dev); + get_device_info::get(dev, + Plugin); return split_string(result, ' '); } }; @@ -179,12 +194,14 @@ struct get_device_info, info::device::extensions> { template <> struct get_device_info, info::device::partition_properties> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, + const plugin &Plugin) { auto info_partition = pi::cast(info::device::partition_properties); size_t resultSize; - PI_CALL(piDeviceGetInfo)(dev, info_partition, 0, nullptr, &resultSize); + Plugin.call(dev, info_partition, 0, nullptr, + &resultSize); size_t arrayLength = resultSize / sizeof(cl_device_partition_property); if (arrayLength == 0) { @@ -192,8 +209,8 @@ struct get_device_info, } unique_ptr_class arrayResult( new cl_device_partition_property[arrayLength]); - PI_CALL(piDeviceGetInfo)(dev, info_partition, resultSize, arrayResult.get(), - nullptr); + Plugin.call(dev, info_partition, resultSize, + arrayResult.get(), nullptr); vector_class result; for (size_t i = 0; i < arrayLength - 1; ++i) { @@ -207,9 +224,10 @@ struct get_device_info, template <> struct get_device_info, info::device::partition_affinity_domains> { - static vector_class get(RT::PiDevice dev) { + static vector_class + get(RT::PiDevice dev, const plugin &Plugin) { cl_device_affinity_domain result; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::partition_affinity_domains), sizeof(result), &result, nullptr); @@ -222,20 +240,23 @@ struct get_device_info, template <> struct get_device_info { - static info::partition_affinity_domain get(RT::PiDevice dev) { + static info::partition_affinity_domain get(RT::PiDevice dev, + const plugin &Plugin) { size_t resultSize; - PI_CALL(piDeviceGetInfo)(dev, - pi::cast( - info::device::partition_type_affinity_domain), - 0, nullptr, &resultSize); + Plugin.call( + dev, + pi::cast( + info::device::partition_type_affinity_domain), + 0, nullptr, &resultSize); if (resultSize != 1) { return info::partition_affinity_domain::not_applicable; } cl_device_partition_property result; - PI_CALL(piDeviceGetInfo)(dev, - pi::cast( - info::device::partition_type_affinity_domain), - sizeof(result), &result, nullptr); + Plugin.call( + dev, + pi::cast( + info::device::partition_type_affinity_domain), + sizeof(result), &result, nullptr); if (result == CL_DEVICE_AFFINITY_DOMAIN_NUMA || result == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE || result == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE || @@ -252,10 +273,11 @@ struct get_device_info struct get_device_info { - static info::partition_property get(RT::PiDevice dev) { + static info::partition_property get(RT::PiDevice dev, + const plugin &Plugin) { size_t resultSize; - PI_CALL(piDeviceGetInfo)(dev, PI_DEVICE_INFO_PARTITION_TYPE, 0, nullptr, - &resultSize); + Plugin.call(dev, PI_DEVICE_INFO_PARTITION_TYPE, + 0, nullptr, &resultSize); if (!resultSize) return info::partition_property::no_partition; @@ -263,8 +285,9 @@ struct get_device_info arrayResult( new cl_device_partition_property[arrayLength]); - PI_CALL(piDeviceGetInfo)(dev, PI_DEVICE_INFO_PARTITION_TYPE, resultSize, - arrayResult.get(), nullptr); + Plugin.call(dev, PI_DEVICE_INFO_PARTITION_TYPE, + resultSize, arrayResult.get(), + nullptr); if (!arrayResult[0]) return info::partition_property::no_partition; return info::partition_property(arrayResult[0]); @@ -273,14 +296,14 @@ struct get_device_info struct get_device_info, info::device::sub_group_sizes> { - static vector_class get(RT::PiDevice dev) { + static vector_class get(RT::PiDevice dev, const plugin &Plugin) { size_t resultSize = 0; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::sub_group_sizes), 0, nullptr, &resultSize); vector_class result(resultSize / sizeof(size_t)); - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::sub_group_sizes), resultSize, result.data(), nullptr); return result; @@ -292,10 +315,11 @@ struct get_device_info, info::device::sub_group_sizes> { // enum for global pipes feature. template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { // We claim, that all Intel FPGA devices support kernel to kernel pipe // feature (at least at the scope of SYCL_INTEL_data_flow_pipes extension). - platform plt = get_device_info::get(dev); + platform plt = + get_device_info::get(dev, Plugin); string_class platform_name = plt.get_info(); if (platform_name == "Intel(R) FPGA Emulation Platform for OpenCL(TM)" || platform_name == "Intel(R) FPGA SDK for OpenCL(TM)") @@ -331,9 +355,9 @@ cl_uint get_native_vector_width(size_t idx); // Specialization for device usm query. template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { pi_usm_capabilities caps; - pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)( + pi_result Err = Plugin.call_nocheck( dev, pi::cast(info::device::usm_device_allocations), sizeof(pi_usm_capabilities), &caps, nullptr); @@ -344,9 +368,9 @@ struct get_device_info { // Specialization for host usm query. template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { pi_usm_capabilities caps; - pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)( + pi_result Err = Plugin.call_nocheck( dev, pi::cast(info::device::usm_host_allocations), sizeof(pi_usm_capabilities), &caps, nullptr); @@ -357,9 +381,9 @@ struct get_device_info { // Specialization for shared usm query. template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { pi_usm_capabilities caps; - pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)( + pi_result Err = Plugin.call_nocheck( dev, pi::cast(info::device::usm_shared_allocations), sizeof(pi_usm_capabilities), &caps, nullptr); return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); @@ -369,9 +393,9 @@ struct get_device_info { // Specialization for restricted usm query template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { pi_usm_capabilities caps; - pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)( + pi_result Err = Plugin.call_nocheck( dev, pi::cast( info::device::usm_restricted_shared_allocations), @@ -386,9 +410,9 @@ struct get_device_info { // Specialization for system usm query template <> struct get_device_info { - static bool get(RT::PiDevice dev) { + static bool get(RT::PiDevice dev, const plugin &Plugin) { pi_usm_capabilities caps; - pi_result Err = PI_CALL_NOCHECK(piDeviceGetInfo)( + pi_result Err = Plugin.call_nocheck( dev, pi::cast(info::device::usm_system_allocator), sizeof(pi_usm_capabilities), &caps, nullptr); return (Err != PI_SUCCESS) ? false : (caps & PI_USM_ACCESS); diff --git a/sycl/include/CL/sycl/detail/event_impl.hpp b/sycl/include/CL/sycl/detail/event_impl.hpp index 8d28ea278fccd..2acc33b8ebbb6 100644 --- a/sycl/include/CL/sycl/detail/event_impl.hpp +++ b/sycl/include/CL/sycl/detail/event_impl.hpp @@ -9,8 +9,8 @@ #pragma once #include -#include #include +#include #include #include @@ -19,6 +19,7 @@ __SYCL_INLINE namespace cl { namespace sycl { class context; namespace detail { +class plugin; class context_impl; using ContextImplPtr = std::shared_ptr; class queue_impl; @@ -135,6 +136,10 @@ class event_impl { /// @return a shared pointer to a valid context_impl. const ContextImplPtr &getContextImpl(); + // @return the Plugin associated with the context of this event. + // Should be called when this is not a Host Event. + const plugin &getPlugin() const; + /// Associate event with the context. /// /// Provided PiContext inside ContextImplPtr must be associated diff --git a/sycl/include/CL/sycl/detail/event_info.hpp b/sycl/include/CL/sycl/detail/event_info.hpp index 7164534a25fa2..3863f9ab7f277 100644 --- a/sycl/include/CL/sycl/detail/event_info.hpp +++ b/sycl/include/CL/sycl/detail/event_info.hpp @@ -9,6 +9,8 @@ #pragma once #include +#include +#include #include __SYCL_INLINE namespace cl { @@ -19,11 +21,11 @@ template struct get_event_profiling_info { using RetType = typename info::param_traits::return_type; - static RetType get(RT::PiEvent Event) { + static RetType get(RT::PiEvent Event, const plugin &Plugin) { RetType Result = 0; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piEventGetProfilingInfo)(Event, cl_profiling_info(Param), - sizeof(Result), &Result, nullptr); + Plugin.call( + Event, cl_profiling_info(Param), sizeof(Result), &Result, nullptr); return Result; } }; @@ -31,11 +33,11 @@ template struct get_event_profiling_info { template struct get_event_info { using RetType = typename info::param_traits::return_type; - static RetType get(RT::PiEvent Event) { + static RetType get(RT::PiEvent Event, const plugin &Plugin) { RetType Result = (RetType)0; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piEventGetInfo)(Event, cl_profiling_info(Param), sizeof(Result), - &Result, nullptr); + Plugin.call(Event, cl_profiling_info(Param), + sizeof(Result), &Result, nullptr); return Result; } }; diff --git a/sycl/include/CL/sycl/detail/image_impl.hpp b/sycl/include/CL/sycl/detail/image_impl.hpp index 4125ffdb803fc..12ac8a8bf263e 100644 --- a/sycl/include/CL/sycl/detail/image_impl.hpp +++ b/sycl/include/CL/sycl/detail/image_impl.hpp @@ -219,9 +219,13 @@ template class image_impl final : public SYCLMemObjT { ~image_impl() { BaseT::updateHostMemory(); } private: - template void getImageInfo(RT::PiMemImageInfo Info, T &Dest) { + template + void getImageInfo(const ContextImplPtr Context, RT::PiMemImageInfo Info, + T &Dest) { + const detail::plugin &Plugin = Context->getPlugin(); RT::PiMem Mem = pi::cast(BaseT::MInteropMemObject); - PI_CALL(piMemImageGetInfo)(Mem, Info, sizeof(T), &Dest, nullptr); + Plugin.call(Mem, Info, sizeof(T), &Dest, + nullptr); } vector_class getDevices(const ContextImplPtr Context); diff --git a/sycl/include/CL/sycl/detail/kernel_impl.hpp b/sycl/include/CL/sycl/detail/kernel_impl.hpp index f04dd94b8639a..5ec0f156377f1 100644 --- a/sycl/include/CL/sycl/detail/kernel_impl.hpp +++ b/sycl/include/CL/sycl/detail/kernel_impl.hpp @@ -76,7 +76,7 @@ class kernel_impl { cl_kernel get() const { if (is_host()) throw invalid_object_error("This instance of kernel is a host instance"); - PI_CALL(piKernelRetain)(MKernel); + getPlugin().call(MKernel); return pi::cast(MKernel); } @@ -85,6 +85,8 @@ class kernel_impl { /// @return true if this SYCL kernel is a host kernel. bool is_host() const { return MContext->is_host(); } + const plugin &getPlugin() const { return MContext->getPlugin(); } + /// Query information from the kernel object using the info::kernel_info /// descriptor. /// diff --git a/sycl/include/CL/sycl/detail/kernel_info.hpp b/sycl/include/CL/sycl/detail/kernel_info.hpp index ad4e4356063c1..3ce376b461a79 100644 --- a/sycl/include/CL/sycl/detail/kernel_info.hpp +++ b/sycl/include/CL/sycl/detail/kernel_info.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -21,28 +22,30 @@ namespace detail { template struct get_kernel_info {}; template struct get_kernel_info { - static string_class get(RT::PiKernel Kernel) { + static string_class get(RT::PiKernel Kernel, const plugin &Plugin) { size_t ResultSize; + // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetInfo)(Kernel, cl_kernel_info(Param), 0, nullptr, - &ResultSize); + Plugin.call(Kernel, cl_kernel_info(Param), 0, + nullptr, &ResultSize); if (ResultSize == 0) { return ""; } vector_class Result(ResultSize); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetInfo)(Kernel, cl_kernel_info(Param), ResultSize, - Result.data(), nullptr); + Plugin.call(Kernel, cl_kernel_info(Param), + ResultSize, Result.data(), nullptr); return string_class(Result.data()); } }; template struct get_kernel_info { - static cl_uint get(RT::PiKernel Kernel) { + static cl_uint get(RT::PiKernel Kernel, const plugin &Plugin) { cl_uint Result; + // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetInfo)(Kernel, cl_kernel_info(Param), sizeof(cl_uint), - &Result, nullptr); + Plugin.call(Kernel, cl_kernel_info(Param), + sizeof(cl_uint), &Result, nullptr); return Result; } }; @@ -51,24 +54,26 @@ template struct get_kernel_info { template struct get_kernel_work_group_info { - static T get(RT::PiKernel Kernel, RT::PiDevice Device) { + static T get(RT::PiKernel Kernel, RT::PiDevice Device, + const plugin &Plugin) { T Result; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetGroupInfo)(Kernel, Device, - cl_kernel_work_group_info(Param), sizeof(T), - &Result, nullptr); + Plugin.call( + Kernel, Device, cl_kernel_work_group_info(Param), sizeof(T), &Result, + nullptr); return Result; } }; template struct get_kernel_work_group_info, Param> { - static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device) { + static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device, + const plugin &Plugin) { size_t Result[3]; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetGroupInfo)(Kernel, Device, - cl_kernel_work_group_info(Param), - sizeof(size_t) * 3, Result, nullptr); + Plugin.call( + Kernel, Device, cl_kernel_work_group_info(Param), sizeof(size_t) * 3, + Result, nullptr); return cl::sycl::range<3>(Result[0], Result[1], Result[2]); } }; @@ -106,22 +111,24 @@ get_kernel_work_group_info_host( template struct get_kernel_sub_group_info { - static TOut get(RT::PiKernel Kernel, RT::PiDevice Device) { + static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, + const plugin &Plugin) { TOut Result; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetSubGroupInfo)(Kernel, Device, - cl_kernel_sub_group_info(Param), 0, - nullptr, sizeof(TOut), &Result, nullptr); + Plugin.call( + Kernel, Device, cl_kernel_sub_group_info(Param), 0, nullptr, + sizeof(TOut), &Result, nullptr); return Result; } }; template struct get_kernel_sub_group_info_with_input { - static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, TIn In) { + static TOut get(RT::PiKernel Kernel, RT::PiDevice Device, TIn In, + const plugin &Plugin) { TOut Result; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetSubGroupInfo)( + Plugin.call( Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(TIn), &In, sizeof(TOut), &Result, nullptr); return Result; @@ -129,13 +136,12 @@ struct get_kernel_sub_group_info_with_input { }; template -struct get_kernel_sub_group_info_with_input, Param, - size_t> { +struct get_kernel_sub_group_info_with_input, Param, size_t> { static cl::sycl::range<3> get(RT::PiKernel Kernel, RT::PiDevice Device, - size_t In) { + size_t In, const plugin &Plugin) { size_t Result[3]; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetSubGroupInfo)( + Plugin.call( Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t), &In, sizeof(size_t) * 3, Result, nullptr); return cl::sycl::range<3>(Result[0], Result[1], Result[2]); @@ -143,14 +149,13 @@ struct get_kernel_sub_group_info_with_input, Param, }; template -struct get_kernel_sub_group_info_with_input> { +struct get_kernel_sub_group_info_with_input> { static size_t get(RT::PiKernel Kernel, RT::PiDevice Device, - cl::sycl::range<3> In) { + cl::sycl::range<3> In, const plugin &Plugin) { size_t Input[3] = {In[0], In[1], In[2]}; size_t Result; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piKernelGetSubGroupInfo)( + Plugin.call( Kernel, Device, cl_kernel_sub_group_info(Param), sizeof(size_t) * 3, Input, sizeof(size_t), &Result, nullptr); return Result; diff --git a/sycl/include/CL/sycl/detail/kernel_program_cache.hpp b/sycl/include/CL/sycl/detail/kernel_program_cache.hpp index 8fece6399f4cc..470650d84f824 100644 --- a/sycl/include/CL/sycl/detail/kernel_program_cache.hpp +++ b/sycl/include/CL/sycl/detail/kernel_program_cache.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,7 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { +class context_impl; class KernelProgramCache { public: /// Denotes pointer to some entity with its state. @@ -41,6 +43,7 @@ class KernelProgramCache { using PiProgramPtrT = std::atomic; using ProgramWithBuildStateT = EntityWithState; using ProgramCacheT = std::map; + using ContextPtr = context_impl *; using PiKernelT = std::remove_pointer::type; using PiKernelPtrT = std::atomic; @@ -50,6 +53,8 @@ class KernelProgramCache { ~KernelProgramCache(); + void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; } + Locked acquireCachedPrograms() { return {MCachedPrograms, MProgramCacheMutex}; } @@ -78,6 +83,7 @@ class KernelProgramCache { ProgramCacheT MCachedPrograms; KernelCacheT MKernelsPerProgramCache; + ContextPtr MParentContext; }; } } diff --git a/sycl/include/CL/sycl/detail/memory_manager.hpp b/sycl/include/CL/sycl/detail/memory_manager.hpp index 8d5b2b3d201dd..06921329afa7a 100644 --- a/sycl/include/CL/sycl/detail/memory_manager.hpp +++ b/sycl/include/CL/sycl/detail/memory_manager.hpp @@ -36,14 +36,14 @@ class MemoryManager { // The following method releases memory allocation of memory object. // Depending on the context it releases memory on host or on device. static void release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, - void *MemAllocation, std::vector DepEvents, + void *MemAllocation, std::vector DepEvents, RT::PiEvent &OutEvent); // The following method allocates memory allocation of memory object. // Depending on the context it allocates memory on host or on device. static void *allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, - std::vector DepEvents, + std::vector DepEvents, RT::PiEvent &OutEvent); // The following method creates OpenCL sub buffer for specified @@ -51,7 +51,7 @@ class MemoryManager { static void *allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range<3> Range, - std::vector DepEvents, + std::vector DepEvents, RT::PiEvent &OutEvent); // Allocates buffer in specified context taking into account situations such diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 579ba0c237c4f..5665c990ad3ad 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -20,20 +20,13 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { -namespace pi { -// Function to load the shared library -// Implementation is OS dependent. -void *loadOsLibrary(const std::string &Library); - -// Function to get Address of a symbol defined in the shared -// library, implementation is OS dependent. -void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName); - -// For selection of SYCL RT back-end, now manually through the "SYCL_BE" -// environment variable. -// -enum Backend { SYCL_BE_PI_OPENCL, SYCL_BE_PI_OTHER }; +enum class PiApiKind { +#define _PI_API(api) api, +#include +}; +class plugin; +namespace pi { #ifdef SYCL_RT_OS_WINDOWS #define PLUGIN_NAME "pi_opencl.dll" @@ -41,9 +34,7 @@ enum Backend { SYCL_BE_PI_OPENCL, SYCL_BE_PI_OTHER }; #define PLUGIN_NAME "libpi_opencl.so" #endif -// Check for manually selected BE at run-time. -bool useBackend(Backend Backend); - +using PiPlugin = ::pi_plugin; using PiResult = ::pi_result; using PiPlatform = ::pi_platform; using PiDevice = ::pi_device; @@ -70,40 +61,69 @@ using PiMemObjectType = ::pi_mem_type; using PiMemImageChannelOrder = ::pi_image_channel_order; using PiMemImageChannelType = ::pi_image_channel_type; +// Function to load the shared library +// Implementation is OS dependent. +void *loadOsLibrary(const std::string &Library); + +// Function to get Address of a symbol defined in the shared +// library, implementation is OS dependent. +void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName); + +// For selection of SYCL RT back-end, now manually through the "SYCL_BE" +// environment variable. +enum Backend { SYCL_BE_PI_OPENCL, SYCL_BE_PI_OTHER }; + +// Check for manually selected BE at run-time. +bool useBackend(Backend Backend); + // Get a string representing a _pi_platform_info enum std::string platformInfoToString(pi_platform_info info); // Report error and no return (keeps compiler happy about no return statements). [[noreturn]] void die(const char *Message); + void assertion(bool Condition, const char *Message = nullptr); // Want all the needed casts be explicit, do not define conversion operators. template To cast(From value); // Holds the PluginInformation for the plugin that is bound. -// TODO: Move this into sycl::platform. Currenlty, we have only a single Plugin -// connection possible. -extern pi_plugin PluginInformation; +// Currently a global varaible is used to store OpenCL plugin information to be +// used with SYCL Interoperability Constructors. +extern std::shared_ptr GlobalPlugin; // Performs PI one-time initialization. -void initialize(); +vector_class initialize(); + +// Utility Functions to get Function Name for a PI Api. +template struct PiFuncInfo {}; + +#define _PI_API(api) \ + template <> struct PiFuncInfo { \ + inline std::string getFuncName() { return #api; } \ + inline decltype(&::api) getFuncPtr(PiPlugin MPlugin) { \ + return MPlugin.PiFunctionTable.api; \ + } \ + }; +#include +// Helper utilities for PI Tracing // The run-time tracing of PI calls. // Print functions used by Trace class. template inline void print(T val) { - std::cout << " : " << val; + std::cout << " : " << val << std::endl; } template <> inline void print<>(PiPlatform val) { - std::cout << "pi_platform : " << val; + std::cout << "pi_platform : " << val << std::endl; } template <> inline void print<>(PiResult val) { std::cout << "pi_result : "; if (val == PI_SUCCESS) - std::cout << "PI_SUCCESS"; + std::cout << "PI_SUCCESS" << std::endl; else - std::cout << val; + std::cout << val << std::endl; } // cout does not resolve a nullptr. @@ -112,123 +132,19 @@ template <> inline void print<>(std::nullptr_t val) { print(val); } inline void printArgs(void) {} template void printArgs(Arg0 arg0, Args... args) { - std::cout << std::endl << " "; + std::cout << " "; print(arg0); printArgs(std::forward(args)...); } - -// Utility function to check return from pi calls. -// Throws if pi_result is not a PI_SUCCESS. -template -inline void checkPiResult(PiResult pi_result) { - CHECK_OCL_CODE_THROW(pi_result, Exception); -} - -// Class to call PI API, trace and get the result. -// To Trace : Set SYCL_PI_TRACE environment variable. -// Template Arguments: -// FnType - Type of Function pointer to the PI API. -// FnOffset- Offset to the Function Pointer in the piPlugin::FunctionPointers -// structure. Used to differentiate between APIs with same pointer type, -// E.g.: piDeviceRelease and piDeviceRetain. Differentiation needed to avoid -// redefinition error during explicit specialization of class in pi.cpp. -// Members: Initialized in default constructor in Class Template Specialization. -// Usage: -// Operator() - Call, Trace and Get result -// Use Macro PI_CALL_NOCHECK call the constructor directly. -template class CallPi { -private: - FnType MFnPtr; - std::string MFnName; - static bool MEnableTrace; - -public: - CallPi(); - template PiResult operator()(Args... args) { - if (MEnableTrace) { - std::cout << "---> " << MFnName << "("; - printArgs(args...); - } - - PiResult r = MFnPtr(args...); - - if (MEnableTrace) { - std::cout << ") ---> "; - std::cout << (print(r), "") << std::endl; - } - return r; - } -}; - -template -bool CallPi::MEnableTrace = (std::getenv("SYCL_PI_TRACE") != - nullptr); - -// Class to call PI API, trace, check the return result and throw Exception. -// To Trace : Set SYCL_PI_TRACE environment variable. -// Template Arguments: -// FnType, FnOffset - for CallPi Class. -// Exception - The type of exception to throw if PiResult of a call is not -// PI_SUCCESS. Default value is cl::sycl::runtime_error. -// Usage: -// Operator() - Call, Trace, check Result and Throw Exception. -// Use Macro PI_CALL and PI_CALL_THROW to call the constructor directly. -template -class CallPiAndCheck : private CallPi { -public: - CallPiAndCheck() : CallPi(){}; - - template void operator()(Args... args) { - PiResult Err = (CallPi::operator()(args...)); - checkPiResult(Err); - } -}; - -// Explicit specialization declarations for Trace class for every FnType. -// The offsetof is used as a template argument to uniquely identify every -// api. -#define _PI_API(api) \ - template <> \ - CallPi::CallPi(); - -#include - } // namespace pi namespace RT = cl::sycl::detail::pi; -// Use this macro to call the API, trace the call, check the return and throw a -// runtime_error exception. -// Usage: PI_CALL(pi)(Args); -#define PI_CALL(pi) \ - RT::CallPiAndCheck() - -// Use this macro to call the API, trace the call and return the result. -// To check the result use checkPiResult. -// Usage: -// PiResult Err = PI_CALL_NOCHECK(pi)(args); -// RT::checkPiResult(Err); <- Checks Result and throws a runtime_error -// exception. -#define PI_CALL_NOCHECK(pi) \ - RT::CallPi() - -// Use this macro to call the API, trace the call, check the return and throw an -// Exception as given in the MACRO. -// Usage: PI_CALL_THROW(pi, compile_program_error)(args); -#define PI_CALL_THROW(pi, Exception) \ - RT::CallPiAndCheck() - -#define PI_ASSERT(cond, msg) RT::assertion((cond), "assert: " msg); - // Want all the needed casts be explicit, do not define conversion // operators. template To pi::cast(From value) { // TODO: see if more sanity checks are possible. - PI_ASSERT(sizeof(From) == sizeof(To), "cast failed size check"); + RT::assertion((sizeof(From) == sizeof(To)), "assert: cast failed size check"); return (To)(value); } diff --git a/sycl/include/CL/sycl/detail/platform_impl.hpp b/sycl/include/CL/sycl/detail/platform_impl.hpp index 86a785470ddb1..991bba721bed8 100644 --- a/sycl/include/CL/sycl/detail/platform_impl.hpp +++ b/sycl/include/CL/sycl/detail/platform_impl.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,11 @@ class platform_impl { /// Constructs platform_impl from a plug-in interoperability platform handle. /// /// @param Platform is a raw plug-in platform handle. - explicit platform_impl(RT::PiPlatform Platform) : MPlatform(Platform) {} + explicit platform_impl(RT::PiPlatform APlatform, const plugin &APlugin) + : MPlatform(APlatform), MPlugin(std::make_shared(APlugin)) {} + + explicit platform_impl(RT::PiPlatform APlatform, std::shared_ptr APlugin) + : MPlatform(APlatform), MPlugin(APlugin) {} ~platform_impl() = default; @@ -97,9 +102,16 @@ class platform_impl { /// @return a vector of all available SYCL platforms. static vector_class get_platforms(); + // @return the Plugin associated with this platform. + const plugin &getPlugin() const { + assert(!MHostPlatform && "Plugin is not available for Host."); + return *MPlugin; + } + private: bool MHostPlatform = false; RT::PiPlatform MPlatform = 0; + std::shared_ptr MPlugin; }; } // namespace detail } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/platform_info.hpp b/sycl/include/CL/sycl/detail/platform_info.hpp index 820d10b0756ca..96a3a3d3309ab 100644 --- a/sycl/include/CL/sycl/detail/platform_info.hpp +++ b/sycl/include/CL/sycl/detail/platform_info.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include __SYCL_INLINE namespace cl { @@ -21,18 +22,19 @@ template struct get_platform_info {}; template struct get_platform_info { - static string_class get(RT::PiPlatform plt) { + static string_class get(RT::PiPlatform plt, const plugin &Plugin) { size_t resultSize; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piPlatformGetInfo)(plt, pi::cast(param), 0, - nullptr, &resultSize); + Plugin.call( + plt, pi::cast(param), 0, nullptr, &resultSize); if (resultSize == 0) { return ""; } unique_ptr_class result(new char[resultSize]); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piPlatformGetInfo)(plt, pi::cast(param), - resultSize, result.get(), nullptr); + Plugin.call( + plt, pi::cast(param), resultSize, result.get(), + nullptr); return result.get(); } }; @@ -40,9 +42,11 @@ struct get_platform_info { template <> struct get_platform_info, info::platform::extensions> { - static vector_class get(RT::PiPlatform plt) { + static vector_class get(RT::PiPlatform plt, + const plugin &Plugin) { string_class result = - get_platform_info::get(plt); + get_platform_info::get( + plt, Plugin); return split_string(result, ' '); } }; diff --git a/sycl/include/CL/sycl/detail/plugin.hpp b/sycl/include/CL/sycl/detail/plugin.hpp new file mode 100644 index 0000000000000..ca7413c669225 --- /dev/null +++ b/sycl/include/CL/sycl/detail/plugin.hpp @@ -0,0 +1,76 @@ +//==--------------------- plugin.hpp - SYCL platform-------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +__SYCL_INLINE namespace cl { +namespace sycl { +namespace detail { + +class plugin { +public: + plugin() = delete; + + plugin(RT::PiPlugin Plugin) : MPlugin(Plugin) { + MPiEnableTrace = (std::getenv("SYCL_PI_TRACE") != nullptr); + } + + ~plugin() = default; + + // Utility function to check return from PI calls. + // Throws if pi_result is not a PI_SUCCESS. + // Exception - The type of exception to throw if PiResult of a call is not + // PI_SUCCESS. Default value is cl::sycl::runtime_error. + template + void checkPiResult(RT::PiResult pi_result) const { + CHECK_OCL_CODE_THROW(pi_result, Exception); + } + + // Call the PiApi, trace the call and return the result. + // To check the result use checkPiResult. + // Usage: + // PiResult Err = plugin.call(Args); + // Plugin.checkPiResult(Err); <- Checks Result and throws a runtime_error + // exception. + template + RT::PiResult call_nocheck(ArgsT... Args) const { + RT::PiFuncInfo PiCallInfo; + if (MPiEnableTrace) { + std::string FnName = PiCallInfo.getFuncName(); + std::cout << "---> " << FnName << "(" << std::endl; + RT::printArgs(Args...); + } + RT::PiResult R = PiCallInfo.getFuncPtr(MPlugin)(Args...); + if (MPiEnableTrace) { + std::cout << ") ---> "; + RT::printArgs(R); + } + return R; + } + + // Call the API, trace the call, check the result and throw + // a runtime_error Exception + template + void call(ArgsT... Args) const { + RT::PiResult Err = call_nocheck(Args...); + checkPiResult(Err); + } + // TODO: Make this private. Currently used in program_manager to create a + // pointer to PiProgram. + RT::PiPlugin MPlugin; + +private: + bool MPiEnableTrace; + +}; // class plugin +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/include/CL/sycl/detail/program_impl.hpp b/sycl/include/CL/sycl/detail/program_impl.hpp index ed2eff56b9d42..e7c395c441153 100644 --- a/sycl/include/CL/sycl/detail/program_impl.hpp +++ b/sycl/include/CL/sycl/detail/program_impl.hpp @@ -234,6 +234,12 @@ class program_impl { return createSyclObjFromImpl(MContext); } + // @return the Plugin associated withh the context of this program. + const plugin &getPlugin() const { + assert(!is_host() && "Plugin is not available for Host."); + return MContext->getPlugin(); + } + /// @return a vector of devices that are associated with this program. vector_class get_devices() const { return MDevices; } diff --git a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp index 63a50cace5a01..a7e300ce8d4c2 100644 --- a/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp +++ b/sycl/include/CL/sycl/detail/program_manager/program_manager.hpp @@ -34,6 +34,8 @@ namespace sycl { class context; namespace detail { +class context_impl; +using ContextImplPtr = std::shared_ptr; using DeviceImage = pi_device_binary_struct; // Custom deleter for the DeviceImage. Must only be called for "orphan" images @@ -58,13 +60,15 @@ class ProgramManager { RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context, const string_class &KernelName); RT::PiKernel getOrCreateKernel(OSModuleHandle M, const context &Context, - const string_class &KernelName); - RT::PiProgram getClProgramFromClKernel(RT::PiKernel Kernel); + const string_class &KernelName); + RT::PiProgram getClProgramFromClKernel(RT::PiKernel Kernel, + const ContextImplPtr Context); void addImages(pi_device_binaries DeviceImages); void debugDumpBinaryImages() const; void debugDumpBinaryImage(const DeviceImage *Img) const; - static string_class getProgramBuildLog(const RT::PiProgram &Program); + static string_class getProgramBuildLog(const RT::PiProgram &Program, + const ContextImplPtr Context); private: ProgramManager(); @@ -76,7 +80,7 @@ class ProgramManager { const context &Context); using ProgramPtr = unique_ptr_class, decltype(&::piProgramRelease)>; - ProgramPtr build(ProgramPtr Program, RT::PiContext Context, + ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context, const string_class &CompileOptions, const string_class &LinkOptions, const std::vector &Devices, diff --git a/sycl/include/CL/sycl/detail/queue_impl.hpp b/sycl/include/CL/sycl/detail/queue_impl.hpp index 55166af763085..a6040a41e5267 100644 --- a/sycl/include/CL/sycl/detail/queue_impl.hpp +++ b/sycl/include/CL/sycl/detail/queue_impl.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -25,8 +26,8 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { -using ContextImplPtr = shared_ptr_class; -using DeviceImplPtr = shared_ptr_class; +using ContextImplPtr = std::shared_ptr; +using DeviceImplPtr = std::shared_ptr; /// Sets max number of queues supported by FPGA RT. const size_t MaxNumQueues = 256; @@ -87,26 +88,28 @@ class queue_impl { MCommandQueue = pi::cast(PiQueue); RT::PiDevice Device = nullptr; + const detail::plugin &Plugin = getPlugin(); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piQueueGetInfo)(MCommandQueue, PI_QUEUE_INFO_DEVICE, sizeof(Device), - &Device, nullptr); - MDevice = std::make_shared(Device); + Plugin.call(MCommandQueue, PI_QUEUE_INFO_DEVICE, + sizeof(Device), &Device, nullptr); + MDevice = + std::make_shared(Device, Context->getPlatformImpl()); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piQueueRetain)(MCommandQueue); + Plugin.call(MCommandQueue); } ~queue_impl() { throw_asynchronous(); if (MOpenCLInterop) { - PI_CALL(piQueueRelease)(MCommandQueue); + getPlugin().call(MCommandQueue); } } /// @return an OpenCL interoperability queue handle. cl_command_queue get() { if (MOpenCLInterop) { - PI_CALL(piQueueRetain)(MCommandQueue); + getPlugin().call(MCommandQueue); return pi::cast(MCommandQueue); } throw invalid_object_error( @@ -118,6 +121,8 @@ class queue_impl { return createSyclObjFromImpl(MContext); } + const plugin &getPlugin() const { return MContext->getPlugin(); } + ContextImplPtr getContextImplPtr() const { return MContext; } /// @return an associated SYCL device. @@ -226,8 +231,9 @@ class queue_impl { RT::PiQueue Queue; RT::PiContext Context = MContext->getHandleRef(); RT::PiDevice Device = MDevice->getHandleRef(); - RT::PiResult Error = - PI_CALL_NOCHECK(piQueueCreate)(Context, Device, CreationFlags, &Queue); + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Error = Plugin.call_nocheck( + Context, Device, CreationFlags, &Queue); // If creating out-of-order queue failed and this property is not // supported (for example, on FPGA), it will return @@ -236,7 +242,7 @@ class queue_impl { MSupportOOO = false; Queue = createQueue(QueueOrder::Ordered); } else { - RT::checkPiResult(Error); + Plugin.checkPiResult(Error); } return Queue; @@ -260,7 +266,7 @@ class queue_impl { MQueueNumber %= MaxNumQueues; size_t FreeQueueNum = MQueueNumber++; - PI_CALL(piQueueFinish)(MQueues[FreeQueueNum]); + getPlugin().call(MQueues[FreeQueueNum]); return MQueues[FreeQueueNum]; } diff --git a/sycl/include/CL/sycl/detail/scheduler/commands.hpp b/sycl/include/CL/sycl/detail/scheduler/commands.hpp index 2ebc00e4c0864..85023f165c5ea 100644 --- a/sycl/include/CL/sycl/detail/scheduler/commands.hpp +++ b/sycl/include/CL/sycl/detail/scheduler/commands.hpp @@ -131,9 +131,9 @@ class Command { QueueImplPtr MQueue; std::vector MDepsEvents; - void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, + void waitForEvents(QueueImplPtr Queue, std::vector &RawEvents, RT::PiEvent &Event); - std::vector prepareEvents(ContextImplPtr Context); + std::vector prepareEvents(ContextImplPtr Context); // Private interface. Derived classes should implement this method. virtual cl_int enqueueImp() = 0; diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index a16f6ffc5c626..af2bab740d4b0 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -9,7 +9,9 @@ #pragma once #include +#include #include +#include #include #include #include @@ -81,6 +83,11 @@ class SYCLMemObjT : public SYCLMemObjI { virtual ~SYCLMemObjT() = default; + const plugin &getPlugin() const { + assert((MInteropContext != nullptr) && + "Trying to get Plugin from SYCLMemObjT with nullptr ContextImpl."); + return (MInteropContext->getPlugin()); + } size_t getSize() const override { return MSizeInBytes; } size_t get_count() const { size_t AllocatorValueSize = MAllocator->getValueSize(); diff --git a/sycl/include/CL/sycl/detail/usm_dispatch.hpp b/sycl/include/CL/sycl/detail/usm_dispatch.hpp index 89d56e87dc315..966f53991f8e5 100644 --- a/sycl/include/CL/sycl/detail/usm_dispatch.hpp +++ b/sycl/include/CL/sycl/detail/usm_dispatch.hpp @@ -51,7 +51,8 @@ class USMDispatcher { pi_event *Event); pi_result enqueuePrefetch(pi_queue Queue, void *Ptr, size_t Size, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event); + const pi_event *EventWaitList, pi_event *Event, + const plugin &Plugin); private: bool mEmulated = false; diff --git a/sycl/include/CL/sycl/intel/function_pointer.hpp b/sycl/include/CL/sycl/intel/function_pointer.hpp index 31d5f65010e75..297bb0b3b656b 100644 --- a/sycl/include/CL/sycl/intel/function_pointer.hpp +++ b/sycl/include/CL/sycl/intel/function_pointer.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -22,8 +23,8 @@ cl_ulong getDeviceFunctionPointerImpl(device &D, program &P, } namespace intel { -// This is a preview extension implementation, intended to provide early access -// to a feature for review and community feedback. +// This is a preview extension implementation, intended to provide early +// access to a feature for review and community feedback. // // Because the interfaces defined by this header file are not final and are // subject to change they are not intended to be used by shipping software @@ -48,17 +49,15 @@ using enable_if_is_function_pointer_t = typename std::enable_if< std::is_function::type>::value, int>::type; -/// \brief this function can be used only on host side to obtain device function -/// pointer for the specified function. +/// \brief this function can be used only on host side to obtain device +/// function pointer for the specified function. /// /// \param F - pointer to function to make it work for SYCL Host device -/// \param FuncName - name of the function. Please note that by default names of -/// functions are mangled since SYCL is a C++. To avoid the need ot specifying -/// mangled name here, use `extern "C"` -/// \param P - sycl::program object which will be used to extract device -/// function pointer -/// \param D - sycl::device object which will be used to extract device -/// function pointer +/// \param FuncName - name of the function. Please note that by default names +/// of functions are mangled since SYCL is a C++. To avoid the need ot +/// specifying mangled name here, use `extern "C"` \param P - sycl::program +/// object which will be used to extract device function pointer \param D - +/// sycl::device object which will be used to extract device function pointer /// /// \returns device_func_ptr_holder_t object which can be used inside a device /// code. This object must be converted back to a function pointer using @@ -70,8 +69,8 @@ using enable_if_is_function_pointer_t = typename std::enable_if< template = 0> device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName, program &P, device &D) { - // TODO: drop function name argument and map host function pointer directly to - // a device function pointer + // TODO: drop function name argument and map host function pointer directly + // to a device function pointer if (D.is_host()) { return reinterpret_cast(F); } @@ -83,7 +82,6 @@ device_func_ptr_holder_t get_device_func_ptr(FuncType F, const char *FuncName, return detail::getDeviceFunctionPointerImpl(D, P, FuncName); } - } // namespace intel } // namespace sycl } // namespace cl diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 8e7a7e36a7a88..022c439b2682d 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -60,7 +60,8 @@ context::context(const vector_class &DeviceList, } context::context(cl_context ClContext, async_handler AsyncHandler) { impl = std::make_shared( - detail::pi::cast(ClContext), AsyncHandler); + detail::pi::cast(ClContext), AsyncHandler, + *RT::GlobalPlugin); } #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 2f468cf64718d..5a65da6defd1c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -23,7 +23,9 @@ namespace detail { context_impl::context_impl(const device &Device, async_handler AsyncHandler) : MAsyncHandler(AsyncHandler), MDevices(1, Device), MContext(nullptr), - MPlatform(), MPluginInterop(false), MHostContext(true) {} + MPlatform(), MPluginInterop(false), MHostContext(true) { + MKernelProgramCache.setContextPtr(this); +} context_impl::context_impl(const vector_class Devices, async_handler AsyncHandler) @@ -35,39 +37,46 @@ context_impl::context_impl(const vector_class Devices, DeviceIds.push_back(getSyclObjImpl(D)->getHandleRef()); } - PI_CALL(piContextCreate)(nullptr, DeviceIds.size(), DeviceIds.data(), nullptr, - nullptr, &MContext); + getPlugin().call( + nullptr, DeviceIds.size(), DeviceIds.data(), nullptr, nullptr, &MContext); + + MKernelProgramCache.setContextPtr(this); } -context_impl::context_impl(RT::PiContext PiContext, async_handler AsyncHandler) +context_impl::context_impl(RT::PiContext PiContext, async_handler AsyncHandler, + const plugin &Plugin) : MAsyncHandler(AsyncHandler), MDevices(), MContext(PiContext), MPlatform(), MPluginInterop(true), MHostContext(false) { vector_class DeviceIds; size_t DevicesNum = 0; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextGetInfo)(MContext, PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(DevicesNum), &DevicesNum, nullptr); + Plugin.call( + MContext, PI_CONTEXT_INFO_NUM_DEVICES, sizeof(DevicesNum), &DevicesNum, + nullptr); DeviceIds.resize(DevicesNum); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextGetInfo)(MContext, PI_CONTEXT_INFO_DEVICES, - sizeof(RT::PiDevice) * DevicesNum, &DeviceIds[0], - nullptr); + Plugin.call(MContext, PI_CONTEXT_INFO_DEVICES, + sizeof(RT::PiDevice) * DevicesNum, + &DeviceIds[0], nullptr); for (auto Dev : DeviceIds) { - MDevices.emplace_back( - createSyclObjFromImpl(std::make_shared(Dev))); + MDevices.emplace_back(createSyclObjFromImpl( + std::make_shared(Dev, Plugin))); } // TODO What if m_Devices if empty? m_Devices[0].get_platform() MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextRetain)(MContext); + // getPlugin() will be the same as the Plugin passed. This should be taken + // care of when creating device object. + getPlugin().call(MContext); + MKernelProgramCache.setContextPtr(this); } cl_context context_impl::get() const { if (MPluginInterop) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextRetain)(MContext); + getPlugin().call(MContext); return pi::cast(MContext); } throw invalid_object_error( @@ -79,11 +88,11 @@ bool context_impl::is_host() const { return MHostContext || !MPluginInterop; } context_impl::~context_impl() { if (MPluginInterop) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piContextRelease)(MContext); + getPlugin().call(MContext); } for (auto LibProg : MCachedLibPrograms) { assert(LibProg.second && "Null program must not be kept in the cache"); - PI_CALL(piProgramRelease)(LibProg.second); + getPlugin().call(LibProg.second); } } @@ -96,7 +105,7 @@ cl_uint context_impl::get_info() const { if (is_host()) return 0; return get_context_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } template <> platform context_impl::get_info() const { if (is_host()) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index c49ac7b6d2e45..92f31dc6bf9f8 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -15,31 +15,51 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { -device_impl::device_impl() : MIsHostDevice(true) {} +device_impl::device_impl() + : MIsHostDevice(true), + MPlatform(std::make_shared(platform_impl())) {} -device_impl::device_impl(RT::PiDevice Device) +device_impl::device_impl(RT::PiDevice Device, PlatformImplPtr Platform) + : device_impl(Device, Platform, Platform->getPlugin()) {} + +device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) + : device_impl(Device, nullptr, Plugin) {} + +device_impl::device_impl(RT::PiDevice Device, PlatformImplPtr Platform, + const plugin &Plugin) : MDevice(Device), MIsHostDevice(false) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piDeviceGetInfo)(MDevice, PI_DEVICE_INFO_TYPE, - sizeof(RT::PiDeviceType), &MType, nullptr); + Plugin.call( + MDevice, PI_DEVICE_INFO_TYPE, sizeof(RT::PiDeviceType), &MType, nullptr); RT::PiDevice parent = nullptr; // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piDeviceGetInfo)(MDevice, PI_DEVICE_INFO_PARENT, sizeof(RT::PiDevice), - &parent, nullptr); + Plugin.call( + MDevice, PI_DEVICE_INFO_PARENT, sizeof(RT::PiDevice), &parent, nullptr); MIsRootDevice = (nullptr == parent); if (!MIsRootDevice) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piDeviceRetain)(MDevice); + Plugin.call(MDevice); + } + + // set MPlatform + if (!Platform) { + RT::PiPlatform plt = nullptr; // TODO catch an exception and put it to list + // of asynchronous exceptions + Plugin.call(Device, PI_DEVICE_INFO_PLATFORM, + sizeof(plt), &plt, nullptr); + Platform = std::make_shared(plt, Plugin); } + MPlatform = Platform; } device_impl::~device_impl() { if (!MIsRootDevice && !MIsHostDevice) { // TODO catch an exception and put it to list of asynchronous exceptions - CHECK_OCL_CODE_NO_EXC( - RT::PluginInformation.PiFunctionTable.piDeviceRelease(MDevice)); + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck(MDevice); + CHECK_OCL_CODE_NO_EXC(Err); } } @@ -56,25 +76,15 @@ cl_device_id device_impl::get() const { if (!MIsRootDevice) { // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piDeviceRetain)(MDevice); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MDevice); } // TODO: check that device is an OpenCL interop one return pi::cast(MDevice); } platform device_impl::get_platform() const { - if (MIsHostDevice) - return platform(); - - RT::PiPlatform plt = nullptr; // TODO catch an exception and put it to list of - // asynchronous exceptions - PI_CALL(piDeviceGetInfo)(MDevice, PI_DEVICE_INFO_PLATFORM, sizeof(plt), &plt, - nullptr); - - // TODO: this possibly will violate common reference semantics, - // particularly, equality comparison may fail for two consecutive - // get_platform() on the same device, as it compares impl objects. - return createSyclObjFromImpl(std::make_shared(plt)); + return createSyclObjFromImpl(MPlatform); } bool device_impl::has_extension(const string_class &ExtensionName) const { @@ -83,7 +93,8 @@ bool device_impl::has_extension(const string_class &ExtensionName) const { return false; string_class AllExtensionNames = - get_device_info::get(MDevice); + get_device_info::get( + this->getHandleRef(), this->getPlugin()); return (AllExtensionNames.find(ExtensionName) != std::string::npos); } @@ -99,8 +110,10 @@ device_impl::create_sub_devices(const cl_device_partition_property *Properties, vector_class SubDevices(SubDevicesCount); pi_uint32 ReturnedSubDevices = 0; - PI_CALL(piDevicePartition)(MDevice, Properties, SubDevicesCount, - SubDevices.data(), &ReturnedSubDevices); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MDevice, Properties, + SubDevicesCount, SubDevices.data(), + &ReturnedSubDevices); // TODO: check that returned number of sub-devices matches what was // requested, otherwise this walk below is wrong. // @@ -110,9 +123,9 @@ device_impl::create_sub_devices(const cl_device_partition_property *Properties, // vector_class res; std::for_each(SubDevices.begin(), SubDevices.end(), - [&res](const RT::PiDevice &a_pi_device) { + [&res, this](const RT::PiDevice &a_pi_device) { device sycl_device = detail::createSyclObjFromImpl( - std::make_shared(a_pi_device)); + std::make_shared(a_pi_device, MPlatform)); res.push_back(sycl_device); }); return res; diff --git a/sycl/source/detail/device_info.cpp b/sycl/source/detail/device_info.cpp index 52aaedcf8319c..b380bcc72b511 100644 --- a/sycl/source/detail/device_info.cpp +++ b/sycl/source/detail/device_info.cpp @@ -25,18 +25,19 @@ namespace detail { // Specialization for parent device template <> -device -get_device_info::get(RT::PiDevice dev) { +device get_device_info::get( + RT::PiDevice dev, const plugin &Plugin) { typename sycl_to_pi::type result; - PI_CALL(piDeviceGetInfo)( + Plugin.call( dev, pi::cast(info::device::parent_device), sizeof(result), &result, nullptr); if (result == nullptr) throw invalid_object_error( "No parent for device because it is not a subdevice"); - return createSyclObjFromImpl(std::make_shared(result)); + return createSyclObjFromImpl( + std::make_shared(result, Plugin)); } vector_class read_fp_bitfield(cl_device_fp_config bits) { diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 2d72f516f3a9a..eda04af33c10b 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -13,6 +13,7 @@ #include "error_handling.hpp" #include +#include __SYCL_INLINE namespace cl { namespace sycl { @@ -20,24 +21,27 @@ namespace detail { namespace enqueue_kernel_launch { -bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, +bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc) { const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); + const plugin &Plugin = DeviceImpl.getPlugin(); + RT::PiDevice Device = DeviceImpl.getHandleRef(); + size_t VerSize = 0; - PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_INFO_VERSION, 0, nullptr, - &VerSize); + Plugin.call(Device, PI_DEVICE_INFO_VERSION, 0, + nullptr, &VerSize); assert(VerSize >= 10 && "Unexpected device version string"); // strlen("OpenCL X.Y") string_class VerStr(VerSize, '\0'); - PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_INFO_VERSION, VerSize, - &VerStr.front(), nullptr); + Plugin.call(Device, PI_DEVICE_INFO_VERSION, + VerSize, &VerStr.front(), nullptr); const char *Ver = &VerStr[7]; // strlen("OpenCL ") size_t CompileWGSize[3] = {0}; - PI_CALL(piKernelGetGroupInfo)(Kernel, Device, - CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - sizeof(size_t) * 3, CompileWGSize, nullptr); + Plugin.call( + Kernel, Device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(size_t) * 3, + CompileWGSize, nullptr); if (CompileWGSize[0] != 0) { // OpenCL 1.x && 2.0: @@ -70,8 +74,9 @@ bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in // table 4.3 size_t MaxWGSize = 0; - PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(size_t), &MaxWGSize, nullptr); + Plugin.call( + Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaxWGSize, + nullptr); const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > MaxWGSize) @@ -87,8 +92,9 @@ bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. size_t KernelWGSize = 0; - PI_CALL(piKernelGetGroupInfo)(Kernel, Device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(size_t), &KernelWGSize, nullptr); + Plugin.call( + Kernel, Device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), + &KernelWGSize, nullptr); const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > KernelWGSize) @@ -126,14 +132,15 @@ bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, // given by local_work_size pi_program Program = nullptr; - PI_CALL(piKernelGetInfo)(Kernel, CL_KERNEL_PROGRAM, sizeof(pi_program), - &Program, nullptr); + Plugin.call( + Kernel, CL_KERNEL_PROGRAM, sizeof(pi_program), &Program, nullptr); size_t OptsSize = 0; - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, - 0, nullptr, &OptsSize); + Plugin.call( + Program, Device, CL_PROGRAM_BUILD_OPTIONS, 0, nullptr, &OptsSize); string_class Opts(OptsSize, '\0'); - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, - OptsSize, &Opts.front(), nullptr); + Plugin.call( + Program, Device, CL_PROGRAM_BUILD_OPTIONS, OptsSize, &Opts.front(), + nullptr); if (NonUniformWGs) { const bool HasStd20 = Opts.find("-cl-std=CL2.0") != string_class::npos; if (!HasStd20) @@ -163,13 +170,13 @@ bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); } -bool handleError(pi_result Error, pi_device Device, pi_kernel Kernel, - const NDRDescT &NDRDesc) { +bool handleError(pi_result Error, const device_impl &DeviceImpl, + pi_kernel Kernel, const NDRDescT &NDRDesc) { assert(Error != PI_SUCCESS && "Success is expected to be handled on caller side"); switch (Error) { case PI_INVALID_WORK_GROUP_SIZE: - return handleInvalidWorkGroupSize(Device, Kernel, NDRDesc); + return handleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); // TODO: Handle other error codes default: throw runtime_error( diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index be6edba17f2df..c0e77b6a2df8c 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -8,8 +8,9 @@ #pragma once -#include #include +#include +#include __SYCL_INLINE namespace cl { namespace sycl { @@ -24,7 +25,7 @@ namespace enqueue_kernel_launch { /// /// This function actually never returns and always throws an exception with /// error description. -bool handleError(pi_result, pi_device, pi_kernel, const NDRDescT &); +bool handleError(pi_result, const device_impl &, pi_kernel, const NDRDescT &); } // namespace enqueue_kernel_launch } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index e9838614e3340..6bd4f49f2ffdb 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include @@ -23,7 +25,7 @@ bool event_impl::is_host() const { return MHostEvent || !MOpenCLInterop; } cl_event event_impl::get() const { if (MOpenCLInterop) { - PI_CALL(piEventRetain)(MEvent); + getPlugin().call(MEvent); return pi::cast(MEvent); } throw invalid_object_error( @@ -32,12 +34,12 @@ cl_event event_impl::get() const { event_impl::~event_impl() { if (MEvent) - PI_CALL(piEventRelease)(MEvent); + getPlugin().call(MEvent); } void event_impl::waitInternal() const { if (!MHostEvent) { - PI_CALL(piEventsWait)(1, &MEvent); + getPlugin().call(1, &MEvent); } // Waiting of host events is NOP so far as all operations on host device // are blocking. @@ -48,6 +50,10 @@ RT::PiEvent &event_impl::getHandleRef() { return MEvent; } const ContextImplPtr &event_impl::getContextImpl() { return MContext; } +const plugin &event_impl::getPlugin() const { + return MContext->getPlugin(); +} + void event_impl::setContextImpl(const ContextImplPtr &Context) { MHostEvent = Context->is_host(); MOpenCLInterop = !MHostEvent; @@ -65,15 +71,15 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) } RT::PiContext TempContext; - PI_CALL(piEventGetInfo)(MEvent, CL_EVENT_CONTEXT, sizeof(RT::PiContext), - &TempContext, nullptr); + getPlugin().call( + MEvent, CL_EVENT_CONTEXT, sizeof(RT::PiContext), &TempContext, nullptr); if (MContext->getHandleRef() != TempContext) { throw cl::sycl::invalid_parameter_error( "The syclContext must match the OpenCL context associated with the " "clEvent."); } - PI_CALL(piEventRetain)(MEvent); + getPlugin().call(MEvent); } event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { @@ -114,7 +120,7 @@ cl_ulong event_impl::get_profiling_info() const { if (!MHostEvent) { return get_event_profiling_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } if (!MHostProfilingInfo) throw invalid_object_error("Profiling info is not available."); @@ -126,7 +132,7 @@ cl_ulong event_impl::get_profiling_info() const { if (!MHostEvent) { return get_event_profiling_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } if (!MHostProfilingInfo) throw invalid_object_error("Profiling info is not available."); @@ -138,7 +144,7 @@ cl_ulong event_impl::get_profiling_info() const { if (!MHostEvent) { return get_event_profiling_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } if (!MHostProfilingInfo) throw invalid_object_error("Profiling info is not available."); @@ -148,7 +154,7 @@ event_impl::get_profiling_info() const { template <> cl_uint event_impl::get_info() const { if (!MHostEvent) { return get_event_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } return 0; } @@ -158,7 +164,7 @@ info::event_command_status event_impl::get_info() const { if (!MHostEvent) { return get_event_info::get( - this->getHandleRef()); + this->getHandleRef(), this->getPlugin()); } return info::event_command_status::complete; } diff --git a/sycl/source/detail/image_impl.cpp b/sycl/source/detail/image_impl.cpp index 348f929c7df84..eb521977f173c 100644 --- a/sycl/source/detail/image_impl.cpp +++ b/sycl/source/detail/image_impl.cpp @@ -240,30 +240,32 @@ image_impl::image_impl( std::move(Allocator)), MRange(InitializedVal::template get<0>()) { RT::PiMem Mem = pi::cast(BaseT::MInteropMemObject); - PI_CALL(piMemGetInfo)(Mem, CL_MEM_SIZE, sizeof(size_t), - &(BaseT::MSizeInBytes), nullptr); + const ContextImplPtr Context = getSyclObjImpl(SyclContext); + const detail::plugin &Plugin = Context->getPlugin(); + Plugin.call(Mem, CL_MEM_SIZE, sizeof(size_t), + &(BaseT::MSizeInBytes), nullptr); RT::PiMemImageFormat Format; - getImageInfo(PI_IMAGE_INFO_FORMAT, Format); + getImageInfo(Context, PI_IMAGE_INFO_FORMAT, Format); MOrder = detail::convertChannelOrder(Format.image_channel_order); MType = detail::convertChannelType(Format.image_channel_data_type); MNumChannels = getImageNumberChannels(MOrder); - getImageInfo(PI_IMAGE_INFO_ELEMENT_SIZE, MElementSize); + getImageInfo(Context, PI_IMAGE_INFO_ELEMENT_SIZE, MElementSize); assert(getImageElementSize(MNumChannels, MType) == MElementSize); - getImageInfo(PI_IMAGE_INFO_ROW_PITCH, MRowPitch); - getImageInfo(PI_IMAGE_INFO_SLICE_PITCH, MSlicePitch); + getImageInfo(Context, PI_IMAGE_INFO_ROW_PITCH, MRowPitch); + getImageInfo(Context, PI_IMAGE_INFO_SLICE_PITCH, MSlicePitch); switch (Dimensions) { case 3: - getImageInfo(PI_IMAGE_INFO_DEPTH, MRange[2]); + getImageInfo(Context, PI_IMAGE_INFO_DEPTH, MRange[2]); // fall through case 2: - getImageInfo(PI_IMAGE_INFO_HEIGHT, MRange[1]); + getImageInfo(Context, PI_IMAGE_INFO_HEIGHT, MRange[1]); // fall through case 1: - getImageInfo(PI_IMAGE_INFO_WIDTH, MRange[0]); + getImageInfo(Context, PI_IMAGE_INFO_WIDTH, MRange[0]); } } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 4bb9e56bcb8af..5e1f3e9a8fb81 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -32,12 +32,13 @@ kernel_impl::kernel_impl(RT::PiKernel Kernel, ContextImplPtr ContextImpl, MCreatedFromSource(IsCreatedFromSource) { RT::PiContext Context = nullptr; - PI_CALL(piKernelGetInfo)(MKernel, CL_KERNEL_CONTEXT, sizeof(Context), - &Context, nullptr); + // Using the plugin from the passed ContextImpl + getPlugin().call( + MKernel, CL_KERNEL_CONTEXT, sizeof(Context), &Context, nullptr); if (ContextImpl->getHandleRef() != Context) throw cl::sycl::invalid_parameter_error( "Input context must be the same as the context of cl_kernel"); - PI_CALL(piKernelRetain)(MKernel); + getPlugin().call(MKernel); } kernel_impl::kernel_impl(ContextImplPtr Context, @@ -47,7 +48,7 @@ kernel_impl::kernel_impl(ContextImplPtr Context, kernel_impl::~kernel_impl() { // TODO catch an exception and put it to list of asynchronous exceptions if (!is_host()) { - PI_CALL(piKernelRelease)(MKernel); + getPlugin().call(MKernel); } } @@ -60,7 +61,7 @@ kernel_impl::get_info() const { } return get_kernel_info< typename info::param_traits::return_type, - param>::get(this->getHandleRef()); + param>::get(this->getHandleRef(), getPlugin()); } template <> context kernel_impl::get_info() const { @@ -79,7 +80,8 @@ kernel_impl::get_work_group_info(const device &Device) const { } return get_kernel_work_group_info< typename info::param_traits::return_type, - param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef()); + param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), + getPlugin()); } template @@ -90,7 +92,8 @@ kernel_impl::get_sub_group_info(const device &Device) const { } return get_kernel_sub_group_info< typename info::param_traits::return_type, - param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef()); + param>::get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), + getPlugin()); } template @@ -106,7 +109,8 @@ kernel_impl::get_sub_group_info( typename info::param_traits::return_type, param, typename info::param_traits::input_type>:: - get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value); + get(this->getHandleRef(), getSyclObjImpl(Device)->getHandleRef(), Value, + getPlugin()); } #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ diff --git a/sycl/source/detail/kernel_program_cache.cpp b/sycl/source/detail/kernel_program_cache.cpp index 03d780653738c..8ef141ebfcf39 100644 --- a/sycl/source/detail/kernel_program_cache.cpp +++ b/sycl/source/detail/kernel_program_cache.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include +#include __SYCL_INLINE namespace cl { namespace sycl { @@ -28,11 +30,14 @@ KernelProgramCache::~KernelProgramCache() { KernelWithBuildStateT &KernelWithState = p.second; PiKernelT *Kern = KernelWithState.Ptr.load(); - if (Kern) - PI_CALL(piKernelRelease)(Kern); + if (Kern) { + const detail::plugin &Plugin = MParentContext->getPlugin(); + Plugin.call(Kern); + } } - PI_CALL(piProgramRelease)(ToBeDeleted); + const detail::plugin &Plugin = MParentContext->getPlugin(); + Plugin.call(ToBeDeleted); } } } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 254b63592abee..c7d38cfaf67a1 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -21,14 +21,23 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { -static void waitForEvents(const std::vector &Events) { - if (!Events.empty()) - PI_CALL(piEventsWait)(Events.size(), &Events[0]); +static void waitForEvents(const std::vector &Events) { + // Assuming all events will be on the same device or + // devices associated with the same Backend. + if (!Events.empty()) { + const detail::plugin &Plugin = Events[0]->getPlugin(); + std::vector PiEvents(Events.size()); + std::transform(Events.begin(), Events.end(), PiEvents.begin(), + [](const EventImplPtr &EventImpl) { + return EventImpl->getHandleRef(); + }); + Plugin.call(PiEvents.size(), &PiEvents[0]); + } } void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void *MemAllocation, - std::vector DepEvents, + std::vector DepEvents, RT::PiEvent &OutEvent) { // There is no async API for memory releasing. Explicitly wait for all // dependency events and return empty event. @@ -50,12 +59,13 @@ void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, return; } - PI_CALL(piMemRelease)(pi::cast(MemAllocation)); + const detail::plugin &Plugin = TargetContext->getPlugin(); + Plugin.call(pi::cast(MemAllocation)); } void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, bool InitFromUserData, void *HostPtr, - std::vector DepEvents, + std::vector DepEvents, RT::PiEvent &OutEvent) { // There is no async API for memory allocation. Explicitly wait for all // dependency events and return empty event. @@ -91,8 +101,10 @@ void *MemoryManager::allocateInteropMemObject( OutEventToWait = InteropEvent->getHandleRef(); // Retain the event since it will be released during alloca command // destruction - if (nullptr != OutEventToWait) - PI_CALL(piEventRetain)(OutEventToWait); + if (nullptr != OutEventToWait) { + const detail::plugin &Plugin = InteropEvent->getPlugin(); + Plugin.call(OutEventToWait); + } return UserPtr; } // Allocate new cl_mem and initialize from user provided one. @@ -111,8 +123,10 @@ void *MemoryManager::allocateImageObject(ContextImplPtr TargetContext, : PI_MEM_FLAGS_HOST_PTR_USE; RT::PiMem NewMem; - PI_CALL(piMemImageCreate)(TargetContext->getHandleRef(), CreationFlags, - &Format, &Desc, UserPtr, &NewMem); + const detail::plugin &Plugin = TargetContext->getPlugin(); + Plugin.call(TargetContext->getHandleRef(), + CreationFlags, &Format, &Desc, + UserPtr, &NewMem); return NewMem; } @@ -126,8 +140,9 @@ void *MemoryManager::allocateBufferObject(ContextImplPtr TargetContext, : PI_MEM_FLAGS_HOST_PTR_USE; RT::PiMem NewMem; - PI_CALL(piMemBufferCreate)(TargetContext->getHandleRef(), CreationFlags, Size, - UserPtr, &NewMem); + const detail::plugin &Plugin = TargetContext->getPlugin(); + Plugin.call( + TargetContext->getHandleRef(), CreationFlags, Size, UserPtr, &NewMem); return NewMem; } @@ -162,7 +177,7 @@ void *MemoryManager::allocateMemImage( void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, void *ParentMemObj, size_t ElemSize, size_t Offset, range<3> Range, - std::vector DepEvents, + std::vector DepEvents, RT::PiEvent &OutEvent) { waitForEvents(DepEvents); OutEvent = nullptr; @@ -178,7 +193,8 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext, // TODO replace with pi_buffer_region cl_buffer_region Region{Offset, SizeInBytes}; RT::PiMem NewMem; - Error = PI_CALL_NOCHECK(piMemBufferPartition)( + const detail::plugin &Plugin = TargetContext->getPlugin(); + Error = Plugin.call_nocheck( pi::cast(ParentMemObj), PI_MEM_FLAGS_ACCESS_RW, PI_BUFFER_CREATE_TYPE_REGION, &Region, &NewMem); if (Error == PI_MISALIGNED_SUB_BUFFER_OFFSET) @@ -202,6 +218,7 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr SrcQueue, // Adjust first dimension of copy range and offset as OpenCL expects size in // bytes. DstSize[0] *= DstElemSize; + const detail::plugin &Plugin = TgtQueue->getPlugin(); if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::BUFFER) { DstOffset[0] *= DstElemSize; SrcOffset[0] *= SrcElemSize; @@ -210,7 +227,7 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr SrcQueue, SrcSize[0] *= SrcElemSize; if (1 == DimDst && 1 == DimSrc) { - PI_CALL(piEnqueueMemBufferWrite)( + Plugin.call( Queue, DstMem, /*blocking_write=*/CL_FALSE, DstOffset[0], DstAccessRange[0], SrcMem + SrcOffset[0], DepEvents.size(), &DepEvents[0], &OutEvent); @@ -219,7 +236,7 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr SrcQueue, size_t BufferSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSize[0]; size_t HostSlicePitch = (3 == DimSrc) ? SrcSize[0] * SrcSize[1] : 0; - PI_CALL(piEnqueueMemBufferWriteRect)( + Plugin.call( Queue, DstMem, /*blocking_write=*/CL_FALSE, &DstOffset[0], &SrcOffset[0], &DstAccessRange[0], BufferRowPitch, BufferSlicePitch, HostRowPitch, @@ -228,11 +245,11 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr SrcQueue, } else { size_t InputRowPitch = (1 == DimDst) ? 0 : DstSize[0]; size_t InputSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; - PI_CALL(piEnqueueMemImageWrite)(Queue, DstMem, - /*blocking_write=*/CL_FALSE, &DstOffset[0], - &DstAccessRange[0], InputRowPitch, - InputSlicePitch, SrcMem, DepEvents.size(), - &DepEvents[0], &OutEvent); + Plugin.call( + Queue, DstMem, + /*blocking_write=*/CL_FALSE, &DstOffset[0], &DstAccessRange[0], + InputRowPitch, InputSlicePitch, SrcMem, DepEvents.size(), &DepEvents[0], + &OutEvent); } } @@ -249,6 +266,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, const RT::PiQueue Queue = SrcQueue->getHandleRef(); // Adjust sizes of 1 dimensions as OpenCL expects size in bytes. SrcSize[0] *= SrcElemSize; + const detail::plugin &Plugin = SrcQueue->getPlugin(); if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::BUFFER) { DstOffset[0] *= DstElemSize; SrcOffset[0] *= SrcElemSize; @@ -257,7 +275,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, DstSize[0] *= DstElemSize; if (1 == DimDst && 1 == DimSrc) { - PI_CALL(piEnqueueMemBufferRead)( + Plugin.call( Queue, SrcMem, /*blocking_read=*/CL_FALSE, SrcOffset[0], SrcAccessRange[0], DstMem + DstOffset[0], DepEvents.size(), &DepEvents[0], &OutEvent); @@ -267,7 +285,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, size_t HostRowPitch = (1 == DimDst) ? 0 : DstSize[0]; size_t HostSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; - PI_CALL(piEnqueueMemBufferReadRect)( + Plugin.call( Queue, SrcMem, /*blocking_read=*/CL_FALSE, &SrcOffset[0], &DstOffset[0], &SrcAccessRange[0], BufferRowPitch, BufferSlicePitch, HostRowPitch, @@ -276,7 +294,7 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, } else { size_t RowPitch = (1 == DimSrc) ? 0 : SrcSize[0]; size_t SlicePitch = (3 == DimSrc) ? SrcSize[0] * SrcSize[1] : 0; - PI_CALL(piEnqueueMemImageRead)( + Plugin.call( Queue, SrcMem, CL_FALSE, &SrcOffset[0], &SrcAccessRange[0], RowPitch, SlicePitch, DstMem, DepEvents.size(), &DepEvents[0], &OutEvent); } @@ -293,6 +311,7 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, assert(SYCLMemObj && "The SYCLMemObj is nullptr"); const RT::PiQueue Queue = SrcQueue->getHandleRef(); + const detail::plugin &Plugin = SrcQueue->getPlugin(); if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::BUFFER) { // Adjust sizes of 1 dimensions as OpenCL expects size in bytes. DstOffset[0] *= DstElemSize; @@ -300,9 +319,8 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, SrcAccessRange[0] *= SrcElemSize; SrcSize[0] *= SrcElemSize; DstSize[0] *= DstElemSize; - if (1 == DimDst && 1 == DimSrc) { - PI_CALL(piEnqueueMemBufferCopy)( + Plugin.call( Queue, SrcMem, DstMem, SrcOffset[0], DstOffset[0], SrcAccessRange[0], DepEvents.size(), &DepEvents[0], &OutEvent); } else { @@ -312,15 +330,15 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, size_t DstRowPitch = (1 == DimDst) ? 0 : DstSize[0]; size_t DstSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; - PI_CALL(piEnqueueMemBufferCopyRect)( + Plugin.call( Queue, SrcMem, DstMem, &SrcOffset[0], &DstOffset[0], &SrcAccessRange[0], SrcRowPitch, SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(), &DepEvents[0], &OutEvent); } } else { - PI_CALL(piEnqueueMemImageCopy)(Queue, SrcMem, DstMem, &SrcOffset[0], - &DstOffset[0], &SrcAccessRange[0], - DepEvents.size(), &DepEvents[0], &OutEvent); + Plugin.call( + Queue, SrcMem, DstMem, &SrcOffset[0], &DstOffset[0], &SrcAccessRange[0], + DepEvents.size(), &DepEvents[0], &OutEvent); } } @@ -399,9 +417,10 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, RT::PiEvent &OutEvent) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); + const detail::plugin &Plugin = Queue->getPlugin(); if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::BUFFER) { if (Dim == 1) { - PI_CALL(piEnqueueMemBufferFill)( + Plugin.call( Queue->getHandleRef(), pi::cast(Mem), Pattern, PatternSize, Offset[0] * ElementSize, Range[0] * ElementSize, DepEvents.size(), &DepEvents[0], &OutEvent); @@ -410,7 +429,7 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, assert(!"Not supported configuration of fill requested"); throw runtime_error("Not supported configuration of fill requested"); } else { - PI_CALL(piEnqueueMemImageFill)( + Plugin.call( Queue->getHandleRef(), pi::cast(Mem), Pattern, &Offset[0], &Range[0], DepEvents.size(), &DepEvents[0], &OutEvent); } @@ -454,8 +473,8 @@ void *MemoryManager::map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, void *MappedPtr = nullptr; const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2]; - - PI_CALL(piEnqueueMemBufferMap)( + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call( Queue->getHandleRef(), pi::cast(Mem), CL_FALSE, Flags, AccessOffset[0], BytesToMap, DepEvents.size(), DepEvents.empty() ? nullptr : &DepEvents[0], &OutEvent, &MappedPtr); @@ -467,9 +486,14 @@ void MemoryManager::unmap(SYCLMemObjI *SYCLMemObj, void *Mem, std::vector DepEvents, RT::PiEvent &OutEvent) { - PI_CALL(piEnqueueMemUnmap) - (Queue->getHandleRef(), pi::cast(Mem), MappedPtr, DepEvents.size(), - DepEvents.empty() ? nullptr : &DepEvents[0], &OutEvent); + // Host queue is not supported here. + // All DepEvents are to the same Context. + // Using the plugin of the Queue. + + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call( + Queue->getHandleRef(), pi::cast(Mem), MappedPtr, + DepEvents.size(), DepEvents.empty() ? nullptr : &DepEvents[0], &OutEvent); } void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, @@ -481,9 +505,11 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, if (Context.is_host()) { std::memcpy(DstMem, SrcMem, Len); } else { - PI_CALL(piextUSMEnqueueMemcpy)(SrcQueue->getHandleRef(), - /* blocking */ false, DstMem, SrcMem, Len, - DepEvents.size(), &DepEvents[0], &OutEvent); + const detail::plugin &Plugin = SrcQueue->getPlugin(); + Plugin.call(SrcQueue->getHandleRef(), + /* blocking */ false, DstMem, + SrcMem, Len, DepEvents.size(), + &DepEvents[0], &OutEvent); } } @@ -495,8 +521,10 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (Context.is_host()) { std::memset(Mem, Pattern, Length); } else { - PI_CALL(piextUSMEnqueueMemset)(Queue->getHandleRef(), Mem, Pattern, Length, - DepEvents.size(), &DepEvents[0], &OutEvent); + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call( + Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), + &DepEvents[0], &OutEvent); } } @@ -508,9 +536,10 @@ void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (Context.is_host()) { // TODO: Potentially implement prefetch on the host. } else { - PI_CALL(piextUSMEnqueuePrefetch)(Queue->getHandleRef(), Mem, Length, - PI_USM_MIGRATION_TBD0, DepEvents.size(), - &DepEvents[0], &OutEvent); + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call( + Queue->getHandleRef(), Mem, Length, PI_USM_MIGRATION_TBD0, + DepEvents.size(), &DepEvents[0], &OutEvent); } } diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 10df59f5d496d..0063505cee61f 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include #include +#include #include #include @@ -49,18 +50,19 @@ bool useBackend(Backend TheBackend) { return TheBackend == Use; } -// TODO: Move this global structure into sycl::platform object, -// associate each plugin with a platform. -pi_plugin PluginInformation; +// GlobalPlugin is a global Plugin used with Interoperability constructors that +// use OpenCL objects to construct SYCL class objects. +std::shared_ptr GlobalPlugin; // Find the plugin at the appropriate location and return the location. // TODO: Change the function appropriately when there are multiple plugins. -std::string findPlugin() { +bool findPlugins(vector_class &PluginNames) { // TODO: Based on final design discussions, change the location where the // plugin must be searched; how to identify the plugins etc. Currently the // search is done for libpi_opencl.so/pi_opencl.dll file in LD_LIBRARY_PATH // env only. - return PLUGIN_NAME; + PluginNames.push_back(PLUGIN_NAME); + return true; } // Load the Plugin by calling the OS dependent library loading call. @@ -75,19 +77,19 @@ void *loadPlugin(const std::string &PluginPath) { // call is done to get all Interface API mapping. The plugin interface also // needs to setup infrastructure to route PI_CALLs to the appropriate plugins. // Currently, we bind to a singe plugin. -bool bindPlugin(void *Library) { +bool bindPlugin(void *Library, PiPlugin *PluginInformation) { decltype(::piPluginInit) *PluginInitializeFunction = (decltype( &::piPluginInit))(getOsLibraryFuncAddress(Library, "piPluginInit")); if (PluginInitializeFunction == nullptr) return false; - int err = PluginInitializeFunction(&PluginInformation); + int Err = PluginInitializeFunction(PluginInformation); // TODO: Compare Supported versions and check for backward compatibility. // Make sure err is PI_SUCCESS. - assert((err == PI_SUCCESS) && "Unexpected error when binding to Plugin."); - (void)err; + assert((Err == PI_SUCCESS) && "Unexpected error when binding to Plugin."); + (void)Err; // TODO: Return a more meaningful value/enum. return true; @@ -95,34 +97,41 @@ bool bindPlugin(void *Library) { // Load the plugin based on SYCL_BE. // TODO: Currently only accepting OpenCL plugins. Edit it to identify and load -// other kinds of plugins, do the required changes in the findPlugin, loadPlugin -// and bindPlugin functions. -void initialize() { - static bool Initialized = false; - if (Initialized) { - return; - } +// other kinds of plugins, do the required changes in the findPlugins, +// loadPlugin and bindPlugin functions. +vector_class initialize() { + vector_class Plugins; + if (!useBackend(SYCL_BE_PI_OPENCL)) { die("Unknown SYCL_BE"); } - std::string PluginPath = findPlugin(); - if (PluginPath.empty()) - die("Plugin Not Found."); + bool EnableTrace = (std::getenv("SYCL_PI_TRACE") != nullptr); - void *Library = loadPlugin(PluginPath); - if (!Library) { - std::string Message = - "Check if plugin is present. Failed to load plugin: " + PluginPath; - die(Message.c_str()); - } + vector_class PluginNames; + findPlugins(PluginNames); - if (!bindPlugin(Library)) { - std::string Message = "Failed to bind PI APIs to the plugin: " + PluginPath; - die(Message.c_str()); - } + if (PluginNames.empty() && EnableTrace) + std::cerr << "No Plugins Found." << std::endl; - Initialized = true; + PiPlugin PluginInformation; // TODO: include. + for (unsigned int I = 0; I < PluginNames.size(); I++) { + void *Library = loadPlugin(PluginNames[I]); + if (!Library && EnableTrace) { + std::cerr << "Check if plugin is present. Failed to load plugin: " + << PluginNames[I] << std::endl; + } + + if (!bindPlugin(Library, &PluginInformation) && EnableTrace) { + std::cerr << "Failed to bind PI APIs to the plugin: " << PluginNames[I] + << std::endl; + } + Plugins.push_back(plugin(PluginInformation)); + } + // TODO: Correct the logic to store the appropriate plugin into GlobalPlugin + // variable. Currently it saves the last plugin found. + GlobalPlugin = std::make_shared(PluginInformation); + return Plugins; } // Report error and no return (keeps compiler from printing warnings). @@ -139,20 +148,6 @@ void assertion(bool Condition, const char *Message) { die(Message); } -// TODO: Pass platform object to constructor which will contain the -// PluginInformation class. Platform class with Plugin information is not -// implemented yet. - -#define _PI_API(api) \ - template <> \ - CallPi::CallPi() { \ - initialize(); \ - MFnPtr = (RT::PluginInformation.PiFunctionTable.api); \ - MFnName = #api; \ - } -#include - } // namespace pi } // namespace detail } // namespace sycl diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 31721c1702a60..201addb70c2f9 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -22,22 +22,26 @@ namespace detail { vector_class platform_impl::get_platforms() { vector_class Platforms; + vector_class Plugins = RT::initialize(); - pi_uint32 NumPlatforms = 0; - PI_CALL(piPlatformsGet)(0, nullptr, &NumPlatforms); info::device_type ForcedType = detail::get_forced_type(); - - if (NumPlatforms) { - vector_class PiPlatforms(NumPlatforms); - PI_CALL(piPlatformsGet)(NumPlatforms, PiPlatforms.data(), nullptr); - - for (const auto &PiPlatform : PiPlatforms) { - platform Platform = detail::createSyclObjFromImpl( - std::make_shared(PiPlatform)); - // Skip platforms which do not contain requested device - // types - if (!Platform.get_devices(ForcedType).empty()) - Platforms.push_back(Platform); + for (unsigned int i = 0; i < Plugins.size(); i++) { + + pi_uint32 NumPlatforms = 0; + Plugins[i].call(0, nullptr, &NumPlatforms); + + if (NumPlatforms) { + vector_class PiPlatforms(NumPlatforms); + Plugins[i].call(NumPlatforms, + PiPlatforms.data(), nullptr); + + for (const auto &PiPlatform : PiPlatforms) { + platform Platform = detail::createSyclObjFromImpl( + std::make_shared(PiPlatform, Plugins[i])); + // Skip platforms which do not contain requested device types + if (!Platform.get_devices(ForcedType).empty()) + Platforms.push_back(Platform); + } } } @@ -138,27 +142,29 @@ static std::vector getWhiteListDesc() { } static void filterWhiteList(vector_class &PiDevices, - RT::PiPlatform PiPlatform) { + RT::PiPlatform PiPlatform, + const plugin &Plugin) { const std::vector WhiteList(getWhiteListDesc()); if (WhiteList.empty()) return; const string_class PlatformName = sycl::detail::get_platform_info::get( - PiPlatform); + PiPlatform, Plugin); const string_class PlatformVer = sycl::detail::get_platform_info::get(PiPlatform); + info::platform::version>::get(PiPlatform, + Plugin); int InsertIDx = 0; for (RT::PiDevice Device : PiDevices) { const string_class DeviceName = sycl::detail::get_device_info::get( - Device); + Device, Plugin); const string_class DeviceDriverVer = sycl::detail::get_device_info< - string_class, info::device::driver_version>::get(Device); + string_class, info::device::driver_version>::get(Device, Plugin); for (const DevDescT &Desc : WhiteList) { if (nullptr != Desc.platformName && @@ -205,25 +211,29 @@ platform_impl::get_devices(info::device_type DeviceType) const { return Res; pi_uint32 NumDevices; - PI_CALL(piDevicesGet)(MPlatform, pi::cast(DeviceType), 0, - pi::cast(nullptr), &NumDevices); + const detail::plugin &Plugin = getPlugin(); + Plugin.call( + MPlatform, pi::cast(DeviceType), 0, + pi::cast(nullptr), &NumDevices); if (NumDevices == 0) return Res; vector_class PiDevices(NumDevices); // TODO catch an exception and put it to list of asynchronous exceptions - PI_CALL(piDevicesGet)(MPlatform, pi::cast(DeviceType), - NumDevices, PiDevices.data(), nullptr); + Plugin.call(MPlatform, + pi::cast(DeviceType), + NumDevices, PiDevices.data(), nullptr); // Filter out devices that are not present in the white list if (SYCLConfig::get()) - filterWhiteList(PiDevices, MPlatform); + filterWhiteList(PiDevices, MPlatform, this->getPlugin()); std::transform(PiDevices.begin(), PiDevices.end(), std::back_inserter(Res), - [](const RT::PiDevice &PiDevice) -> device { + [this](const RT::PiDevice &PiDevice) -> device { return detail::createSyclObjFromImpl( - std::make_shared(PiDevice)); + std::make_shared( + PiDevice, std::make_shared(*this))); }); return Res; @@ -235,7 +245,7 @@ bool platform_impl::has_extension(const string_class &ExtensionName) const { string_class AllExtensionNames = get_platform_info::get( - MPlatform); + MPlatform, getPlugin()); return (AllExtensionNames.find(ExtensionName) != std::string::npos); } @@ -247,7 +257,7 @@ platform_impl::get_info() const { return get_platform_info< typename info::param_traits::return_type, - param>::get(this->getHandleRef()); + param>::get(this->getHandleRef(), getPlugin()); } #define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 24ed7d4787cbb..abb5f1dca04dd 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -68,10 +68,12 @@ program_impl::program_impl( NonInterOpToLink |= !Prg->MLinkable; Programs.push_back(Prg->MProgram); } - PI_CALL_THROW(piProgramLink, compile_program_error)( + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOptions.c_str(), Programs.size(), Programs.data(), nullptr, nullptr, &MProgram); + Plugin.checkPiResult(Err); } } @@ -80,12 +82,13 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) // TODO handle the case when cl_program build is in progress cl_uint NumDevices; - PI_CALL(piProgramGetInfo)(Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), - &NumDevices, nullptr); + const detail::plugin &Plugin = getPlugin(); + Plugin.call( + Program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &NumDevices, nullptr); vector_class PiDevices(NumDevices); - PI_CALL(piProgramGetInfo)(Program, CL_PROGRAM_DEVICES, - sizeof(RT::PiDevice) * NumDevices, PiDevices.data(), - nullptr); + Plugin.call(Program, CL_PROGRAM_DEVICES, + sizeof(RT::PiDevice) * NumDevices, + PiDevices.data(), nullptr); vector_class SyclContextDevices = MContext->get_info(); @@ -104,15 +107,16 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) RT::PiDevice Device = getSyclObjImpl(MDevices[0])->getHandleRef(); // TODO check build for each device instead cl_program_binary_type BinaryType; - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BINARY_TYPE, - sizeof(cl_program_binary_type), &BinaryType, - nullptr); + Plugin.call( + Program, Device, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), + &BinaryType, nullptr); size_t Size = 0; - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, 0, - nullptr, &Size); + Plugin.call( + Program, Device, CL_PROGRAM_BUILD_OPTIONS, 0, nullptr, &Size); std::vector OptionsVector(Size); - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, - Size, OptionsVector.data(), nullptr); + Plugin.call(Program, Device, + CL_PROGRAM_BUILD_OPTIONS, Size, + OptionsVector.data(), nullptr); string_class Options(OptionsVector.begin(), OptionsVector.end()); switch (BinaryType) { case CL_PROGRAM_BINARY_TYPE_NONE: @@ -129,18 +133,19 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) MLinkOptions = ""; MBuildOptions = Options; } - PI_CALL(piProgramRetain)(Program); + Plugin.call(Program); } program_impl::program_impl(ContextImplPtr Context, RT::PiKernel Kernel) - : program_impl( - Context, - ProgramManager::getInstance().getClProgramFromClKernel(Kernel)) {} + : program_impl(Context, + ProgramManager::getInstance().getClProgramFromClKernel( + Kernel, Context)) {} program_impl::~program_impl() { // TODO catch an exception and put it to list of asynchronous exceptions if (!is_host() && MProgram != nullptr) { - PI_CALL(piProgramRelease)(MProgram); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MProgram); } } @@ -149,7 +154,8 @@ cl_program program_impl::get() const { if (is_host()) { throw invalid_object_error("This instance of program is a host instance"); } - PI_CALL(piProgramRetain)(MProgram); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MProgram); return pi::cast(MProgram); } @@ -187,7 +193,8 @@ void program_impl::build_with_kernel_name(string_class KernelName, MProgramAndKernelCachingAllowed = true; MProgram = ProgramManager::getInstance().getBuiltPIProgram( Module, get_context(), KernelName); - PI_CALL(piProgramRetain)(MProgram); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MProgram); } else { create_pi_program_with_kernel_name(Module, KernelName); build(BuildOptions); @@ -212,9 +219,11 @@ void program_impl::link(string_class LinkOptions) { if (!is_host()) { check_device_feature_support(MDevices); vector_class Devices(get_pi_devices()); - PI_CALL_THROW(piProgramLink, compile_program_error)( + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( MContext->getHandleRef(), Devices.size(), Devices.data(), LinkOptions.c_str(), 1, &MProgram, nullptr, nullptr, &MProgram); + Plugin.checkPiResult(Err); MLinkOptions = LinkOptions; MBuildOptions = LinkOptions; } @@ -249,20 +258,21 @@ kernel program_impl::get_kernel(string_class KernelName, vector_class> program_impl::get_binaries() const { throw_if_state_is(program_state::none); vector_class> Result; + const detail::plugin &Plugin = getPlugin(); if (!is_host()) { vector_class BinarySizes(MDevices.size()); - PI_CALL(piProgramGetInfo)(MProgram, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * BinarySizes.size(), - BinarySizes.data(), nullptr); + Plugin.call( + MProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * BinarySizes.size(), + BinarySizes.data(), nullptr); vector_class Pointers; for (size_t I = 0; I < BinarySizes.size(); ++I) { Result.emplace_back(BinarySizes[I]); Pointers.push_back(Result[I].data()); } - PI_CALL(piProgramGetInfo)(MProgram, CL_PROGRAM_BINARIES, - sizeof(char *) * Pointers.size(), Pointers.data(), - nullptr); + Plugin.call(MProgram, CL_PROGRAM_BINARIES, + sizeof(char *) * Pointers.size(), + Pointers.data(), nullptr); } return Result; } @@ -271,20 +281,23 @@ void program_impl::create_cl_program_with_source(const string_class &Source) { assert(!MProgram && "This program already has an encapsulated cl_program"); const char *Src = Source.c_str(); size_t Size = Source.size(); - PI_CALL(piclProgramCreateWithSource)(MContext->getHandleRef(), 1, &Src, &Size, - &MProgram); + const detail::plugin &Plugin = getPlugin(); + Plugin.call( + MContext->getHandleRef(), 1, &Src, &Size, &MProgram); } void program_impl::compile(const string_class &Options) { check_device_feature_support(MDevices); vector_class Devices(get_pi_devices()); - RT::PiResult Err = PI_CALL_NOCHECK(piProgramCompile)( + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( MProgram, Devices.size(), Devices.data(), Options.c_str(), 0, nullptr, nullptr, nullptr, nullptr); if (Err != PI_SUCCESS) { - throw compile_program_error("Program compilation error:\n" + - ProgramManager::getProgramBuildLog(MProgram)); + throw compile_program_error( + "Program compilation error:\n" + + ProgramManager::getProgramBuildLog(MProgram, MContext)); } MCompileOptions = Options; } @@ -292,13 +305,15 @@ void program_impl::compile(const string_class &Options) { void program_impl::build(const string_class &Options) { check_device_feature_support(MDevices); vector_class Devices(get_pi_devices()); - RT::PiResult Err = - PI_CALL_NOCHECK(piProgramBuild)(MProgram, Devices.size(), Devices.data(), - Options.c_str(), nullptr, nullptr); + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( + MProgram, Devices.size(), Devices.data(), Options.c_str(), nullptr, + nullptr); if (Err != PI_SUCCESS) { - throw compile_program_error("Program build error:\n" + - ProgramManager::getProgramBuildLog(MProgram)); + throw compile_program_error( + "Program build error:\n" + + ProgramManager::getProgramBuildLog(MProgram, MContext)); } MBuildOptions = Options; MCompileOptions = Options; @@ -314,11 +329,13 @@ vector_class program_impl::get_pi_devices() const { bool program_impl::has_cl_kernel(const string_class &KernelName) const { size_t Size; - PI_CALL(piProgramGetInfo)(MProgram, CL_PROGRAM_KERNEL_NAMES, 0, nullptr, - &Size); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MProgram, CL_PROGRAM_KERNEL_NAMES, 0, + nullptr, &Size); string_class ClResult(Size, ' '); - PI_CALL(piProgramGetInfo)(MProgram, CL_PROGRAM_KERNEL_NAMES, ClResult.size(), - &ClResult[0], nullptr); + Plugin.call(MProgram, CL_PROGRAM_KERNEL_NAMES, + ClResult.size(), &ClResult[0], + nullptr); // Get rid of the null terminator ClResult.pop_back(); vector_class KernelNames(split_string(ClResult, ';')); @@ -337,13 +354,14 @@ RT::PiKernel program_impl::get_pi_kernel(const string_class &KernelName) const { Kernel = ProgramManager::getInstance().getOrCreateKernel( MProgramModuleHandle, get_context(), KernelName); } else { - RT::PiResult Err = - PI_CALL_NOCHECK(piKernelCreate)(MProgram, KernelName.c_str(), &Kernel); + const detail::plugin &Plugin = getPlugin(); + RT::PiResult Err = Plugin.call_nocheck( + MProgram, KernelName.c_str(), &Kernel); if (Err == PI_RESULT_INVALID_KERNEL_NAME) { throw invalid_object_error( "This instance of program does not contain the kernel requested"); } - RT::checkPiResult(Err); + Plugin.checkPiResult(Err); } return Kernel; @@ -385,8 +403,9 @@ cl_uint program_impl::get_info() const { throw invalid_object_error("This instance of program is a host instance"); } cl_uint Result; - PI_CALL(piProgramGetInfo)(MProgram, CL_PROGRAM_REFERENCE_COUNT, - sizeof(cl_uint), &Result, nullptr); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(MProgram, CL_PROGRAM_REFERENCE_COUNT, + sizeof(cl_uint), &Result, nullptr); return Result; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c3f1445244cb3..72971d16a70ad 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -31,6 +31,8 @@ __SYCL_INLINE namespace cl { namespace sycl { namespace detail { +using ContextImplPtr = std::shared_ptr; + static constexpr int DbgProgMgr = 0; enum BuildState { BS_InProgress, BS_Done, BS_Failed }; @@ -43,32 +45,36 @@ ProgramManager &ProgramManager::getInstance() { return Instance; } -static RT::PiDevice getFirstDevice(RT::PiContext Context) { +static RT::PiDevice getFirstDevice(const ContextImplPtr &Context) { cl_uint NumDevices = 0; - PI_CALL(piContextGetInfo)(Context, PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(NumDevices), &NumDevices, - /*param_value_size_ret=*/nullptr); + const detail::plugin &Plugin = Context->getPlugin(); + Plugin.call(Context->getHandleRef(), + PI_CONTEXT_INFO_NUM_DEVICES, + sizeof(NumDevices), &NumDevices, + /*param_value_size_ret=*/nullptr); assert(NumDevices > 0 && "Context without devices?"); vector_class Devices(NumDevices); size_t ParamValueSize = 0; - PI_CALL(piContextGetInfo)(Context, PI_CONTEXT_INFO_DEVICES, - sizeof(cl_device_id) * NumDevices, &Devices[0], - &ParamValueSize); + Plugin.call( + Context->getHandleRef(), PI_CONTEXT_INFO_DEVICES, + sizeof(cl_device_id) * NumDevices, &Devices[0], &ParamValueSize); assert(ParamValueSize == sizeof(cl_device_id) * NumDevices && "Number of CL_CONTEXT_DEVICES should match CL_CONTEXT_NUM_DEVICES."); return Devices[0]; } -static RT::PiProgram createBinaryProgram(const RT::PiContext Context, +static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen) { // FIXME: we don't yet support multiple devices with a single binary. + const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG cl_uint NumDevices = 0; - PI_CALL(piContextGetInfo)(Context, PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(NumDevices), &NumDevices, - /*param_value_size_ret=*/nullptr); + Plugin.call(Context->getHandleRef(), + PI_CONTEXT_INFO_NUM_DEVICES, + sizeof(NumDevices), &NumDevices, + /*param_value_size_ret=*/nullptr); assert(NumDevices > 0 && "Only a single device is supported for AOT compilation"); #endif @@ -76,17 +82,19 @@ static RT::PiProgram createBinaryProgram(const RT::PiContext Context, RT::PiDevice Device = getFirstDevice(Context); pi_int32 BinaryStatus = CL_SUCCESS; RT::PiProgram Program; - PI_CALL(piclProgramCreateWithBinary)(Context, 1 /*one binary*/, &Device, - &DataLen, &Data, &BinaryStatus, - &Program); + Plugin.call( + Context->getHandleRef(), 1 /*one binary*/, &Device, &DataLen, &Data, + &BinaryStatus, &Program); return Program; } -static RT::PiProgram createSpirvProgram(const RT::PiContext Context, +static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, const unsigned char *Data, size_t DataLen) { RT::PiProgram Program = nullptr; - PI_CALL(piProgramCreate)(Context, Data, DataLen, &Program); + const detail::plugin &Plugin = Context->getPlugin(); + Plugin.call(Context->getHandleRef(), Data, + DataLen, &Program); return Program; } @@ -267,7 +275,7 @@ RT::PiProgram ProgramManager::createPIProgram(const DeviceImage &Img, "Online compilation is not supported in this context"); // Load the image - const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef(); + const ContextImplPtr Ctx = getSyclObjImpl(Context); RT::PiProgram Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV ? createSpirvProgram(Ctx, Img.BinaryStart, ImgSize) : createBinaryProgram(Ctx, Img.BinaryStart, ImgSize); @@ -284,7 +292,7 @@ ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, const string_class &KernelName) { KernelSetId KSId = getKernelSetId(M, KernelName); - std::shared_ptr Ctx = getSyclObjImpl(Context); + const ContextImplPtr Ctx = getSyclObjImpl(Context); using PiProgramT = KernelProgramCache::PiProgramT; using ProgramCacheT = KernelProgramCache::ProgramCacheT; @@ -300,17 +308,17 @@ ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, auto BuildF = [this, &M, &KSId, &Context] { const DeviceImage &Img = getDeviceImage(M, KSId, Context); + ContextImplPtr ContextImpl = getSyclObjImpl(Context); + const detail::plugin &Plugin = ContextImpl->getPlugin(); RT::PiProgram Prg = createPIProgram(Img, Context); - ProgramPtr ProgramManaged( - Prg, RT::PluginInformation.PiFunctionTable.piProgramRelease); + ProgramPtr ProgramManaged(Prg, + Plugin.MPlugin.PiFunctionTable.piProgramRelease); // Link a fallback implementation of device libraries if they are not // supported by a device compiler. // Pre-compiled programs are supposed to be already linked. const bool LinkDeviceLibs = getFormat(Img) == PI_DEVICE_BINARY_TYPE_SPIRV; - context_impl *ContextImpl = getRawSyclObjImpl(Context); - RT::PiContext PiContext = ContextImpl->getHandleRef(); const std::vector &Devices = ContextImpl->getDevices(); std::vector PiDevices(Devices.size()); std::transform( @@ -318,7 +326,7 @@ ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, [](const device Dev) { return getRawSyclObjImpl(Dev)->getHandleRef(); }); ProgramPtr BuiltProgram = - build(std::move(ProgramManaged), PiContext, Img.CompileOptions, + build(std::move(ProgramManaged), ContextImpl, Img.CompileOptions, Img.LinkOptions, PiDevices, ContextImpl->getCachedLibPrograms(), LinkDeviceLibs); @@ -338,7 +346,7 @@ RT::PiKernel ProgramManager::getOrCreateKernel(OSModuleHandle M, } RT::PiProgram Program = getBuiltPIProgram(M, Context, KernelName); - std::shared_ptr Ctx = getSyclObjImpl(Context); + const ContextImplPtr Ctx = getSyclObjImpl(Context); using PiKernelT = KernelProgramCache::PiKernelT; using KernelCacheT = KernelProgramCache::KernelCacheT; @@ -352,12 +360,14 @@ RT::PiKernel ProgramManager::getOrCreateKernel(OSModuleHandle M, auto GetF = [&Program] (const Locked &LockedCache) -> KernelByNameT& { return LockedCache.get()[Program]; }; - auto BuildF = [this, &Program, &KernelName] { + auto BuildF = [this, &Program, &KernelName, &Ctx] { PiKernelT *Result = nullptr; // TODO need some user-friendly error/exception // instead of currently obscure one - PI_CALL(piKernelCreate)(Program, KernelName.c_str(), &Result); + const detail::plugin &Plugin = Ctx->getPlugin(); + Plugin.call(Program, KernelName.c_str(), + &Result); return Result; }; @@ -366,31 +376,39 @@ RT::PiKernel ProgramManager::getOrCreateKernel(OSModuleHandle M, Cache, KernelName, AcquireF, GetF, BuildF); } -RT::PiProgram ProgramManager::getClProgramFromClKernel(RT::PiKernel Kernel) { +RT::PiProgram +ProgramManager::getClProgramFromClKernel(RT::PiKernel Kernel, + const ContextImplPtr Context) { RT::PiProgram Program; - PI_CALL(piKernelGetInfo)(Kernel, CL_KERNEL_PROGRAM, sizeof(cl_program), - &Program, nullptr); + const detail::plugin &Plugin = Context->getPlugin(); + Plugin.call( + Kernel, CL_KERNEL_PROGRAM, sizeof(cl_program), &Program, nullptr); return Program; } -string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program) { +string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program, + const ContextImplPtr Context) { size_t Size = 0; - PI_CALL(piProgramGetInfo)(Program, CL_PROGRAM_DEVICES, 0, nullptr, &Size); + const detail::plugin &Plugin = Context->getPlugin(); + Plugin.call(Program, CL_PROGRAM_DEVICES, 0, + nullptr, &Size); vector_class PIDevices(Size / sizeof(RT::PiDevice)); - PI_CALL(piProgramGetInfo)(Program, CL_PROGRAM_DEVICES, Size, PIDevices.data(), - nullptr); + Plugin.call(Program, CL_PROGRAM_DEVICES, Size, + PIDevices.data(), nullptr); string_class Log = "The program was built for " + std::to_string(PIDevices.size()) + " devices"; for (RT::PiDevice &Device : PIDevices) { - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_LOG, 0, - nullptr, &Size); + Plugin.call( + Program, Device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &Size); vector_class DeviceBuildInfo(Size); - PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_LOG, Size, - DeviceBuildInfo.data(), nullptr); - PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_INFO_NAME, 0, nullptr, &Size); + Plugin.call( + Program, Device, CL_PROGRAM_BUILD_LOG, Size, DeviceBuildInfo.data(), + nullptr); + Plugin.call(Device, PI_DEVICE_INFO_NAME, 0, + nullptr, &Size); vector_class DeviceName(Size); - PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_INFO_NAME, Size, - DeviceName.data(), nullptr); + Plugin.call(Device, PI_DEVICE_INFO_NAME, Size, + DeviceName.data(), nullptr); Log += "\nBuild program log for '" + string_class(DeviceName.data()) + "':\n" + string_class(DeviceBuildInfo.data()); @@ -398,7 +416,7 @@ string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program) { return Log; } -static bool loadDeviceLib(const RT::PiContext &Context, const char *Name, +static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, RT::PiProgram &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, @@ -435,10 +453,8 @@ static const char* getDeviceLibExtensionStr(DeviceLibExt Extension) { throw compile_program_error("Unhandled (new?) device library extension"); } -static RT::PiProgram -loadDeviceLibFallback( - const RT::PiContext &Context, - DeviceLibExt Extension, +static RT::PiProgram loadDeviceLibFallback( + const ContextImplPtr Context, DeviceLibExt Extension, const std::vector &Devices, std::map &CachedLibPrograms) { @@ -458,7 +474,8 @@ loadDeviceLibFallback( throw compile_program_error(std::string("Failed to load ") + LibFileName); } - RT::PiResult Error = PI_CALL_NOCHECK(piProgramCompile)( + const detail::plugin &Plugin = Context->getPlugin(); + RT::PiResult Error = Plugin.call_nocheck( LibProg, // Assume that Devices contains all devices from Context. Devices.size(), Devices.data(), @@ -469,7 +486,8 @@ loadDeviceLibFallback( "", 0, nullptr, nullptr, nullptr, nullptr); if (Error != PI_SUCCESS) { CachedLibPrograms.erase(LibProgIt); - throw compile_program_error(ProgramManager::getProgramBuildLog(LibProg)); + throw compile_program_error( + ProgramManager::getProgramBuildLog(LibProg, Context)); } return LibProg; @@ -541,7 +559,7 @@ DeviceImage &ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, << "\", " << getRawSyclObjImpl(Context) << ")\n"; std::lock_guard Guard(Sync::getGlobalLock()); std::vector &Imgs = *m_DeviceImages[KSId]; - const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef(); + const ContextImplPtr Ctx = getSyclObjImpl(Context); DeviceImage *Img = nullptr; // TODO: There may be cases with cl::sycl::program class usage in source code @@ -551,8 +569,8 @@ DeviceImage &ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, // Ask the native runtime under the given context to choose the device image // it prefers. if (Imgs.size() > 1) { - PI_CALL(piextDeviceSelectBinary)(getFirstDevice(Ctx), Imgs.data(), - (cl_uint)Imgs.size(), &Img); + Ctx->getPlugin().call( + getFirstDevice(Ctx), Imgs.data(), (cl_uint)Imgs.size(), &Img); } else Img = Imgs[0]; @@ -568,10 +586,10 @@ DeviceImage &ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, return *Img; } -static std::vector getDeviceLibPrograms( - const RT::PiContext Context, - const std::vector &Devices, - std::map &CachedLibPrograms) { +static std::vector +getDeviceLibPrograms(const ContextImplPtr Context, + const std::vector &Devices, + std::map &CachedLibPrograms) { std::vector Programs; @@ -586,7 +604,8 @@ static std::vector getDeviceLibPrograms( // support it. for (RT::PiDevice Dev : Devices) { std::string DevExtList = - get_device_info::get(Dev); + get_device_info::get( + Dev, Context->getPlugin()); for (auto &Pair : RequiredDeviceLibExt) { DeviceLibExt Ext = Pair.first; bool &FallbackIsLoaded = Pair.second; @@ -615,7 +634,7 @@ static std::vector getDeviceLibPrograms( } ProgramManager::ProgramPtr -ProgramManager::build(ProgramPtr Program, RT::PiContext Context, +ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, const string_class &CompileOptions, const string_class &LinkOptions, const std::vector &Devices, @@ -642,29 +661,30 @@ ProgramManager::build(ProgramPtr Program, RT::PiContext Context, LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms); } + const detail::plugin &Plugin = Context->getPlugin(); if (LinkPrograms.empty()) { std::string Opts(CompileOpts); Opts += " "; Opts += LinkOpts; - RT::PiResult Error = PI_CALL_NOCHECK(piProgramBuild)( + RT::PiResult Error = Plugin.call_nocheck( Program.get(), Devices.size(), Devices.data(), Opts.c_str(), nullptr, nullptr); if (Error != PI_SUCCESS) - throw compile_program_error(getProgramBuildLog(Program.get())); + throw compile_program_error(getProgramBuildLog(Program.get(), Context)); return Program; } // Include the main program and compile/link everything together - PI_CALL(piProgramCompile)(Program.get(), Devices.size(), Devices.data(), - CompileOpts, 0, nullptr, nullptr, nullptr, nullptr); + Plugin.call(Program.get(), Devices.size(), + Devices.data(), CompileOpts, 0, + nullptr, nullptr, nullptr, nullptr); LinkPrograms.push_back(Program.get()); RT::PiProgram LinkedProg = nullptr; - - RT::PiResult Error = PI_CALL_NOCHECK(piProgramLink)( - Context, Devices.size(), Devices.data(), LinkOpts, LinkPrograms.size(), - LinkPrograms.data(), nullptr, nullptr, &LinkedProg); + RT::PiResult Error = Plugin.call_nocheck( + Context->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, + LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg); // Link program call returns a new program object if all parameters are valid, // or NULL otherwise. Release the original (user) program. @@ -673,9 +693,9 @@ ProgramManager::build(ProgramPtr Program, RT::PiContext Context, if (LinkedProg) { // A non-trivial error occurred during linkage: get a build log, release // an incomplete (but valid) LinkedProg, and throw. - throw compile_program_error(getProgramBuildLog(LinkedProg)); + throw compile_program_error(getProgramBuildLog(LinkedProg, Context)); } - pi::checkPiResult(Error); + Plugin.checkPiResult(Error); } return Program; } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index e04fc017975ae..bcd82d62ac58d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -22,8 +22,9 @@ namespace detail { template <> cl_uint queue_impl::get_info() const { RT::PiResult result = PI_SUCCESS; if (!is_host()) - PI_CALL(piQueueGetInfo)(MCommandQueue, PI_QUEUE_INFO_REFERENCE_COUNT, - sizeof(result), &result, nullptr); + getPlugin().call( + MCommandQueue, PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, + nullptr); return result; } @@ -67,8 +68,9 @@ event queue_impl::mem_advise(const void *Ptr, size_t Length, int Advice) { // non-Host device RT::PiEvent Event = nullptr; - PI_CALL(piextUSMEnqueueMemAdvise)(getHandleRef(), Ptr, Length, Advice, - &Event); + const detail::plugin &Plugin = getPlugin(); + Plugin.call(getHandleRef(), Ptr, Length, + Advice, &Event); return event(pi::cast(Event), Context); } diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index 808208f6eb547..91d6b4cc48092 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -23,21 +23,24 @@ sampler_impl::sampler_impl(cl_sampler clSampler, const context &syclContext) { RT::PiSampler Sampler = pi::cast(clSampler); m_contextToSampler[syclContext] = Sampler; - PI_CALL(piSamplerRetain)(Sampler); - PI_CALL(piSamplerGetInfo)(Sampler, PI_SAMPLER_INFO_NORMALIZED_COORDS, - sizeof(pi_bool), &m_CoordNormMode, nullptr); - PI_CALL(piSamplerGetInfo)(Sampler, PI_SAMPLER_INFO_ADDRESSING_MODE, - sizeof(pi_sampler_addressing_mode), &m_AddrMode, - nullptr); - PI_CALL(piSamplerGetInfo)(Sampler, PI_SAMPLER_INFO_FILTER_MODE, - sizeof(pi_sampler_filter_mode), &m_FiltMode, - nullptr); + const detail::plugin &Plugin = getSyclObjImpl(syclContext)->getPlugin(); + Plugin.call(Sampler); + Plugin.call( + Sampler, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool), + &m_CoordNormMode, nullptr); + Plugin.call( + Sampler, PI_SAMPLER_INFO_ADDRESSING_MODE, + sizeof(pi_sampler_addressing_mode), &m_AddrMode, nullptr); + Plugin.call(Sampler, PI_SAMPLER_INFO_FILTER_MODE, + sizeof(pi_sampler_filter_mode), + &m_FiltMode, nullptr); } sampler_impl::~sampler_impl() { for (auto &Iter : m_contextToSampler) { // TODO catch an exception and add it to the list of asynchronous exceptions - PI_CALL(piSamplerRelease)(Iter.second); + const detail::plugin &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); + Plugin.call(Iter.second); } } @@ -56,13 +59,15 @@ RT::PiSampler sampler_impl::getOrCreateSampler(const context &Context) { RT::PiResult errcode_ret = PI_SUCCESS; RT::PiSampler resultSampler = nullptr; - errcode_ret = PI_CALL_NOCHECK(piSamplerCreate)( + const detail::plugin &Plugin = getSyclObjImpl(Context)->getPlugin(); + + errcode_ret = Plugin.call_nocheck( getSyclObjImpl(Context)->getHandleRef(), sprops, &resultSampler); if (errcode_ret == PI_INVALID_OPERATION) throw feature_not_supported("Images are not supported by this device."); - RT::checkPiResult(errcode_ret); + Plugin.checkPiResult(errcode_ret); m_contextToSampler[Context] = resultSampler; return m_contextToSampler[Context]; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a8a055f3713a9..5fb547215d773 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -82,14 +82,26 @@ static std::string accessModeToString(access::mode Mode) { } } +static std::vector +getPiEvents(const std::vector &EventImpls) { + std::vector RetPiEvents; + for (auto &EventImpl : EventImpls) + RetPiEvents.push_back(EventImpl->getHandleRef()); + return RetPiEvents; +} + void EventCompletionClbk(RT::PiEvent, pi_int32, void *data) { // TODO: Handle return values. Store errors to async handler. - PI_CALL(piEventSetStatus)(pi::cast(data), CL_COMPLETE); + EventImplPtr *Event = (reinterpret_cast(data)); + RT::PiEvent &EventHandle = (*Event)->getHandleRef(); + const detail::plugin &Plugin = (*Event)->getPlugin(); + Plugin.call(EventHandle, CL_COMPLETE); + delete (Event); } // Method prepares PI event's from list sycl::event's -std::vector Command::prepareEvents(ContextImplPtr Context) { - std::vector Result; +std::vector Command::prepareEvents(ContextImplPtr Context) { + std::vector Result; std::vector GlueEvents; for (EventImplPtr &Event : MDepsEvents) { // Async work is not supported for host device. @@ -103,37 +115,43 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { continue; } ContextImplPtr EventContext = Event->getContextImpl(); - + const detail::plugin &Plugin = Event->getPlugin(); // If contexts don't match - connect them using user event if (EventContext != Context && !Context->is_host()) { EventImplPtr GlueEvent(new detail::event_impl()); GlueEvent->setContextImpl(Context); - RT::PiEvent &GlueEventHandle = GlueEvent->getHandleRef(); - PI_CALL(piEventCreate)(Context->getHandleRef(), &GlueEventHandle); - PI_CALL(piEventSetCallback)(Event->getHandleRef(), CL_COMPLETE, - EventCompletionClbk, - /*data=*/GlueEventHandle); - GlueEvents.push_back(std::move(GlueEvent)); - Result.push_back(GlueEventHandle); + Plugin.call(Context->getHandleRef(), + &GlueEventHandle); + EventImplPtr *GlueEventCopy = + new EventImplPtr(GlueEvent); // To increase the reference count by 1. + Plugin.call( + Event->getHandleRef(), CL_COMPLETE, EventCompletionClbk, + /*void *data=*/(GlueEventCopy)); + GlueEvents.push_back(GlueEvent); + Result.push_back(std::move(GlueEvent)); continue; } - Result.push_back(Event->getHandleRef()); + Result.push_back(Event); } MDepsEvents.insert(MDepsEvents.end(), GlueEvents.begin(), GlueEvents.end()); return Result; } void Command::waitForEvents(QueueImplPtr Queue, - std::vector &RawEvents, + std::vector &EventImpls, RT::PiEvent &Event) { - if (!RawEvents.empty()) { + + if (!EventImpls.empty()) { + std::vector RawEvents = getPiEvents(EventImpls); if (Queue->is_host()) { - PI_CALL(piEventsWait)(RawEvents.size(), &RawEvents[0]); + const detail::plugin &Plugin = EventImpls[0]->getPlugin(); + Plugin.call(RawEvents.size(), &RawEvents[0]); } else { - PI_CALL(piEnqueueEventsWait)(Queue->getHandleRef(), RawEvents.size(), - &RawEvents[0], &Event); + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call( + Queue->getHandleRef(), RawEvents.size(), &RawEvents[0], &Event); } } } @@ -188,7 +206,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking) { } cl_int AllocaCommand::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -198,15 +216,16 @@ cl_int AllocaCommand::enqueueImp() { if (MQueue->is_host()) { // Do not need to make allocation if we have a linked device allocation - Command::waitForEvents(MQueue, RawEvents, Event); + Command::waitForEvents(MQueue, EventImpls, Event); return CL_SUCCESS; } HostPtr = MLinkedAllocaCmd->getMemAllocation(); } - + // TODO: Check if it is correct to use std::move on stack variable and + // delete it RawEvents below. MMemAllocation = MemoryManager::allocate( detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(), - MInitFromUserData, HostPtr, std::move(RawEvents), Event); + MInitFromUserData, HostPtr, std::move(EventImpls), Event); return CL_SUCCESS; } @@ -232,7 +251,7 @@ void AllocaCommand::printDot(std::ostream &Stream) const { } cl_int AllocaSubBufCommand::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); RT::PiEvent &Event = MEvent->getHandleRef(); @@ -240,7 +259,7 @@ cl_int AllocaSubBufCommand::enqueueImp() { detail::getSyclObjImpl(MQueue->get_context()), MParentAlloca->getMemAllocation(), MRequirement.MElemSize, MRequirement.MOffsetInBytes, MRequirement.MAccessRange, - std::move(RawEvents), Event); + std::move(EventImpls), Event); return CL_SUCCESS; } @@ -267,9 +286,9 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { } cl_int ReleaseCommand::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); - + std::vector RawEvents = getPiEvents(EventImpls); bool SkipRelease = false; // On host side we only allocate memory for full buffers. @@ -298,7 +317,11 @@ cl_int ReleaseCommand::enqueueImp() { const QueueImplPtr &Queue = CurAllocaIsHost ? MAllocaCmd->MLinkedAllocaCmd->getQueue() : MAllocaCmd->getQueue(); - RT::PiEvent UnmapEvent = nullptr; + + EventImplPtr UnmapEventImpl(new event_impl(Queue)); + UnmapEventImpl->setContextImpl( + detail::getSyclObjImpl(Queue->get_context())); + RT::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef(); void *Src = CurAllocaIsHost ? MAllocaCmd->getMemAllocation() @@ -309,20 +332,20 @@ cl_int ReleaseCommand::enqueueImp() { : MAllocaCmd->MLinkedAllocaCmd->getMemAllocation(); MemoryManager::unmap(MAllocaCmd->getSYCLMemObj(), Dst, Queue, Src, - std::move(RawEvents), UnmapEvent); + RawEvents, UnmapEvent); std::swap(MAllocaCmd->MIsActive, MAllocaCmd->MLinkedAllocaCmd->MIsActive); - RawEvents.push_back(UnmapEvent); + EventImpls.clear(); + EventImpls.push_back(UnmapEventImpl); } - RT::PiEvent &Event = MEvent->getHandleRef(); if (SkipRelease) - Command::waitForEvents(MQueue, RawEvents, Event); + Command::waitForEvents(MQueue, EventImpls, Event); else MemoryManager::release(detail::getSyclObjImpl(MQueue->get_context()), MAllocaCmd->getSYCLMemObj(), - MAllocaCmd->getMemAllocation(), std::move(RawEvents), - Event); + MAllocaCmd->getMemAllocation(), + std::move(EventImpls), Event); return CL_SUCCESS; } @@ -351,8 +374,9 @@ MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr) {} cl_int MapMemObject::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); *MDstPtr = MemoryManager::map( @@ -386,8 +410,9 @@ UnMapMemObject::UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req, MDstAllocaCmd(DstAllocaCmd), MDstReq(std::move(Req)), MSrcPtr(SrcPtr) {} cl_int UnMapMemObject::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); MemoryManager::unmap(MDstAllocaCmd->getSYCLMemObj(), @@ -427,20 +452,22 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, } cl_int MemCpyCommand::enqueueImp() { - std::vector RawEvents; + std::vector EventImpls; QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; - RawEvents = + EventImpls = Command::prepareEvents(detail::getSyclObjImpl(Queue->get_context())); RT::PiEvent &Event = MEvent->getHandleRef(); + auto RawEvents = getPiEvents(EventImpls); + // Omit copying if mode is discard one. // TODO: Handle this at the graph building time by, for example, creating // empty node instead of memcpy. if (MDstReq.MAccessMode == access::mode::discard_read_write || MDstReq.MAccessMode == access::mode::discard_write || MSrcAllocaCmd->getMemAllocation() == MDstAllocaCmd->getMemAllocation()) { - Command::waitForEvents(Queue, RawEvents, Event); + Command::waitForEvents(Queue, EventImpls, Event); } else { MemoryManager::copy( MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), @@ -491,11 +518,11 @@ void ExecCGCommand::flushStreams() { } cl_int UpdateHostRequirementCommand::enqueueImp() { - std::vector RawEvents; - RawEvents = + std::vector EventImpls; + EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); RT::PiEvent &Event = MEvent->getHandleRef(); - Command::waitForEvents(MQueue, RawEvents, Event); + Command::waitForEvents(MQueue, EventImpls, Event); assert(MSrcAllocaCmd && "Expected valid alloca command"); assert(MSrcAllocaCmd->getMemAllocation() && "Expected valid source pointer"); @@ -541,8 +568,9 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, cl_int MemCpyCommandHost::enqueueImp() { QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(Queue->get_context())); + std::vector RawEvents = getPiEvents(EventImpls); RT::PiEvent &Event = MEvent->getHandleRef(); // Omit copying if mode is discard one. @@ -550,7 +578,7 @@ cl_int MemCpyCommandHost::enqueueImp() { // empty node instead of memcpy. if (MDstReq.MAccessMode == access::mode::discard_read_write || MDstReq.MAccessMode == access::mode::discard_write) { - Command::waitForEvents(Queue, RawEvents, Event); + Command::waitForEvents(Queue, EventImpls, Event); return CL_SUCCESS; } @@ -668,7 +696,7 @@ void ExecCGCommand::printDot(std::ostream &Stream) const { // runtime, or by the number of work - groups and number of work - items for // users who need more control. static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, - RT::PiDevice Device) { + const device_impl &DeviceImpl) { if (NDR.GlobalSize[0] != 0) return; // GlobalSize is set - no need to adjust // check the prerequisites: @@ -676,18 +704,16 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, // TODO might be good to cache this info together with the kernel info to // avoid get_kernel_work_group_info on every kernel run range<3> WGSize = get_kernel_work_group_info< - range<3>, - cl::sycl::info::kernel_work_group::compile_work_group_size>::get(Kernel, - Device); + range<3>, cl::sycl::info::kernel_work_group::compile_work_group_size>:: + get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); if (WGSize[0] == 0) { // kernel does not request specific workgroup shape - set one // TODO maximum work group size as the local size might not be the best // choice for CPU or FPGA devices size_t WGSize1D = get_kernel_work_group_info< - size_t, - cl::sycl::info::kernel_work_group::work_group_size>::get(Kernel, - Device); + size_t, cl::sycl::info::kernel_work_group::work_group_size>:: + get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); assert(WGSize1D != 0); // TODO implement better default for 2D/3D case: WGSize = {WGSize1D, 1, 1}; @@ -727,9 +753,11 @@ void DispatchNativeKernel(void *Blob) { } cl_int ExecCGCommand::enqueueImp() { - std::vector RawEvents = + std::vector EventImpls = Command::prepareEvents(detail::getSyclObjImpl(MQueue->get_context())); + auto RawEvents = getPiEvents(EventImpls); + RT::PiEvent &Event = MEvent->getHandleRef(); switch (MCommandGroup->getType()) { @@ -818,8 +846,11 @@ cl_int ExecCGCommand::enqueueImp() { NextArg++; } - if (!RawEvents.empty()) - PI_CALL(piEventsWait)(RawEvents.size(), &RawEvents[0]); + if (!RawEvents.empty()) { + // Assuming that the events are for devices to the same Plugin. + const detail::plugin &Plugin = EventImpls[0]->getPlugin(); + Plugin.call(RawEvents.size(), &RawEvents[0]); + } DispatchNativeKernel((void *)ArgsBlob.data()); return CL_SUCCESS; } @@ -841,8 +872,8 @@ cl_int ExecCGCommand::enqueueImp() { MemLocs.push_back(NextArg); NextArg++; } - - pi_result Error = PI_CALL_NOCHECK(piEnqueueNativeKernel)( + const detail::plugin &Plugin = MQueue->getPlugin(); + pi_result Error = Plugin.call_nocheck( MQueue->getHandleRef(), DispatchNativeKernel, (void *)ArgsBlob.data(), ArgsBlob.size() * sizeof(ArgsBlob[0]), Buffers.size(), Buffers.data(), const_cast(MemLocs.data()), RawEvents.size(), @@ -871,8 +902,11 @@ cl_int ExecCGCommand::enqueueImp() { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); Req->MData = AllocaCmd->getMemAllocation(); } - if (!RawEvents.empty()) - PI_CALL(piEventsWait)(RawEvents.size(), &RawEvents[0]); + if (!RawEvents.empty()) { + // Assuming that the events are for devices to the same Plugin. + const detail::plugin &Plugin = EventImpls[0]->getPlugin(); + Plugin.call(RawEvents.size(), &RawEvents[0]); + } ExecKernel->MHostKernel->call(NDRDesc, getEvent()->getHostProfilingInfo()); return CL_SUCCESS; @@ -880,10 +914,12 @@ cl_int ExecCGCommand::enqueueImp() { // Run OpenCL kernel sycl::context Context = MQueue->get_context(); + const detail::plugin &Plugin = MQueue->getPlugin(); RT::PiKernel Kernel = nullptr; if (nullptr != ExecKernel->MSyclKernel) { - assert(ExecKernel->MSyclKernel->get_info() == Context); + assert(ExecKernel->MSyclKernel->get_info() == + Context); Kernel = ExecKernel->MSyclKernel->getHandleRef(); } else Kernel = detail::ProgramManager::getInstance().getOrCreateKernel( @@ -895,24 +931,26 @@ cl_int ExecCGCommand::enqueueImp() { Requirement *Req = (Requirement *)(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); cl_mem MemArg = (cl_mem)AllocaCmd->getMemAllocation(); - PI_CALL(piKernelSetArg)(Kernel, Arg.MIndex, sizeof(cl_mem), &MemArg); + Plugin.call(Kernel, Arg.MIndex, + sizeof(cl_mem), &MemArg); break; } case kernel_param_kind_t::kind_std_layout: { - PI_CALL(piKernelSetArg)(Kernel, Arg.MIndex, Arg.MSize, Arg.MPtr); + Plugin.call(Kernel, Arg.MIndex, Arg.MSize, + Arg.MPtr); break; } case kernel_param_kind_t::kind_sampler: { sampler *SamplerPtr = (sampler *)Arg.MPtr; RT::PiSampler Sampler = detail::getSyclObjImpl(*SamplerPtr)->getOrCreateSampler(Context); - PI_CALL(piKernelSetArg)(Kernel, Arg.MIndex, sizeof(cl_sampler), - &Sampler); + Plugin.call(Kernel, Arg.MIndex, + sizeof(cl_sampler), &Sampler); break; } case kernel_param_kind_t::kind_pointer: { - PI_CALL(piextKernelSetArgPointer)(Kernel, Arg.MIndex, Arg.MSize, - Arg.MPtr); + Plugin.call(Kernel, Arg.MIndex, + Arg.MSize, Arg.MPtr); break; } default: @@ -920,21 +958,20 @@ cl_int ExecCGCommand::enqueueImp() { } } - adjustNDRangePerKernel( - NDRDesc, Kernel, - detail::getSyclObjImpl(MQueue->get_device())->getHandleRef()); + adjustNDRangePerKernel(NDRDesc, Kernel, + *(detail::getSyclObjImpl(MQueue->get_device()))); // Some PI Plugins (like OpenCL) require this call to enable USM // For others, PI will turn this into a NOP. - PI_CALL(piKernelSetExecInfo)(Kernel, PI_USM_INDIRECT_ACCESS, - sizeof(pi_bool), &PI_TRUE); + Plugin.call(Kernel, PI_USM_INDIRECT_ACCESS, + sizeof(pi_bool), &PI_TRUE); // Remember this information before the range dimensions are reversed const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); ReverseRangeDimensionsForKernel(NDRDesc); - pi_result Error = PI_CALL_NOCHECK(piEnqueueKernelLaunch)( + pi_result Error = Plugin.call_nocheck( MQueue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], HasLocalSize ? &NDRDesc.LocalSize[0] : nullptr, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], &Event); @@ -942,10 +979,10 @@ cl_int ExecCGCommand::enqueueImp() { if (PI_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice // exception explaining what was wrong - pi_device Device = - detail::getSyclObjImpl(MQueue->get_device())->getHandleRef(); - return detail::enqueue_kernel_launch::handleError(Error, Device, Kernel, - NDRDesc); + const device_impl &DeviceImpl = + *(detail::getSyclObjImpl(MQueue->get_device())); + return detail::enqueue_kernel_launch::handleError(Error, DeviceImpl, + Kernel, NDRDesc); } return PI_SUCCESS; } diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index b3cb7f80438c6..87cb1dfd0386c 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -46,8 +46,10 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) { throw runtime_error("Enqueue process failed."); RT::PiEvent &CLEvent = Cmd->getEvent()->getHandleRef(); - if (CLEvent) - PI_CALL(piEventsWait)(1, &CLEvent); + if (CLEvent) { + const detail::plugin &Plugin = Event->getPlugin(); + Plugin.call(1, &CLEvent); + } } bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 313226ee77d53..e8f70cfbab71c 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -31,13 +31,14 @@ SYCLMemObjT::SYCLMemObjT(cl_mem MemObject, const context &SyclContext, RT::PiMem Mem = pi::cast(MInteropMemObject); RT::PiContext Context = nullptr; - PI_CALL(piMemGetInfo)(Mem, CL_MEM_CONTEXT, sizeof(Context), &Context, - nullptr); + const plugin &Plugin = getPlugin(); + Plugin.call(Mem, CL_MEM_CONTEXT, sizeof(Context), + &Context, nullptr); if (MInteropContext->getHandleRef() != Context) throw cl::sycl::invalid_parameter_error( "Input context must be the same as the context of cl_mem"); - PI_CALL(piMemRetain)(Mem); + Plugin.call(Mem); } void SYCLMemObjT::releaseMem(ContextImplPtr Context, void *MemAllocation) { @@ -73,8 +74,11 @@ void SYCLMemObjT::updateHostMemory() { Scheduler::getInstance().removeMemoryObject(this); releaseHostMem(MShadowCopy); - if (MOpenCLInterop) - PI_CALL(piMemRelease)(pi::cast(MInteropMemObject)); + if (MOpenCLInterop) { + const plugin &Plugin = getPlugin(); + Plugin.call( + pi::cast(MInteropMemObject)); + } } } // namespace detail } // namespace sycl diff --git a/sycl/source/detail/usm/usm_dispatch.cpp b/sycl/source/detail/usm/usm_dispatch.cpp index 599aeeffa0d88..2daf449182f04 100644 --- a/sycl/source/detail/usm/usm_dispatch.cpp +++ b/sycl/source/detail/usm/usm_dispatch.cpp @@ -7,12 +7,13 @@ // ===--------------------------------------------------------------------=== // #include +#include #include __SYCL_INLINE namespace cl { -namespace sycl { -namespace detail { -namespace usm { + namespace sycl { + namespace detail { + namespace usm { /*** @@ -297,11 +298,12 @@ pi_result USMDispatcher::enqueueMigrateMem(pi_queue Queue, const void *Ptr, if (mEmulated) { // We could check for OpenCL 2.1 and call the SVM migrate // functions, but for now we'll just enqueue a marker. + // TODO: Implement a PI call for this openCL API RetVal = pi::cast(clEnqueueMarkerWithWaitList( CLQueue, NumEventsInWaitList, reinterpret_cast(EventWaitList), reinterpret_cast(Event))); - pi::checkPiResult(RetVal); + RT::GlobalPlugin->checkPiResult(RetVal); } else { RetVal = pi::cast(pfn_clEnqueueMigrateMemINTEL( CLQueue, Ptr, Size, Flags, NumEventsInWaitList, @@ -349,19 +351,21 @@ void USMDispatcher::memAdvise(pi_queue Queue, const void *Ptr, size_t Length, if (mEmulated) { // memAdvise does nothing here // TODO: Implement a PI call for this openCL API - RT::checkPiResult(RT::cast(clEnqueueMarkerWithWaitList( - CLQueue, 0, nullptr, reinterpret_cast(Event)))); + RT::GlobalPlugin->checkPiResult( + RT::cast(clEnqueueMarkerWithWaitList( + CLQueue, 0, nullptr, reinterpret_cast(Event)))); } else { // Temporary until driver supports // memAdvise doesn't do anything on an iGPU anyway // TODO: Implement a PI call for this openCL API - RT::checkPiResult(RT::cast(clEnqueueMarkerWithWaitList( - CLQueue, 0, nullptr, reinterpret_cast(Event)))); + RT::GlobalPlugin->checkPiResult( + RT::cast(clEnqueueMarkerWithWaitList( + CLQueue, 0, nullptr, reinterpret_cast(Event)))); /* // Enable once this is supported in the driver auto CLAdvice = *reinterpret_cast(&Advice); // TODO: Implement a PI call for this openCL API - RT::checkPiResult(RT::cast(pfn_clEnqueueMemAdviseINTEL( + RT::GlobalPlugin->checkPiResult(RT::cast(pfn_clEnqueueMemAdviseINTEL( CLQueue, Ptr, Length, CLAdvice, 0, nullptr, reinterpret_cast(Event)))); */ @@ -372,19 +376,20 @@ void USMDispatcher::memAdvise(pi_queue Queue, const void *Ptr, size_t Length, pi_result USMDispatcher::enqueuePrefetch(pi_queue Queue, void *Ptr, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { + pi_event *Event, + const plugin &Plugin) { pi_result RetVal = PI_INVALID_OPERATION; if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) { if (mEmulated) { // Prefetch is a hint, so ignoring it is always safe. - RetVal = PI_CALL_NOCHECK(piEnqueueEventsWait)(Queue, NumEventsInWaitList, - EventWaitList, Event); + RetVal = Plugin.call_nocheck( + Queue, NumEventsInWaitList, EventWaitList, Event); } else { // TODO: Replace this with real prefetch support when the driver enables // it. - RetVal = PI_CALL_NOCHECK(piEnqueueEventsWait)(Queue, NumEventsInWaitList, - EventWaitList, Event); + RetVal = Plugin.call_nocheck( + Queue, NumEventsInWaitList, EventWaitList, Event); } } diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index f71a506a0eccb..3ab1873ecf02a 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -43,12 +43,13 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, } else { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); pi_context C = CtxImpl->getHandleRef(); + const detail::plugin &Plugin = CtxImpl->getPlugin(); pi_result Error; switch (Kind) { case alloc::host: { - Error = PI_CALL_NOCHECK(piextUSMHostAlloc)(&RetVal, C, nullptr, Size, - Alignment); + Error = Plugin.call_nocheck( + &RetVal, C, nullptr, Size, Alignment); break; } case alloc::device: @@ -91,20 +92,21 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, } else { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); pi_context C = CtxImpl->getHandleRef(); + const detail::plugin &Plugin = CtxImpl->getPlugin(); pi_result Error; pi_device Id; switch (Kind) { case alloc::device: { Id = detail::getSyclObjImpl(Dev)->getHandleRef(); - Error = PI_CALL_NOCHECK(piextUSMDeviceAlloc)(&RetVal, C, Id, nullptr, - Size, Alignment); + Error = Plugin.call_nocheck( + &RetVal, C, Id, nullptr, Size, Alignment); break; } case alloc::shared: { Id = detail::getSyclObjImpl(Dev)->getHandleRef(); - Error = PI_CALL_NOCHECK(piextUSMSharedAlloc)(&RetVal, C, Id, nullptr, - Size, Alignment); + Error = Plugin.call_nocheck( + &RetVal, C, Id, nullptr, Size, Alignment); break; } case alloc::host: @@ -130,7 +132,8 @@ void free(void *Ptr, const context &Ctxt) { } else { std::shared_ptr CtxImpl = detail::getSyclObjImpl(Ctxt); pi_context C = CtxImpl->getHandleRef(); - PI_CALL(piextUSMFree)(C, Ptr); + const detail::plugin &Plugin = CtxImpl->getPlugin(); + Plugin.call(C, Ptr); } } @@ -247,8 +250,9 @@ alloc get_pointer_type(const void *Ptr, const context &Ctxt) { pi_usm_type AllocTy; // query type using PI function - PI_CALL(piextUSMGetMemAllocInfo)(PICtx, Ptr, PI_MEM_ALLOC_TYPE, - sizeof(pi_usm_type), &AllocTy, nullptr); + const detail::plugin &Plugin = CtxImpl->getPlugin(); + Plugin.call( + PICtx, Ptr, PI_MEM_ALLOC_TYPE, sizeof(pi_usm_type), &AllocTy, nullptr); alloc ResultAlloc; switch (AllocTy) { @@ -294,8 +298,9 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { pi_device DeviceId; // query device using PI function - PI_CALL(piextUSMGetMemAllocInfo)(PICtx, Ptr, PI_MEM_ALLOC_DEVICE, - sizeof(pi_device), &DeviceId, nullptr); + const detail::plugin &Plugin = CtxImpl->getPlugin(); + Plugin.call( + PICtx, Ptr, PI_MEM_ALLOC_DEVICE, sizeof(pi_device), &DeviceId, nullptr); for (const device &Dev : CtxImpl->getDevices()) { // Try to find the real sycl device used in the context diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 7263b629c27b0..513017a4ae926 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -28,7 +28,8 @@ device::device() : impl(std::make_shared()) {} device::device(cl_device_id deviceId) : impl(std::make_shared( - detail::pi::cast(deviceId))) {} + detail::pi::cast(deviceId), + *RT::GlobalPlugin)) {} device::device(const device_selector &deviceSelector) { *this = deviceSelector.select_device(); diff --git a/sycl/source/function_pointer.cpp b/sycl/source/function_pointer.cpp index 94d53b35dba20..24db6e41b7ffb 100644 --- a/sycl/source/function_pointer.cpp +++ b/sycl/source/function_pointer.cpp @@ -18,7 +18,8 @@ getDeviceFunctionPointerImpl(device &D, program &P, const char *FuncName) { intel::device_func_ptr_holder_t FPtr = 0; // FIXME: return value must be checked here, but since we cannot yet check // if corresponding extension is supported, let's silently ignore it here. - PI_CALL(piextGetDeviceFunctionPointer)( + const detail::plugin &Plugin = detail::getSyclObjImpl(P)->getPlugin(); + Plugin.call( detail::pi::cast(detail::getSyclObjImpl(D)->getHandleRef()), detail::pi::cast(detail::getSyclObjImpl(P)->getHandleRef()), FuncName, &FPtr); diff --git a/sycl/source/ordered_queue.cpp b/sycl/source/ordered_queue.cpp index 8a17e2d7ef256..85c4b1b63b62d 100644 --- a/sycl/source/ordered_queue.cpp +++ b/sycl/source/ordered_queue.cpp @@ -50,9 +50,10 @@ ordered_queue::ordered_queue(cl_command_queue clQueue, const async_handler &asyncHandler) { cl_command_queue_properties reportedProps; RT::PiQueue m_CommandQueue = detail::pi::cast(clQueue); - PI_CALL(piQueueGetInfo) - (m_CommandQueue, PI_QUEUE_INFO_DEVICE, sizeof(reportedProps), &reportedProps, - nullptr); + const detail::plugin &Plugin = detail::getSyclObjImpl(syclContext)->getPlugin(); + Plugin.call( + m_CommandQueue, PI_QUEUE_INFO_DEVICE, sizeof(reportedProps), + &reportedProps, nullptr); if (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) throw runtime_error( "Failed to build a sycl ordered queue from a cl OOO queue."); diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 020637c5551ac..c78dde56a0e10 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -20,7 +20,8 @@ platform::platform() : impl(std::make_shared()) {} platform::platform(cl_platform_id PlatformId) : impl(std::make_shared( - detail::pi::cast(PlatformId))) {} + detail::pi::cast(PlatformId), + RT::GlobalPlugin)) {} platform::platform(const device_selector &dev_selector) { *this = dev_selector.select_device().get_platform(); diff --git a/sycl/unittests/pi/PlatformTest.cpp b/sycl/unittests/pi/PlatformTest.cpp index c18065f638af5..e100ba50060b6 100644 --- a/sycl/unittests/pi/PlatformTest.cpp +++ b/sycl/unittests/pi/PlatformTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include +#include #include #include @@ -17,8 +18,8 @@ using namespace cl::sycl; class PlatformTest : public ::testing::Test { protected: std::vector _platforms; - - PlatformTest() : _platforms{} { detail::pi::initialize(); }; + std::vector Plugins; + PlatformTest() : _platforms{} { Plugins = detail::pi::initialize(); }; ~PlatformTest() override = default; @@ -32,7 +33,10 @@ class PlatformTest : public ::testing::Test { // Initialize the logged number of platforms before the following assertion. RecordProperty(platform_count_key, platform_count); - ASSERT_EQ((PI_CALL_NOCHECK(piPlatformsGet)(0, nullptr, &platform_count)), + // TODO: Change the test to check this for all plugins present. + // Currently, it is only checking for the first plugin attached. + ASSERT_EQ((Plugins[0].call_nocheck( + 0, nullptr, &platform_count)), PI_SUCCESS); // Overwrite previous log value with queried number of platforms. @@ -49,8 +53,8 @@ class PlatformTest : public ::testing::Test { _platforms.resize(platform_count, nullptr); - ASSERT_EQ((PI_CALL_NOCHECK(piPlatformsGet)(_platforms.size(), - _platforms.data(), nullptr)), + ASSERT_EQ((Plugins[0].call_nocheck( + _platforms.size(), _platforms.data(), nullptr)), PI_SUCCESS); } }; @@ -61,17 +65,17 @@ TEST_F(PlatformTest, piPlatformsGet) { } TEST_F(PlatformTest, piPlatformGetInfo) { - auto get_info_test = [](pi_platform platform, _pi_platform_info info) { + auto get_info_test = [&](pi_platform platform, _pi_platform_info info) { size_t reported_string_length = 0; - EXPECT_EQ((PI_CALL_NOCHECK(piPlatformGetInfo)(platform, info, 0u, nullptr, - &reported_string_length)), + EXPECT_EQ((Plugins[0].call_nocheck( + platform, info, 0u, nullptr, &reported_string_length)), PI_SUCCESS); // Create a larger result string to catch overwrites. std::vector param_value(reported_string_length * 2u, '\0'); EXPECT_EQ( - (PI_CALL_NOCHECK(piPlatformGetInfo)(platform, info, param_value.size(), - param_value.data(), nullptr)), + (Plugins[0].call_nocheck( + platform, info, param_value.size(), param_value.data(), nullptr)), PI_SUCCESS) << "piPlatformGetInfo for " << RT::platformInfoToString(info) << " failed.\n";