From 25a76c503b0c3b2f07dafc95b2afd460238db8f5 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 8 Jan 2020 18:01:08 +0000 Subject: [PATCH 1/4] [SYCL][PI][CUDA] Implements get_native interoperability Implements get_native for CUDA allowing queries for native handles on SYCL objects; queue, event, context, and device. Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl.hpp | 1 + sycl/include/CL/sycl/backend.hpp | 34 ++++++ sycl/include/CL/sycl/backend/cuda.hpp | 57 ++++++---- sycl/include/CL/sycl/backend_types.hpp | 19 ++++ sycl/include/CL/sycl/context.hpp | 13 +++ sycl/include/CL/sycl/detail/buffer_impl.hpp | 2 + .../CL/sycl/detail/cuda_definitions.hpp | 24 +++++ sycl/include/CL/sycl/detail/pi.def | 3 + sycl/include/CL/sycl/detail/pi.h | 18 ++++ sycl/include/CL/sycl/device.hpp | 12 +++ sycl/include/CL/sycl/event.hpp | 11 ++ sycl/include/CL/sycl/queue.hpp | 12 +++ sycl/plugins/cuda/pi_cuda.cpp | 44 +++++++- sycl/plugins/cuda/pi_cuda.hpp | 4 +- sycl/source/context.cpp | 2 + sycl/source/detail/context_impl.cpp | 10 +- sycl/source/detail/context_impl.hpp | 7 ++ sycl/source/detail/device_impl.cpp | 8 ++ sycl/source/detail/device_impl.hpp | 7 ++ sycl/source/detail/event_impl.cpp | 8 ++ sycl/source/detail/event_impl.hpp | 7 ++ sycl/source/detail/platform_impl.hpp | 2 + sycl/source/detail/queue_impl.cpp | 8 ++ sycl/source/detail/queue_impl.hpp | 7 ++ sycl/source/device.cpp | 2 + sycl/source/event.cpp | 2 + sycl/source/queue.cpp | 3 + sycl/unittests/pi/cuda/CMakeLists.txt | 1 + sycl/unittests/pi/cuda/test_base_objects.cpp | 2 +- .../pi/cuda/test_interop_get_native.cpp | 77 +++++++++++++ sycl/unittests/pi/cuda/test_mem_obj.cpp | 2 +- .../pi/cuda/test_primary_context.cpp | 102 ++++++++---------- sycl/unittests/pi/cuda/test_queue.cpp | 2 +- 33 files changed, 428 insertions(+), 85 deletions(-) create mode 100644 sycl/include/CL/sycl/backend.hpp create mode 100644 sycl/include/CL/sycl/backend_types.hpp create mode 100644 sycl/include/CL/sycl/detail/cuda_definitions.hpp create mode 100644 sycl/unittests/pi/cuda/test_interop_get_native.cpp diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 3455e0ab07219..c845488d37749 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/sycl/include/CL/sycl/backend.hpp b/sycl/include/CL/sycl/backend.hpp new file mode 100644 index 0000000000000..9edca021dca6b --- /dev/null +++ b/sycl/include/CL/sycl/backend.hpp @@ -0,0 +1,34 @@ +//==---------------- backend.hpp - SYCL PI backends ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template +auto get_native(const SyclObjectT &Obj) -> + typename interop::type { + return Obj.template get_native(); +} + +// Native handle of an accessor should be accessed through interop_handler +template +auto get_native(const accessor &Obj) -> + typename interop>::type = + delete; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/backend/cuda.hpp b/sycl/include/CL/sycl/backend/cuda.hpp index aff3e96d54e4a..d215800e673b7 100644 --- a/sycl/include/CL/sycl/backend/cuda.hpp +++ b/sycl/include/CL/sycl/backend/cuda.hpp @@ -1,3 +1,4 @@ + //==---------------- cuda.hpp - SYCL CUDA backend --------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -6,29 +7,45 @@ // //===----------------------------------------------------------------------===// +#pragma once + +#include +#include +#include #include +#include +#include +#include + +typedef int CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; + +// As defined in the CUDA 10.1 header file. This requires CUDA version > 3.2 +#if defined(_WIN64) || defined(__LP64__) +typedef unsigned long long CUdeviceptr; +#else +typedef unsigned int CUdeviceptr; +#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace backend { -namespace cuda { - -// CUDA backend specific options -// TODO: Use values that won't overlap with others - -// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem -#define PI_CUDA_RAW_POINTER (0xFF01) -// Context creation: Use a primary CUDA context instead of a custom one by -// providing a property value of PI_TRUE for the following -// property ID. -#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02) - -// PI Command Queue using Default stream -#define PI_CUDA_USE_DEFAULT_STREAM (0xFF03) -// PI Command queue will sync with default stream -#define PI_CUDA_SYNC_WITH_DEFAULT (0xFF04) - -} // namespace cuda -} // namespace backend + +template <> struct interop { using type = CUdevice; }; + +template <> struct interop { using type = CUcontext; }; + +template <> struct interop { using type = CUstream; }; + +template <> struct interop { using type = CUevent; }; + +template +struct interop> { + using type = CUdeviceptr; +}; + } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp new file mode 100644 index 0000000000000..3db71b8b47485 --- /dev/null +++ b/sycl/include/CL/sycl/backend_types.hpp @@ -0,0 +1,19 @@ +//==-------------- backend_types.hpp - SYCL backend types ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +namespace cl { +namespace sycl { + +enum class backend { host, opencl, cuda }; + +template struct interop; + +} // namespace sycl +} // namespace cl \ No newline at end of file diff --git a/sycl/include/CL/sycl/context.hpp b/sycl/include/CL/sycl/context.hpp index 1dfe69f7336ad..3fd83ba913b93 100644 --- a/sycl/include/CL/sycl/context.hpp +++ b/sycl/include/CL/sycl/context.hpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #pragma once + +#include #include #include #include @@ -135,10 +137,21 @@ class __SYCL_EXPORT context { /// \return a vector of valid SYCL device instances. vector_class get_devices() const; + /// Gets the native handle of the SYCL context. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return reinterpret_cast::type>( + getNative()); + } + private: /// Constructs a SYCL context object from a valid context_impl instance. context(shared_ptr_class Impl); + pi_native_handle getNative() const; + shared_ptr_class impl; template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index b1561928218e5..98b6415d34ceb 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -44,6 +44,8 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { using typename BaseT::MemObjType; public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_MEM; + buffer_impl(size_t SizeInBytes, size_t RequiredAlign, const property_list &Props, unique_ptr_class Allocator) diff --git a/sycl/include/CL/sycl/detail/cuda_definitions.hpp b/sycl/include/CL/sycl/detail/cuda_definitions.hpp new file mode 100644 index 0000000000000..f3b1e030a332e --- /dev/null +++ b/sycl/include/CL/sycl/detail/cuda_definitions.hpp @@ -0,0 +1,24 @@ +//==------------ cuda_definitions.hpp - SYCL CUDA backend ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// CUDA backend specific options +// TODO: Use values that won't overlap with others + +// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem +#define PI_CUDA_RAW_POINTER (0xFF01) +// Context creation: Use a primary CUDA context instead of a custom one by +// providing a property value of PI_TRUE for the following +// property ID. +#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02) + +// PI Command Queue using Default stream +#define PI_CUDA_USE_DEFAULT_STREAM (0xFF03) +// PI Command queue will sync with default stream +#define PI_CUDA_SYNC_WITH_DEFAULT (0xFF04) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index c16f1d4331695..193ce53dbb199 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -113,4 +113,7 @@ _PI_API(piextUSMGetMemAllocInfo) _PI_API(piextKernelSetArgMemObj) +// Interop +_PI_API(piGetNativeHandle) + #undef _PI_API diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index d51db55f85cb0..1a33dfa1919ed 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -59,6 +59,16 @@ using pi_uint32 = uint32_t; using pi_uint64 = uint64_t; using pi_bool = pi_uint32; using pi_bitfield = pi_uint64; +using pi_native_handle = uintptr_t; + +enum pi_handle_type { + PI_NATIVE_HANDLE_CONTEXT, + PI_NATIVE_HANDLE_DEVICE, + PI_NATIVE_HANDLE_QUEUE, + PI_NATIVE_HANDLE_EVENT, + PI_NATIVE_HANDLE_MEM, + PI_NATIVE_HANDLE_PLATFORM +}; // // NOTE: prefer to map 1:1 to OpenCL so that no translation is needed @@ -1346,6 +1356,14 @@ pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, size_t param_value_size, void *param_value, size_t *param_value_size_ret); +/// Gets the native handle of a SYCL object. +/// +/// \param handleType is a representation of the native handle. +/// \param piObject is the PI object to get the native handle of. +/// \param nativeHandle is the native handle of piObject. +pi_result piGetNativeHandle(pi_handle_type handleType, void *piObject, + pi_native_handle *nativeHandle); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index c1ea233eaa68c..f198e28f8c5e8 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -165,10 +166,21 @@ class __SYCL_EXPORT device { static vector_class get_devices(info::device_type deviceType = info::device_type::all); + /// Gets the native handle of the SYCL device. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return static_cast::type>( + getNative()); + } + private: shared_ptr_class impl; device(shared_ptr_class impl) : impl(impl) {} + pi_native_handle getNative() const; + template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 001f5ee209c15..5b97043897690 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -113,9 +114,19 @@ class __SYCL_EXPORT event { typename info::param_traits::return_type get_profiling_info() const; + /// Gets the native handle of the SYCL event. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return static_cast::type>(getNative()); + } + private: event(shared_ptr_class EventImpl); + pi_native_handle getNative() const; + shared_ptr_class impl; template diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bea3df4857094..685bcdb577d6e 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -638,7 +639,18 @@ class __SYCL_EXPORT queue { /// Equivalent to has_property() bool is_in_order() const; + /// Gets the native handle of the SYCL queue. + /// + /// \return a native handle, the type of which defined by the backend. + template + auto get_native() const -> typename interop::type { + return reinterpret_cast::type>( + getNative()); + } + private: + pi_native_handle getNative() const; + shared_ptr_class impl; template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 42270c2f32cbc..3c099ef3d0ea2 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -11,7 +11,7 @@ /// /// \ingroup sycl_pi_cuda -#include +#include #include #include @@ -3554,6 +3554,46 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, return result; } +// Native interop + +pi_result cuda_piGetNativeHandle(pi_handle_type handleType, void *piObject, + pi_native_handle *nativeHandle) { + switch (handleType) { + case pi_handle_type::PI_NATIVE_HANDLE_CONTEXT: { + pi_context context = static_cast(piObject); + *nativeHandle = reinterpret_cast(context->get()); + return PI_SUCCESS; + } + case pi_handle_type::PI_NATIVE_HANDLE_DEVICE: { + pi_device device = static_cast(piObject); + *nativeHandle = static_cast(device->get()); + return PI_SUCCESS; + } + case pi_handle_type::PI_NATIVE_HANDLE_QUEUE: { + pi_queue queue = static_cast(piObject); + *nativeHandle = reinterpret_cast(queue->get()); + return PI_SUCCESS; + } + case pi_handle_type::PI_NATIVE_HANDLE_EVENT: { + pi_event event = static_cast(piObject); + if (event->is_user_event()) { + return PI_INVALID_EVENT; + } + *nativeHandle = reinterpret_cast(event->get()); + return PI_SUCCESS; + } + case pi_handle_type::PI_NATIVE_HANDLE_MEM: { + pi_mem mem = static_cast(piObject); + *nativeHandle = static_cast(mem->get()); + return PI_SUCCESS; + } + default: + PI_HANDLE_UNKNOWN_PARAM_NAME(handleType); + } + cl::sycl::detail::pi::die("Native handle request not implemented"); + return {}; +} + const char SupportedVersion[] = _PI_H_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -3673,6 +3713,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo) _PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj) + // Interop + _PI_CL(piGetNativeHandle, cuda_piGetNativeHandle) #undef _PI_CL diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index d6989a87cf66f..382ef5ed85c3e 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -289,7 +289,7 @@ struct _pi_queue { cuda_piDeviceRelease(device_); } - native_type get() const { return stream_; }; + native_type get() const noexcept { return stream_; }; _pi_context *get_context() const { return context_; }; @@ -517,7 +517,7 @@ struct _pi_program { pi_context get_context() const { return context_; }; - native_type get() const { return module_; }; + native_type get() const noexcept { return module_; }; pi_uint32 increment_reference_count() noexcept { return ++refCount_; } diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index 49b49914eabf6..ca58bdac310df 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -96,5 +96,7 @@ vector_class context::get_devices() const { context::context(shared_ptr_class Impl) : impl(Impl) {} +pi_native_handle context::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index d1bee11d0d8ec..43ed003e970c0 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -6,9 +6,9 @@ // // ===--------------------------------------------------------------------=== // -#include #include #include +#include #include #include #include @@ -151,6 +151,14 @@ context_impl::hasDevice(shared_ptr_class Device) const { return false; } +pi_native_handle context_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(PIHandleType, getHandleRef(), + &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 266c3879366bc..7b966f02baf83 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -29,6 +29,8 @@ namespace detail { using PlatformImplPtr = std::shared_ptr; class context_impl { public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_CONTEXT; + /// Constructs a context_impl using a single SYCL devices. /// /// The constructed context_impl will use the AsyncHandler parameter to @@ -143,6 +145,11 @@ class context_impl { /// Returns true if and only if context contains the given device. bool hasDevice(shared_ptr_class Device) const; + /// Gets the native handle of the SYCL context. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: async_handler MAsyncHandler; vector_class MDevices; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index b7b4a76c5a395..a4ea7a6e8286f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -214,6 +214,14 @@ vector_class device_impl::create_sub_devices( return create_sub_devices(Properties, SubDevicesCount); } +pi_native_handle device_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(PIHandleType, getHandleRef(), + &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 71572216478d5..aecd4b68f3df4 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -35,6 +35,8 @@ using device_interop_handle_t = _device_interop_handle_t *; // TODO: Make code thread-safe class device_impl { public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_DEVICE; + /// Constructs a SYCL device instance as a host device. device_impl(); @@ -203,6 +205,11 @@ class device_impl { bool is_affinity_supported(info::partition_affinity_domain AffinityDomain) const; + /// Gets the native handle of the SYCL device. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: explicit device_impl(device_interop_handle_t InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index b4678181df937..2e51beab229f1 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -258,6 +258,14 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); } void HostProfilingInfo::end() { EndTime = getTimestamp(); } +pi_native_handle event_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(PIHandleType, getHandleRef(), + &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index d21c84c6b1a96..e1e02ff87eadb 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -29,6 +29,8 @@ using QueueImplWPtr = std::weak_ptr; class event_impl { public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_EVENT; + /// Constructs a ready SYCL event. /// /// If the constructed SYCL event is waited on it will complete immediately. @@ -145,6 +147,11 @@ class event_impl { /// @return a pointer to HostProfilingInfo instance. HostProfilingInfo *getHostProfilingInfo() { return MHostProfilingInfo.get(); } + /// Gets the native handle of the SYCL event. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index 80a62b56af96d..b4ac1fab7cf89 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -28,6 +28,8 @@ namespace detail { // TODO: implement parameters treatment for host device class platform_impl { public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_PLATFORM; + /// Constructs platform_impl for a SYCL host platform. platform_impl() : MHostPlatform(true) {} diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 8a504c9d03438..d0c971d36cd30 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -185,6 +185,14 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { #endif } +pi_native_handle queue_impl::getNative() const { + auto Plugin = getPlugin(); + pi_native_handle Handle; + Plugin.call(PIHandleType, MCommandQueue, + &Handle); + return Handle; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index faf58914f2354..1f7390c7c01ef 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -36,6 +36,8 @@ enum QueueOrder { Ordered, OOO }; class queue_impl { public: + static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_QUEUE; + /// Constructs a SYCL queue from a device using an async_handler and /// property_list provided. /// @@ -346,6 +348,11 @@ class queue_impl { MExceptions.PushBack(ExceptionPtr); } + /// Gets the native handle of the SYCL queue. + /// + /// \return a native handle. + pi_native_handle getNative() const; + private: /// Performs command group submission to the queue. /// diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 8d28be7ac855b..c3feb3676fec3 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -122,5 +122,7 @@ device::get_info() const { #undef PARAM_TRAITS_SPEC +pi_native_handle device::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 900a6a06a4af9..220c68eaa36c2 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -85,5 +85,7 @@ event::event(shared_ptr_class event_impl) #undef PARAM_TRAITS_SPEC +pi_native_handle event::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 1ca3605eb494e..ab4da16014beb 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -151,5 +151,8 @@ queue::get_property() const; bool queue::is_in_order() const { return impl->has_property(); } + +pi_native_handle queue::getNative() const { return impl->getNative(); } + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt index a01c771af5d9b..259b2fa9e517f 100644 --- a/sycl/unittests/pi/cuda/CMakeLists.txt +++ b/sycl/unittests/pi/cuda/CMakeLists.txt @@ -3,6 +3,7 @@ add_sycl_unittest(PiCudaTests OBJECT test_base_objects.cpp test_commands.cpp test_device.cpp + test_interop_get_native.cpp test_kernels.cpp test_mem_obj.cpp test_primary_context.cpp diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 3c4f8888a4bc2..34c361a5c43f1 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp new file mode 100644 index 0000000000000..d7a74126a101c --- /dev/null +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -0,0 +1,77 @@ +// REQUIRES: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I%opencl_include_dir -I%cuda_toolkit_include -o %t.out -lcuda -lsycl +// RUN: env SYCL_DEVICE_TYPE=GPU %t.out + +//==---------- interop_get_native.cpp - SYCL cuda get_native tests ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include +#include +#include +#include + +using namespace cl::sycl; + +struct DISABLED_CudaInteropGetNativeTests : public ::testing::Test { + +protected: + queue syclQueue_; + context syclContext_; + device syclDevice_; + + static bool isCudaDevice(const device &dev) { + const platform platform = dev.get_info(); + const std::string platformVersion = + platform.get_info(); + // If using PI_CUDA, don't accept a non-CUDA device + return platformVersion.find("CUDA") != std::string::npos; + } + + class cuda_device_selector : public device_selector { + public: + int operator()(const device &dev) const { + return isCudaDevice(dev) ? 1 : -1; + } + }; + + void SetUp() override { + syclQueue_ = queue{cuda_device_selector()}; + context syclContext_ = syclQueue_.get_context(); + device syclDevice_ = syclQueue_.get_device(); + ASSERT_TRUE(isCudaDevice(syclDevice_)); + } + + void TearDown() override {} +}; + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeDevice) { + CUdevice cudaDevice = get_native(syclDevice_); + char cudaDeviceName[2] = {0, 0}; + CUresult result = cuDeviceGetName(cudaDeviceName, 2, cudaDevice); + ASSERT_EQ(result, CUDA_SUCCESS); + ASSERT_NE(cudaDeviceName[0], 0); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeContext) { + CUcontext cudaContext = get_native(syclContext_); + ASSERT_NE(cudaContext, nullptr); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeQueue) { + CUstream cudaStream = get_native(syclQueue_); + ASSERT_NE(cudaStream, nullptr); + + CUcontext streamContext = nullptr; + CUresult result = cuStreamGetCtx(cudaStream, &streamContext); + ASSERT_EQ(result, CUDA_SUCCESS); + + CUcontext cudaContext = get_native(syclContext_); + ASSERT_EQ(streamContext, cudaContext); +} \ No newline at end of file diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp index 3715da83b68e8..8adf994ae2bdc 100644 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index 199765debeeed..8e387cb4a2536 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -20,61 +19,47 @@ using namespace cl::sycl; -void check(bool condition, const char *conditionString, const char *filename, - const long line) noexcept { - if (!condition) { - std::cerr << "CHECK failed in " << filename << "#" << line << " " - << conditionString << "\n"; - std::abort(); - } -} - -#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__) - -bool isCudaDevice(const device &dev) { - const platform platform = dev.get_info(); - const std::string platformVersion = - platform.get_info(); - // If using PI_CUDA, don't accept a non-CUDA device - return platformVersion.find("CUDA") != std::string::npos; -} - -class cuda_device_selector : public device_selector { -public: - int operator()(const device &dev) const { return isCudaDevice(dev) ? 1 : -1; } -}; - -class other_cuda_device_selector : public device_selector { -public: - other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} - - int operator()(const device &dev) const { - if (!isCudaDevice(dev)) { - return -1; - } - if (dev.get() == excludeDevice.get()) { - // Return only this device if it is the only available - return 0; - } - return 1; - } - -private: - const device &excludeDevice; -}; - -using namespace cl::sycl; - struct DISABLED_CudaPrimaryContextTests : public ::testing::Test { protected: - std::vector Plugins; - - pi_platform platform_; device deviceA_; device deviceB_; context context_; + static bool isCudaDevice(const device &dev) { + const platform platform = dev.get_info(); + const std::string platformVersion = + platform.get_info(); + // If using PI_CUDA, don't accept a non-CUDA device + return platformVersion.find("CUDA") != std::string::npos; + } + + class cuda_device_selector : public device_selector { + public: + int operator()(const device &dev) const { + return isCudaDevice(dev) ? 1 : -1; + } + }; + + class other_cuda_device_selector : public device_selector { + public: + other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} + + int operator()(const device &dev) const { + if (!isCudaDevice(dev)) { + return -1; + } + if (dev.get() == excludeDevice.get()) { + // Return only this device if it is the only available + return 0; + } + return 1; + } + + private: + const device &excludeDevice; + }; + void SetUp() override { try { @@ -96,8 +81,8 @@ TEST_F(DISABLED_CudaPrimaryContextTests, piSingleContext) { std::cout << "create single context" << std::endl; context Context(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); - CUdevice CudaDevice = reinterpret_cast(deviceA_.get())->get(); - CUcontext CudaContext = reinterpret_cast(Context.get())->get(); + CUdevice CudaDevice = deviceA_.get_native(); + CUcontext CudaContext = Context.get_native(); CUcontext PrimaryCudaContext; cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice); @@ -112,22 +97,23 @@ TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextSingleDevice) { context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); context ContextB(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); - CUcontext CudaContextA = reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = reinterpret_cast(ContextB.get())->get(); + CUcontext CudaContextA = ContextA.get_native(); + CUcontext CudaContextB = ContextB.get_native(); ASSERT_EQ(CudaContextA, CudaContextB); } TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextMultiDevice) { - if (isCudaDevice(deviceB_) && deviceA_.get() != deviceB_.get()) { + CUdevice CudaDeviceA = deviceA_.get_native(); + CUdevice CudaDeviceB = deviceB_.get_native(); + + if (isCudaDevice(deviceB_) && CudaDeviceA != CudaDeviceB) { std::cout << "create multiple contexts for multiple devices" << std::endl; context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); context ContextB(deviceB_, async_handler{}, /*UsePrimaryContext=*/true); - CUcontext CudaContextA = - reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = - reinterpret_cast(ContextB.get())->get(); + CUcontext CudaContextA = ContextA.get_native(); + CUcontext CudaContextB = ContextB.get_native(); ASSERT_NE(CudaContextA, CudaContextB); } diff --git a/sycl/unittests/pi/cuda/test_queue.cpp b/sycl/unittests/pi/cuda/test_queue.cpp index 38de62ec2dd71..ca983e4c55c79 100644 --- a/sycl/unittests/pi/cuda/test_queue.cpp +++ b/sycl/unittests/pi/cuda/test_queue.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include From a884b1350e5114038f6dbe0afe527d640a036c1a Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 18 Mar 2020 18:22:40 +0000 Subject: [PATCH 2/4] [SYCL][PI][OpenCL] Generalizing interop handler getters This commit makes get_mem and get_queue of interop_handler return types based on a specified backend. The backend defaults to OpenCL to avoid breakages. Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl.hpp | 1 + sycl/include/CL/sycl/backend/opencl.hpp | 31 +++++++++++ sycl/include/CL/sycl/detail/cg.hpp | 54 +++++++++++++++---- sycl/plugins/opencl/pi_opencl.cpp | 19 +++++++ sycl/source/detail/cg.cpp | 27 ++++++---- sycl/source/detail/scheduler/commands.cpp | 9 ++-- .../pi/cuda/test_interop_get_native.cpp | 32 ++++++++--- 7 files changed, 142 insertions(+), 31 deletions(-) create mode 100644 sycl/include/CL/sycl/backend/opencl.hpp diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index c845488d37749..27b3d3d6c137f 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/include/CL/sycl/backend/opencl.hpp b/sycl/include/CL/sycl/backend/opencl.hpp new file mode 100644 index 0000000000000..efd0f7df8a365 --- /dev/null +++ b/sycl/include/CL/sycl/backend/opencl.hpp @@ -0,0 +1,31 @@ + +//==---------------- opencl.hpp - SYCL OpenCL backend ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +template <> struct interop { + using type = cl_command_queue; +}; + +template +struct interop> { + using type = cl_mem; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 6795ff3eeca7b..8dbe84d10e209 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -29,6 +30,12 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +// Forward declaration +class queue; +namespace detail { +class queue_impl; +} // namespace detail + // Interoperability handler // class interop_handler { @@ -37,26 +44,51 @@ class interop_handler { access::target AccTarget, access::placeholder isPlaceholder> friend class accessor; public: + using QueueImplPtr = std::shared_ptr; using ReqToMem = std::pair; - interop_handler(std::vector MemObjs, cl_command_queue PiQueue) : - MQueue(PiQueue), MMemObjs(MemObjs) {} + interop_handler(std::vector MemObjs, QueueImplPtr Queue) + : MQueue(std::move(Queue)), MMemObjs(std::move(MemObjs)) {} - cl_command_queue get_queue() const noexcept { return MQueue; }; + template + auto get_queue() const -> typename interop::type { + return reinterpret_cast::type>( + GetNativeQueue()); + } - template - cl_mem get_mem(accessor - Acc) const { + auto get_mem(accessor + Acc) const -> + typename interop>::type { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; - return getMemImpl(detail::getSyclObjImpl(*AccBase).get()); + return getMemImpl( + detail::getSyclObjImpl(*AccBase).get()); } + private: - cl_command_queue MQueue; + QueueImplPtr MQueue; std::vector MMemObjs; - __SYCL_EXPORT cl_mem getMemImpl(detail::Requirement *Req) const; + + template + __SYCL_EXPORT auto + getMemImpl(detail::Requirement *Req) const -> typename interop< + BackendName, + accessor>::type { + return (typename interop>::type)GetNativeMem(Req); + } + + __SYCL_EXPORT pi_native_handle GetNativeMem(detail::Requirement *Req) const; + __SYCL_EXPORT pi_native_handle GetNativeQueue() const; }; namespace detail { diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 5f3f832c35e38..ca703fbb25f83 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1043,6 +1043,23 @@ static pi_result OCL(piextProgramSetSpecializationConstantImpl)( return cast(Res); } +/// API to get the native handle of a PI object +/// +/// \param handleType is an identifier representing the type of the handle +/// \param piObject is the PI object to get the handle of +/// \param nativeHandle is the native handle of piObject +pi_result OCL(piGetNativeHandle)(pi_handle_type handleType, void *piObject, + pi_native_handle *nativeHandle) { + switch (handleType) { + case pi_handle_type::PI_NATIVE_HANDLE_MEM: + case pi_handle_type::PI_NATIVE_HANDLE_QUEUE: + *nativeHandle = reinterpret_cast(piObject); + return PI_SUCCESS; + default: + return PI_INVALID_VALUE; + } +} + pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { @@ -1154,6 +1171,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMEnqueuePrefetch, OCL(piextUSMEnqueuePrefetch)) _PI_CL(piextUSMEnqueueMemAdvise, OCL(piextUSMEnqueueMemAdvise)) _PI_CL(piextUSMGetMemAllocInfo, OCL(piextUSMGetMemAllocInfo)) + // Native + _PI_CL(piGetNativeHandle, OCL(piGetNativeHandle)) _PI_CL(piextKernelSetArgMemObj, OCL(piextKernelSetArgMemObj)) diff --git a/sycl/source/detail/cg.cpp b/sycl/source/detail/cg.cpp index d4c5a1563cdba..ff9f3c63da47b 100644 --- a/sycl/source/detail/cg.cpp +++ b/sycl/source/detail/cg.cpp @@ -8,11 +8,11 @@ #include "CL/sycl/detail/cg.hpp" #include +#include #include #include #include - #include #include #include @@ -21,17 +21,24 @@ namespace cl { namespace sycl { -cl_mem interop_handler::getMemImpl(detail::Requirement* Req) const { - auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), - [=](ReqToMem Elem) { - return (Elem.first == Req); - }); +pi_native_handle interop_handler::GetNativeQueue() const { + return MQueue->getNative(); +} + +pi_native_handle interop_handler::GetNativeMem(detail::Requirement *Req) const { + auto Iter = std::find_if(std::begin(MMemObjs), std::end(MMemObjs), + [=](ReqToMem Elem) { return (Elem.first == Req); }); - if (Iter == std::end(MMemObjs)) { - throw("Invalid memory object used inside interop"); - } - return detail::pi::cast(Iter->second); + if (Iter == std::end(MMemObjs)) { + throw("Invalid memory object used inside interop"); } + auto Plugin = MQueue->getPlugin(); + pi_native_handle Handle; + Plugin.call( + pi_handle_type::PI_NATIVE_HANDLE_MEM, Iter->second, &Handle); + return Handle; +} + } // sycl } // cl diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bb783468ad604..0ba4d01462b7e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1772,12 +1772,13 @@ cl_int ExecCGCommand::enqueueImp() { ReqMemObjs.emplace_back(ReqToMem); }); - auto interop_queue = MQueue->get(); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); - interop_handler InteropHandler(std::move(ReqMemObjs), interop_queue); + interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); ExecInterop->MInteropTask->call(InteropHandler); - Plugin.call(MQueue->getHandleRef(), 0, nullptr, &Event); - Plugin.call(reinterpret_cast(interop_queue)); + Plugin.call(MQueue->getHandleRef(), 0, + nullptr, &Event); + Plugin.call( + reinterpret_cast(MQueue->get())); return CL_SUCCESS; } case CG::CGTYPE::NONE: diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp index d7a74126a101c..1aec0b4c26ab0 100644 --- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -1,8 +1,4 @@ -// REQUIRES: cuda -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I%opencl_include_dir -I%cuda_toolkit_include -o %t.out -lcuda -lsycl -// RUN: env SYCL_DEVICE_TYPE=GPU %t.out - -//==---------- interop_get_native.cpp - SYCL cuda get_native tests ---------==// +//==------- test_interop_get_native.cpp - SYCL CUDA get_native tests -------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -74,4 +70,28 @@ TEST_F(DISABLED_CudaInteropGetNativeTests, getNativeQueue) { CUcontext cudaContext = get_native(syclContext_); ASSERT_EQ(streamContext, cudaContext); -} \ No newline at end of file +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, interopTaskGetMem) { + buffer syclBuffer(range<1>{1}); + syclQueue_.submit([&](cl::sycl::handler &cgh) { + auto syclAccessor = syclBuffer.get_access(cgh); + cgh.interop_task([=](sycl::interop_handler ih) { + CUdeviceptr cudaPtr = ih.get_mem(syclAccessor); + CUdeviceptr cudaPtrBase; + size_t cudaPtrSize = 0; + cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr); + ASSERT_EQ(cudaPtrSize, sizeof(int)); + }); + }); +} + +TEST_F(DISABLED_CudaInteropGetNativeTests, interopTaskGetBufferMem) { + CUstream cudaStream = get_native(syclQueue_); + syclQueue_.submit([&](cl::sycl::handler &cgh) { + cgh.interop_task([=](sycl::interop_handler ih) { + CUstream cudaInteropStream = ih.get_queue(); + ASSERT_EQ(cudaInteropStream, cudaStream); + }); + }); +} From f67949a4af16074adb86291508fb6da42f38dddc Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 2 Apr 2020 16:41:54 +0100 Subject: [PATCH 3/4] [SYCL][PI] Splitting the converter function Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/detail/buffer_impl.hpp | 2 - sycl/include/CL/sycl/detail/pi.def | 17 +- sycl/include/CL/sycl/detail/pi.h | 142 +++++++--- sycl/include/CL/sycl/detail/pi.hpp | 4 +- sycl/plugins/cuda/pi_cuda.cpp | 271 +++++++++++++++----- sycl/plugins/opencl/pi_opencl.cpp | 129 ++++++---- sycl/source/detail/cg.cpp | 4 +- sycl/source/detail/context_impl.cpp | 3 +- sycl/source/detail/context_impl.hpp | 2 - sycl/source/detail/device_impl.cpp | 26 +- sycl/source/detail/device_impl.hpp | 14 +- sycl/source/detail/event_impl.cpp | 3 +- sycl/source/detail/event_impl.hpp | 2 - sycl/source/detail/platform_impl.hpp | 2 - sycl/source/detail/program_impl.cpp | 14 +- sycl/source/detail/program_impl.hpp | 9 +- sycl/source/detail/queue_impl.cpp | 3 +- sycl/source/detail/queue_impl.hpp | 2 - sycl/source/device.cpp | 3 +- sycl/source/program.cpp | 2 +- 20 files changed, 431 insertions(+), 223 deletions(-) diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index 98b6415d34ceb..b1561928218e5 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -44,8 +44,6 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { using typename BaseT::MemObjType; public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_MEM; - buffer_impl(size_t SizeInBytes, size_t RequiredAlign, const property_list &Props, unique_ptr_class Allocator) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 193ce53dbb199..a23364b3f7554 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -18,7 +18,6 @@ _PI_API(piPlatformsGet) _PI_API(piPlatformGetInfo) // Device -_PI_API(piextDeviceConvert) _PI_API(piDevicesGet) _PI_API(piDeviceGetInfo) _PI_API(piDevicePartition) @@ -26,18 +25,24 @@ _PI_API(piDeviceRetain) _PI_API(piDeviceRelease) _PI_API(piextDeviceSelectBinary) _PI_API(piextGetDeviceFunctionPointer) +_PI_API(piextDeviceGetNativeHandle) +_PI_API(piextDeviceCreateWithNativeHandle) // Context _PI_API(piContextCreate) _PI_API(piContextGetInfo) _PI_API(piContextRetain) _PI_API(piContextRelease) _PI_API(piextContextSetExtendedDeleter) +_PI_API(piextContextGetNativeHandle) +_PI_API(piextContextCreateWithNativeHandle) // Queue _PI_API(piQueueCreate) _PI_API(piQueueGetInfo) _PI_API(piQueueFinish) _PI_API(piQueueRetain) _PI_API(piQueueRelease) +_PI_API(piextQueueGetNativeHandle) +_PI_API(piextQueueCreateWithNativeHandle) // Memory _PI_API(piMemBufferCreate) _PI_API(piMemImageCreate) @@ -46,8 +51,9 @@ _PI_API(piMemImageGetInfo) _PI_API(piMemRetain) _PI_API(piMemRelease) _PI_API(piMemBufferPartition) +_PI_API(piextMemGetNativeHandle) +_PI_API(piextMemCreateWithNativeHandle) // Program -_PI_API(piextProgramConvert) _PI_API(piProgramCreate) _PI_API(piclProgramCreateWithSource) _PI_API(piclProgramCreateWithBinary) @@ -59,6 +65,8 @@ _PI_API(piProgramGetBuildInfo) _PI_API(piProgramRetain) _PI_API(piProgramRelease) _PI_API(piextProgramSetSpecializationConstant) +_PI_API(piextProgramGetNativeHandle) +_PI_API(piextProgramCreateWithNativeHandle) // Kernel _PI_API(piKernelCreate) _PI_API(piKernelSetArg) @@ -78,6 +86,8 @@ _PI_API(piEventSetCallback) _PI_API(piEventSetStatus) _PI_API(piEventRetain) _PI_API(piEventRelease) +_PI_API(piextEventGetNativeHandle) +_PI_API(piextEventCreateWithNativeHandle) // Sampler _PI_API(piSamplerCreate) _PI_API(piSamplerGetInfo) @@ -113,7 +123,4 @@ _PI_API(piextUSMGetMemAllocInfo) _PI_API(piextKernelSetArgMemObj) -// Interop -_PI_API(piGetNativeHandle) - #undef _PI_API diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 1a33dfa1919ed..edd26a447c1c6 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -61,15 +61,6 @@ using pi_bool = pi_uint32; using pi_bitfield = pi_uint64; using pi_native_handle = uintptr_t; -enum pi_handle_type { - PI_NATIVE_HANDLE_CONTEXT, - PI_NATIVE_HANDLE_DEVICE, - PI_NATIVE_HANDLE_QUEUE, - PI_NATIVE_HANDLE_EVENT, - PI_NATIVE_HANDLE_MEM, - PI_NATIVE_HANDLE_PLATFORM -}; - // // NOTE: prefer to map 1:1 to OpenCL so that no translation is needed // for PI <-> OpenCL ways. The PI <-> to other BE translation is almost @@ -772,19 +763,6 @@ pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); -// -// Device -// -/// -/// Create PI device from the given raw device handle (if the "device" -/// points to null), or, vice versa, extract the raw device handle into -/// the "handle" (if it was pointing to a null) from the given PI device. -/// NOTE: The instance of the PI device created is retained. -/// -pi_result piextDeviceConvert( - pi_device *device, ///< [in,out] the pointer to PI device - void **handle); ///< [in,out] the pointer to the raw device handle - pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, pi_uint32 *num_devices); @@ -802,6 +780,23 @@ pi_result piDevicePartition(pi_device device, pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices); +/// Gets the native handle of a PI device object. +/// +/// \param device is the PI device to get the native handle of. +/// \param nativeHandle is the native handle of device. +pi_result piextDeviceGetNativeHandle(pi_device device, + pi_native_handle *nativeHandle); + +/// Creates PI device object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI device from. +/// \param device is the PI device created from the native handle. +pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_device *device); + /// Selects the most appropriate device binary based on runtime information /// and the IR characteristics. /// @@ -845,6 +840,23 @@ pi_result piextContextSetExtendedDeleter(pi_context context, pi_context_extended_deleter func, void *user_data); +/// Gets the native handle of a PI context object. +/// +/// \param context is the PI context to get the native handle of. +/// \param nativeHandle is the native handle of context. +pi_result piextContextGetNativeHandle(pi_context context, + pi_native_handle *nativeHandle); + +/// Creates PI context object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI context from. +/// \param context is the PI context created from the native handle. +pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context *context); + // // Queue // @@ -861,6 +873,23 @@ pi_result piQueueRelease(pi_queue command_queue); pi_result piQueueFinish(pi_queue command_queue); +/// Gets the native handle of a PI queue object. +/// +/// \param queue is the PI queue to get the native handle of. +/// \param nativeHandle is the native handle of queue. +pi_result piextQueueGetNativeHandle(pi_queue queue, + pi_native_handle *nativeHandle); + +/// Creates PI queue object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI queue from. +/// \param queue is the PI queue created from the native handle. +pi_result piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_queue *queue); + // // Memory // @@ -888,19 +917,26 @@ pi_result piMemRelease(pi_mem mem); pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_buffer_create_type buffer_create_type, void *buffer_create_info, pi_mem *ret_mem); + +/// Gets the native handle of a PI mem object. +/// +/// \param mem is the PI mem to get the native handle of. +/// \param nativeHandle is the native handle of mem. +pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle); + +/// Creates PI mem object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI mem from. +/// \param mem is the PI mem created from the native handle. +pi_result piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_mem *mem); + // // Program // -/// -/// Create PI program from the given raw program handle (if the "program" -/// points to null), or, vice versa, extract the raw program handle into -/// the "handle" (if it was pointing to a null) from the given PI program. -/// NOTE: The instance of the PI program created is retained. -/// -pi_result piextProgramConvert( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle); ///< [in,out] the pointer to the raw program handle pi_result piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program); @@ -960,6 +996,23 @@ pi_result piextProgramSetSpecializationConstant(pi_program prog, size_t spec_size, const void *spec_value); +/// Gets the native handle of a PI program object. +/// +/// \param program is the PI program to get the native handle of. +/// \param nativeHandle is the native handle of program. +pi_result piextProgramGetNativeHandle(pi_program program, + pi_native_handle *nativeHandle); + +/// Creates PI program object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI program from. +/// \param program is the PI program created from the native handle. +pi_result piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_program *program); + // // Kernel // @@ -1051,6 +1104,23 @@ pi_result piEventRetain(pi_event event); pi_result piEventRelease(pi_event event); +/// Gets the native handle of a PI event object. +/// +/// \param event is the PI event to get the native handle of. +/// \param nativeHandle is the native handle of event. +pi_result piextEventGetNativeHandle(pi_event event, + pi_native_handle *nativeHandle); + +/// Creates PI event object from a native handle. +/// NOTE: If the created PI object shares reference count with the native object +/// the reference count is increased by one. Otherwise the newly created +/// PI object has a reference count of 1. +/// +/// \param nativeHandle is the native handle to create PI event from. +/// \param event is the PI event created from the native handle. +pi_result piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_event *event); + // // Sampler // @@ -1356,14 +1426,6 @@ pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, size_t param_value_size, void *param_value, size_t *param_value_size_ret); -/// Gets the native handle of a SYCL object. -/// -/// \param handleType is a representation of the native handle. -/// \param piObject is the PI object to get the native handle of. -/// \param nativeHandle is the native handle of piObject. -pi_result piGetNativeHandle(pi_handle_type handleType, void *piObject, - pi_native_handle *nativeHandle); - struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index a374288d703b6..7be744c635f59 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -311,12 +311,12 @@ template inline To cast(From value) { // These conversions should use PI interop API. template <> inline pi::PiProgram cast(cl_program interop) { - RT::assertion(false, "pi::cast -> use piextProgramConvert"); + RT::assertion(false, "pi::cast -> use piextProgramFromNative"); return {}; } template <> inline pi::PiDevice cast(cl_device_id interop) { - RT::assertion(false, "pi::cast -> use piextDeviceConvert"); + RT::assertion(false, "pi::cast -> use piextDeviceFromNative"); return {}; } } // namespace pi diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 3c099ef3d0ea2..3ed2cc1dfd78d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -705,12 +705,6 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, return {}; } -/// \TODO Not implemented -pi_result cuda_piextDeviceConvert(pi_device *device, void **handle) { - cl::sycl::detail::pi::die("cuda_piextDeviceConvert not implemented"); - return {}; -} - /// \param devices List of devices available on the system /// \param num_devices Number of elements in the list of devices /// Requesting a non-GPU device triggers an error, all PI CUDA devices @@ -1358,6 +1352,38 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return {}; } +/// Gets the native CUDA handle of a PI device object +/// +/// \param[in] device The PI device to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI device object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextDeviceGetNativeHandle(pi_device device, + pi_native_handle *nativeHandle) { + *nativeHandle = static_cast(device->get()); + return PI_SUCCESS; +} + +/// Created a PI device object from a CUDA device handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI device object from. +/// \param[out] device Set to the PI device object created from native handle. +/// +/// \return TBD +pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_device *device) { + cl::sycl::detail::pi::die( + "Creation of PI device from native handle not implemented"); + return {}; +} + /* Context APIs */ /// Create a PI CUDA context. @@ -1493,6 +1519,38 @@ pi_result cuda_piContextRelease(pi_context ctxt) { } } +/// Gets the native CUDA handle of a PI context object +/// +/// \param[in] context The PI context to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI context object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextContextGetNativeHandle(pi_context context, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(context->get()); + return PI_SUCCESS; +} + +/// Created a PI context object from a CUDA context handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI context object from. +/// \param[out] context Set to the PI context object created from native handle. +/// +/// \return TBD +pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context *context) { + cl::sycl::detail::pi::die( + "Creation of PI context from native handle not implemented"); + return {}; +} + /// Creates a PI Memory object using a CUDA memory allocation. /// Can trigger a manual copy depending on the mode. /// \TODO Implement USE_HOST_PTR using cuHostRegister @@ -1675,6 +1733,38 @@ pi_result cuda_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, cl::sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); } +/// Gets the native CUDA handle of a PI mem object +/// +/// \param[in] mem The PI mem to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI mem object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextMemGetNativeHandle(pi_mem mem, + pi_native_handle *nativeHandle) { + *nativeHandle = static_cast(mem->get()); + return PI_SUCCESS; +} + +/// Created a PI mem object from a CUDA mem handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI mem object from. +/// \param[out] mem Set to the PI mem object created from native handle. +/// +/// \return TBD +pi_result cuda_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_mem *mem) { + cl::sycl::detail::pi::die( + "Creation of PI mem from native handle not implemented"); + return {}; +} + /// Creates a `pi_queue` object on the CUDA backend. /// Valid properties /// * PI_CUDA_USE_DEFAULT_STREAM -> CU_STREAM_DEFAULT @@ -1810,6 +1900,38 @@ pi_result cuda_piQueueFinish(pi_queue command_queue) { return result; } +/// Gets the native CUDA handle of a PI queue object +/// +/// \param[in] queue The PI queue to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI queue object. +/// +/// \return PI_SUCCESS +pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(queue->get()); + return PI_SUCCESS; +} + +/// Created a PI queue object from a CUDA queue handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI queue object from. +/// \param[out] queue Set to the PI queue object created from native handle. +/// +/// \return TBD +pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_queue *queue) { + cl::sycl::detail::pi::die( + "Creation of PI queue from native handle not implemented"); + return {}; +} + pi_result cuda_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, @@ -2188,16 +2310,6 @@ pi_result cuda_piProgramBuild(pi_program program, pi_uint32 num_devices, return retError; } -/// \TODO Not implemented -pi_result cuda_piextProgramConvert( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle) ///< [in,out] the pointer to the raw program handle -{ - cl::sycl::detail::pi::die("cuda_piextProgramConvert not implemented"); - return {}; -} - /// \TODO Not implemented pi_result cuda_piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program) { @@ -2407,6 +2519,38 @@ pi_result cuda_piProgramRelease(pi_program program) { return PI_SUCCESS; } +/// Gets the native CUDA handle of a PI program object +/// +/// \param[in] program The PI program to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI program object. +/// +/// \return TBD +pi_result cuda_piextProgramGetNativeHandle(pi_program program, + pi_native_handle *nativeHandle) { + *nativeHandle = reinterpret_cast(program->get()); + return PI_SUCCESS; +} + +/// Created a PI program object from a CUDA program handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI program object from. +/// \param[out] program Set to the PI program object created from native handle. +/// +/// \return TBD +pi_result cuda_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_program *program) { + cl::sycl::detail::pi::die( + "Creation of PI program from native handle not implemented"); + return {}; +} + pi_result cuda_piKernelGetInfo( pi_kernel kernel, pi_kernel_info param_name, @@ -2762,6 +2906,41 @@ pi_result cuda_piEnqueueEventsWait(pi_queue command_queue, } } +/// Gets the native CUDA handle of a PI event object +/// +/// \param[in] event The PI event to get the native CUDA object of. +/// \param[out] nativeHandle Set to the native handle of the PI event object. +/// +/// \return PI_SUCCESS on success. PI_INVALID_EVENT if given a user event. +pi_result cuda_piextEventGetNativeHandle(pi_event event, + pi_native_handle *nativeHandle) { + if (event->is_user_event()) { + return PI_INVALID_EVENT; + } + *nativeHandle = reinterpret_cast(event->get()); + return PI_SUCCESS; +} + +/// Created a PI event object from a CUDA event handle. +/// TODO: Implement this. +/// NOTE: Approaches for native object lifetime: +/// 1. Make the created PI object the owner of the native object, +/// deleting it upon its own destruction. +/// 2. Make a special-case where the PI object does not destroy the +/// native object. +/// Both options may make lifetime tracking difficult for the user. +/// +/// \param[in] nativeHandle The native handle to create PI event object from. +/// \param[out] event Set to the PI event object created from native handle. +/// +/// \return TBD +pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_event *event) { + cl::sycl::detail::pi::die( + "Creation of PI event from native handle not implemented"); + return {}; +} + /// \TODO Not implemented in CUDA, need untie from OpenCL pi_result cuda_piSamplerCreate(pi_context context, const cl_sampler_properties *sampler_properties, @@ -3554,46 +3733,6 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, return result; } -// Native interop - -pi_result cuda_piGetNativeHandle(pi_handle_type handleType, void *piObject, - pi_native_handle *nativeHandle) { - switch (handleType) { - case pi_handle_type::PI_NATIVE_HANDLE_CONTEXT: { - pi_context context = static_cast(piObject); - *nativeHandle = reinterpret_cast(context->get()); - return PI_SUCCESS; - } - case pi_handle_type::PI_NATIVE_HANDLE_DEVICE: { - pi_device device = static_cast(piObject); - *nativeHandle = static_cast(device->get()); - return PI_SUCCESS; - } - case pi_handle_type::PI_NATIVE_HANDLE_QUEUE: { - pi_queue queue = static_cast(piObject); - *nativeHandle = reinterpret_cast(queue->get()); - return PI_SUCCESS; - } - case pi_handle_type::PI_NATIVE_HANDLE_EVENT: { - pi_event event = static_cast(piObject); - if (event->is_user_event()) { - return PI_INVALID_EVENT; - } - *nativeHandle = reinterpret_cast(event->get()); - return PI_SUCCESS; - } - case pi_handle_type::PI_NATIVE_HANDLE_MEM: { - pi_mem mem = static_cast(piObject); - *nativeHandle = static_cast(mem->get()); - return PI_SUCCESS; - } - default: - PI_HANDLE_UNKNOWN_PARAM_NAME(handleType); - } - cl::sycl::detail::pi::die("Native handle request not implemented"); - return {}; -} - const char SupportedVersion[] = _PI_H_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -3620,7 +3759,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPlatformsGet, cuda_piPlatformsGet) _PI_CL(piPlatformGetInfo, cuda_piPlatformGetInfo) // Device - _PI_CL(piextDeviceConvert, cuda_piextDeviceConvert) _PI_CL(piDevicesGet, cuda_piDevicesGet) _PI_CL(piDeviceGetInfo, cuda_piDeviceGetInfo) _PI_CL(piDevicePartition, cuda_piDevicePartition) @@ -3628,18 +3766,27 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piDeviceRelease, cuda_piDeviceRelease) _PI_CL(piextDeviceSelectBinary, cuda_piextDeviceSelectBinary) _PI_CL(piextGetDeviceFunctionPointer, cuda_piextGetDeviceFunctionPointer) + _PI_CL(piextDeviceGetNativeHandle, cuda_piextDeviceGetNativeHandle) + _PI_CL(piextDeviceCreateWithNativeHandle, + cuda_piextDeviceCreateWithNativeHandle) // Context _PI_CL(piextContextSetExtendedDeleter, cuda_piextContextSetExtendedDeleter) _PI_CL(piContextCreate, cuda_piContextCreate) _PI_CL(piContextGetInfo, cuda_piContextGetInfo) _PI_CL(piContextRetain, cuda_piContextRetain) _PI_CL(piContextRelease, cuda_piContextRelease) + _PI_CL(piextContextGetNativeHandle, cuda_piextContextGetNativeHandle) + _PI_CL(piextContextCreateWithNativeHandle, + cuda_piextContextCreateWithNativeHandle) // Queue _PI_CL(piQueueCreate, cuda_piQueueCreate) _PI_CL(piQueueGetInfo, cuda_piQueueGetInfo) _PI_CL(piQueueFinish, cuda_piQueueFinish) _PI_CL(piQueueRetain, cuda_piQueueRetain) _PI_CL(piQueueRelease, cuda_piQueueRelease) + _PI_CL(piextQueueGetNativeHandle, cuda_piextQueueGetNativeHandle) + _PI_CL(piextQueueCreateWithNativeHandle, + cuda_piextQueueCreateWithNativeHandle) // Memory _PI_CL(piMemBufferCreate, cuda_piMemBufferCreate) _PI_CL(piMemImageCreate, cuda_piMemImageCreate) @@ -3648,8 +3795,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piMemRetain, cuda_piMemRetain) _PI_CL(piMemRelease, cuda_piMemRelease) _PI_CL(piMemBufferPartition, cuda_piMemBufferPartition) + _PI_CL(piextMemGetNativeHandle, cuda_piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, cuda_piextMemCreateWithNativeHandle) // Program - _PI_CL(piextProgramConvert, cuda_piextProgramConvert) _PI_CL(piProgramCreate, cuda_piProgramCreate) _PI_CL(piclProgramCreateWithSource, cuda_piclProgramCreateWithSource) _PI_CL(piclProgramCreateWithBinary, cuda_piclProgramCreateWithBinary) @@ -3660,6 +3808,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piProgramGetBuildInfo, cuda_piProgramGetBuildInfo) _PI_CL(piProgramRetain, cuda_piProgramRetain) _PI_CL(piProgramRelease, cuda_piProgramRelease) + _PI_CL(piextMemGetNativeHandle, cuda_piextMemGetNativeHandle) + _PI_CL(piextMemCreateWithNativeHandle, cuda_piextMemCreateWithNativeHandle) // Kernel _PI_CL(piKernelCreate, cuda_piKernelCreate) _PI_CL(piKernelSetArg, cuda_piKernelSetArg) @@ -3679,6 +3829,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piEventSetStatus, cuda_piEventSetStatus) _PI_CL(piEventRetain, cuda_piEventRetain) _PI_CL(piEventRelease, cuda_piEventRelease) + _PI_CL(piextEventGetNativeHandle, cuda_piextEventGetNativeHandle) + _PI_CL(piextEventCreateWithNativeHandle, + cuda_piextEventCreateWithNativeHandle) // Sampler _PI_CL(piSamplerCreate, cuda_piSamplerCreate) _PI_CL(piSamplerGetInfo, cuda_piSamplerGetInfo) @@ -3713,8 +3866,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo) _PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj) - // Interop - _PI_CL(piGetNativeHandle, cuda_piGetNativeHandle) #undef _PI_CL diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index ca703fbb25f83..f0523eb2ca87c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -179,24 +179,6 @@ pi_result OCL(piPlatformsGet)(pi_uint32 num_entries, pi_platform *platforms, return static_cast(result); } -pi_result OCL(piextDeviceConvert)(pi_device *device, void **handle) { - // The PI device is the same as OpenCL device handle. - assert(device); - assert(handle); - - if (*device == nullptr) { - // unitialized *device. - assert(*handle); - *device = cast(*handle); - } else { - assert(*handle == nullptr); - *handle = *device; - } - - cl_int result = clRetainDevice(cast(*handle)); - return cast(result); -} - // Example of a PI interface that does not map exactly to an OpenCL one. pi_result OCL(piDevicesGet)(pi_platform platform, pi_device_type device_type, pi_uint32 num_entries, pi_device *devices, @@ -287,6 +269,14 @@ pi_result OCL(piextDeviceSelectBinary)(pi_device device, return PI_INVALID_BINARY; } +pi_result OCL(piextDeviceCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_device *piDevice) { + assert(piDevice != nullptr); + *piDevice = reinterpret_cast(nativeHandle); + clRetainDevice(cast(*piDevice)); + return PI_SUCCESS; +} + pi_result OCL(piQueueCreate)(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue) { assert(queue && "piQueueCreate failed, queue argument is null"); @@ -327,25 +317,12 @@ pi_result OCL(piQueueCreate)(pi_context context, pi_device device, return cast(ret_err); } -pi_result OCL(piextProgramConvert)( - pi_context context, ///< [in] the PI context of the program - pi_program *program, ///< [in,out] the pointer to PI program - void **handle) ///< [in,out] the pointer to the raw program handle -{ - // The PI program is the same as OpenCL program handle. - assert(program); - assert(handle); - - if (*program == nullptr) { - // uninitialized *program. - assert(*handle); - *program = cast(*handle); - } else { - assert(*handle == nullptr); - *handle = *program; - } - cl_int result = clRetainProgram(cast(*handle)); - return cast(result); +pi_result OCL(piextQueueCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_queue *piQueue) { + assert(piQueue != nullptr); + *piQueue = reinterpret_cast(nativeHandle); + clRetainCommandQueue(cast(*piQueue)); + return PI_SUCCESS; } pi_result OCL(piProgramCreate)(pi_context context, const void *il, @@ -426,6 +403,14 @@ pi_result OCL(piProgramCreate)(pi_context context, const void *il, return err; } +pi_result OCL(piextProgramCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_program *piProgram) { + assert(piProgram != nullptr); + *piProgram = reinterpret_cast(nativeHandle); + clRetainProgram(cast(*piProgram)); + return PI_SUCCESS; +} + pi_result OCL(piSamplerCreate)(pi_context context, const pi_sampler_properties *sampler_properties, pi_sampler *result_sampler) { @@ -517,6 +502,14 @@ pi_result OCL(piContextCreate)(const pi_context_properties *properties, return ret; } +pi_result OCL(piextContextCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_context *piContext) { + assert(piContext != nullptr); + *piContext = reinterpret_cast(nativeHandle); + clRetainContext(cast(*piContext)); + return PI_SUCCESS; +} + pi_result OCL(piMemBufferCreate)(pi_context context, pi_mem_flags flags, size_t size, void *host_ptr, pi_mem *ret_mem) { pi_result ret_err = PI_INVALID_OPERATION; @@ -553,6 +546,14 @@ pi_result OCL(piMemBufferPartition)(pi_mem buffer, pi_mem_flags flags, return ret_err; } +pi_result OCL(piextMemCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_mem *piMem) { + assert(piMem != nullptr); + *piMem = reinterpret_cast(nativeHandle); + clRetainMemObject(cast(*piMem)); + return PI_SUCCESS; +} + pi_result OCL(piclProgramCreateWithSource)(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, @@ -614,6 +615,14 @@ pi_result OCL(piEventCreate)(pi_context context, pi_event *ret_event) { return ret_err; } +pi_result OCL(piextEventCreateWithNativeHandle)(pi_native_handle nativeHandle, + pi_event *piEvent) { + assert(piEvent != nullptr); + *piEvent = reinterpret_cast(nativeHandle); + clRetainEvent(cast(*piEvent)); + return PI_SUCCESS; +} + pi_result OCL(piEnqueueMemBufferMap)( pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, cl_map_flags map_flags, // TODO: untie from OpenCL @@ -1043,21 +1052,17 @@ static pi_result OCL(piextProgramSetSpecializationConstantImpl)( return cast(Res); } -/// API to get the native handle of a PI object +/// Common API for getting the native handle of a PI object /// -/// \param handleType is an identifier representing the type of the handle -/// \param piObject is the PI object to get the handle of -/// \param nativeHandle is the native handle of piObject -pi_result OCL(piGetNativeHandle)(pi_handle_type handleType, void *piObject, - pi_native_handle *nativeHandle) { - switch (handleType) { - case pi_handle_type::PI_NATIVE_HANDLE_MEM: - case pi_handle_type::PI_NATIVE_HANDLE_QUEUE: - *nativeHandle = reinterpret_cast(piObject); - return PI_SUCCESS; - default: - return PI_INVALID_VALUE; - } +/// \param piObj is the pi object to get the native handle of +/// \param nativeHandle is a pointer to be set to the native handle +/// +/// PI_SUCCESS +pi_result OCL(piextGetNativeHandle)(void *piObj, + pi_native_handle *nativeHandle) { + assert(nativeHandle != nullptr); + *nativeHandle = reinterpret_cast(piObj); + return PI_SUCCESS; } pi_result piPluginInit(pi_plugin *PluginInit) { @@ -1078,7 +1083,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piPlatformsGet, OCL(piPlatformsGet)) _PI_CL(piPlatformGetInfo, clGetPlatformInfo) // Device - _PI_CL(piextDeviceConvert, OCL(piextDeviceConvert)) _PI_CL(piDevicesGet, OCL(piDevicesGet)) _PI_CL(piDeviceGetInfo, clGetDeviceInfo) _PI_CL(piDevicePartition, clCreateSubDevices) @@ -1086,17 +1090,26 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piDeviceRelease, clReleaseDevice) _PI_CL(piextDeviceSelectBinary, OCL(piextDeviceSelectBinary)) _PI_CL(piextGetDeviceFunctionPointer, OCL(piextGetDeviceFunctionPointer)) + _PI_CL(piextDeviceGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextDeviceCreateWithNativeHandle, + OCL(piextDeviceCreateWithNativeHandle)) // Context _PI_CL(piContextCreate, OCL(piContextCreate)) _PI_CL(piContextGetInfo, clGetContextInfo) _PI_CL(piContextRetain, clRetainContext) _PI_CL(piContextRelease, clReleaseContext) + _PI_CL(piextContextGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextContextCreateWithNativeHandle, + OCL(piextContextCreateWithNativeHandle)) // Queue _PI_CL(piQueueCreate, OCL(piQueueCreate)) _PI_CL(piQueueGetInfo, clGetCommandQueueInfo) _PI_CL(piQueueFinish, clFinish) _PI_CL(piQueueRetain, clRetainCommandQueue) _PI_CL(piQueueRelease, clReleaseCommandQueue) + _PI_CL(piextQueueGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextQueueCreateWithNativeHandle, + OCL(piextQueueCreateWithNativeHandle)) // Memory _PI_CL(piMemBufferCreate, OCL(piMemBufferCreate)) _PI_CL(piMemImageCreate, OCL(piMemImageCreate)) @@ -1105,8 +1118,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piMemRetain, clRetainMemObject) _PI_CL(piMemRelease, clReleaseMemObject) _PI_CL(piMemBufferPartition, OCL(piMemBufferPartition)) + _PI_CL(piextMemGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextMemCreateWithNativeHandle, OCL(piextMemCreateWithNativeHandle)) // Program - _PI_CL(piextProgramConvert, OCL(piextProgramConvert)) _PI_CL(piProgramCreate, OCL(piProgramCreate)) _PI_CL(piclProgramCreateWithSource, OCL(piclProgramCreateWithSource)) _PI_CL(piclProgramCreateWithBinary, OCL(piclProgramCreateWithBinary)) @@ -1119,7 +1133,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piProgramRelease, clReleaseProgram) _PI_CL(piextProgramSetSpecializationConstant, OCL(piextProgramSetSpecializationConstantImpl)) - + _PI_CL(piextProgramGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextProgramCreateWithNativeHandle, + OCL(piextProgramCreateWithNativeHandle)) // Kernel _PI_CL(piKernelCreate, OCL(piKernelCreate)) _PI_CL(piKernelSetArg, clSetKernelArg) @@ -1139,6 +1155,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piEventSetStatus, clSetUserEventStatus) _PI_CL(piEventRetain, clRetainEvent) _PI_CL(piEventRelease, clReleaseEvent) + _PI_CL(piextEventGetNativeHandle, OCL(piextGetNativeHandle)) + _PI_CL(piextEventCreateWithNativeHandle, + OCL(piextEventCreateWithNativeHandle)) // Sampler _PI_CL(piSamplerCreate, OCL(piSamplerCreate)) _PI_CL(piSamplerGetInfo, clGetSamplerInfo) @@ -1171,8 +1190,6 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextUSMEnqueuePrefetch, OCL(piextUSMEnqueuePrefetch)) _PI_CL(piextUSMEnqueueMemAdvise, OCL(piextUSMEnqueueMemAdvise)) _PI_CL(piextUSMGetMemAllocInfo, OCL(piextUSMGetMemAllocInfo)) - // Native - _PI_CL(piGetNativeHandle, OCL(piGetNativeHandle)) _PI_CL(piextKernelSetArgMemObj, OCL(piextKernelSetArgMemObj)) diff --git a/sycl/source/detail/cg.cpp b/sycl/source/detail/cg.cpp index ff9f3c63da47b..2441d1cb7e919 100644 --- a/sycl/source/detail/cg.cpp +++ b/sycl/source/detail/cg.cpp @@ -35,8 +35,8 @@ pi_native_handle interop_handler::GetNativeMem(detail::Requirement *Req) const { auto Plugin = MQueue->getPlugin(); pi_native_handle Handle; - Plugin.call( - pi_handle_type::PI_NATIVE_HANDLE_MEM, Iter->second, &Handle); + Plugin.call(Iter->second, + &Handle); return Handle; } diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 43ed003e970c0..669d047aae730 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -154,8 +154,7 @@ context_impl::hasDevice(shared_ptr_class Device) const { pi_native_handle context_impl::getNative() const { auto Plugin = getPlugin(); pi_native_handle Handle; - Plugin.call(PIHandleType, getHandleRef(), - &Handle); + Plugin.call(getHandleRef(), &Handle); return Handle; } diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 7b966f02baf83..4df0fde2cf0ea 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -29,8 +29,6 @@ namespace detail { using PlatformImplPtr = std::shared_ptr; class context_impl { public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_CONTEXT; - /// Constructs a context_impl using a single SYCL devices. /// /// The constructed context_impl will use the AsyncHandler parameter to diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index a4ea7a6e8286f..f060986438c63 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -19,27 +19,29 @@ device_impl::device_impl() : MIsHostDevice(true), MPlatform(std::make_shared(platform_impl())) {} -device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, +device_impl::device_impl(pi_native_handle InteropDeviceHandle, const plugin &Plugin) : device_impl(InteropDeviceHandle, nullptr, nullptr, Plugin) {} device_impl::device_impl(RT::PiDevice Device, PlatformImplPtr Platform) - : device_impl(nullptr, Device, Platform, Platform->getPlugin()) {} + : device_impl(reinterpret_cast(nullptr), Device, Platform, + Platform->getPlugin()) {} device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) - : device_impl(nullptr, Device, nullptr, Plugin) {} + : device_impl(reinterpret_cast(nullptr), Device, nullptr, + Plugin) {} -device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, +device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) : MDevice(Device), MIsHostDevice(false) { bool InteroperabilityConstructor = false; if (Device == nullptr) { - assert(InteropDeviceHandle != nullptr); + assert(InteropDeviceHandle); // Get PI device from the raw device handle. - Plugin.call(&MDevice, - (void **)&InteropDeviceHandle); + Plugin.call( + InteropDeviceHandle, &MDevice); InteroperabilityConstructor = true; } @@ -56,7 +58,7 @@ device_impl::device_impl(device_interop_handle_t InteropDeviceHandle, if (!MIsRootDevice && !InteroperabilityConstructor) { // TODO catch an exception and put it to list of asynchronous exceptions // Interoperability Constructor already calls DeviceRetain in - // piextDeviceConvert. + // piextDeviceFromNative. Plugin.call(MDevice); } @@ -97,10 +99,7 @@ cl_device_id device_impl::get() const { // TODO catch an exception and put it to list of asynchronous exceptions Plugin.call(MDevice); } - void *handle = nullptr; - Plugin.call( - const_cast(&MDevice), &handle); - return pi::cast(handle); + return pi::cast(getNative()); } platform device_impl::get_platform() const { @@ -217,8 +216,7 @@ vector_class device_impl::create_sub_devices( pi_native_handle device_impl::getNative() const { auto Plugin = getPlugin(); pi_native_handle Handle; - Plugin.call(PIHandleType, getHandleRef(), - &Handle); + Plugin.call(getHandleRef(), &Handle); return Handle; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index aecd4b68f3df4..56b47cefffccb 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -27,21 +27,14 @@ namespace detail { class platform_impl; using PlatformImplPtr = std::shared_ptr; -// TODO: SYCL BE generalization will change this to something better. -// For now this saves us from unwanted implicit casts. -struct _device_interop_handle_t; -using device_interop_handle_t = _device_interop_handle_t *; - // TODO: Make code thread-safe class device_impl { public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_DEVICE; - /// Constructs a SYCL device instance as a host device. device_impl(); /// Constructs a SYCL device instance using the provided raw device handle. - explicit device_impl(device_interop_handle_t, const plugin &Plugin); + explicit device_impl(pi_native_handle, const plugin &Plugin); /// Constructs a SYCL device instance using the provided /// PI device instance. @@ -211,9 +204,8 @@ class device_impl { pi_native_handle getNative() const; private: - explicit device_impl(device_interop_handle_t InteropDevice, - RT::PiDevice Device, PlatformImplPtr Platform, - const plugin &Plugin); + explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, + PlatformImplPtr Platform, const plugin &Plugin); RT::PiDevice MDevice = 0; RT::PiDeviceType MType; bool MIsRootDevice = false; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 2e51beab229f1..f3263720a2bb4 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -261,8 +261,7 @@ void HostProfilingInfo::end() { EndTime = getTimestamp(); } pi_native_handle event_impl::getNative() const { auto Plugin = getPlugin(); pi_native_handle Handle; - Plugin.call(PIHandleType, getHandleRef(), - &Handle); + Plugin.call(getHandleRef(), &Handle); return Handle; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index e1e02ff87eadb..94600f5eb6f9f 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -29,8 +29,6 @@ using QueueImplWPtr = std::weak_ptr; class event_impl { public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_EVENT; - /// Constructs a ready SYCL event. /// /// If the constructed SYCL event is waited on it will complete immediately. diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index b4ac1fab7cf89..80a62b56af96d 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -28,8 +28,6 @@ namespace detail { // TODO: implement parameters treatment for host device class platform_impl { public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_PLATFORM; - /// Constructs platform_impl for a SYCL host platform. platform_impl() : MHostPlatform(true) {} diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 71eb1c4e9464e..bb880ce9242c1 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -83,21 +83,21 @@ program_impl::program_impl( } program_impl::program_impl(ContextImplPtr Context, - program_interop_handle_t InteropProgram) + pi_native_handle InteropProgram) : program_impl(Context, InteropProgram, nullptr) {} program_impl::program_impl(ContextImplPtr Context, - program_interop_handle_t InteropProgram, + pi_native_handle InteropProgram, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { const detail::plugin &Plugin = getPlugin(); if (MProgram == nullptr) { - assert(InteropProgram != nullptr && - "No InteropProgram/PiProgram defined with piextProgramConvert"); + assert(InteropProgram && + "No InteropProgram/PiProgram defined with piextProgramFromNative"); // Translate the raw program handle into PI program. - Plugin.call( - Context->getHandleRef(), &MProgram, (void **)&InteropProgram); + Plugin.call(InteropProgram, + &MProgram); } else Plugin.call(Program); @@ -158,7 +158,7 @@ program_impl::program_impl(ContextImplPtr Context, } program_impl::program_impl(ContextImplPtr Context, RT::PiKernel Kernel) - : program_impl(Context, nullptr, + : program_impl(Context, reinterpret_cast(nullptr), ProgramManager::getInstance().getPiProgramFromPiKernel( Kernel, Context)) {} diff --git a/sycl/source/detail/program_impl.hpp b/sycl/source/detail/program_impl.hpp index ddf79492f2f3d..3c172d706e84c 100644 --- a/sycl/source/detail/program_impl.hpp +++ b/sycl/source/detail/program_impl.hpp @@ -31,11 +31,6 @@ namespace detail { using ContextImplPtr = std::shared_ptr; -// TODO: SYCL BE generalization will change this to something better. -// For now this saves us from unwanted implicit casts. -struct _program_interop_handle_t; -using program_interop_handle_t = _program_interop_handle_t *; - class program_impl { public: program_impl() = delete; @@ -89,7 +84,7 @@ class program_impl { /// \param Context is a pointer to SYCL context impl. /// \param InteropProgram is an instance of plugin interface interoperability /// program. - program_impl(ContextImplPtr Context, program_interop_handle_t InteropProgram); + program_impl(ContextImplPtr Context, pi_native_handle InteropProgram); /// Constructs a program instance from plugin interface interoperability /// kernel. @@ -305,7 +300,7 @@ class program_impl { private: // Deligating Constructor used in Implementation. - program_impl(ContextImplPtr Context, program_interop_handle_t InteropProgram, + program_impl(ContextImplPtr Context, pi_native_handle InteropProgram, RT::PiProgram Program); /// Checks feature support for specific devices. /// diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d0c971d36cd30..0758519720fff 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -188,8 +188,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { pi_native_handle queue_impl::getNative() const { auto Plugin = getPlugin(); pi_native_handle Handle; - Plugin.call(PIHandleType, MCommandQueue, - &Handle); + Plugin.call(MCommandQueue, &Handle); return Handle; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 1f7390c7c01ef..e48d59694af27 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -36,8 +36,6 @@ enum QueueOrder { Ordered, OOO }; class queue_impl { public: - static constexpr pi_handle_type PIHandleType = PI_NATIVE_HANDLE_QUEUE; - /// Constructs a SYCL queue from a device using an async_handler and /// property_list provided. /// diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index c3feb3676fec3..1d9d6be3509fe 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -30,8 +30,7 @@ device::device() : impl(std::make_shared()) {} device::device(cl_device_id deviceId) : impl(std::make_shared( - detail::pi::cast(deviceId), - *RT::GlobalPlugin)) {} + detail::pi::cast(deviceId), *RT::GlobalPlugin)) {} device::device(const device_selector &deviceSelector) { *this = deviceSelector.select_device(); diff --git a/sycl/source/program.cpp b/sycl/source/program.cpp index 4e24ee9088929..5d38b87d2d01e 100644 --- a/sycl/source/program.cpp +++ b/sycl/source/program.cpp @@ -30,7 +30,7 @@ program::program(vector_class programList, string_class linkOptions) { program::program(const context &context, cl_program clProgram) : impl(std::make_shared( detail::getSyclObjImpl(context), - detail::pi::cast(clProgram))) {} + detail::pi::cast(clProgram))) {} program::program(std::shared_ptr impl) : impl(impl) {} cl_program program::get() const { return impl->get(); } From 9a6794969f32bacb3b512eaceec287845a1e89d4 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 13 Apr 2020 16:59:56 +0100 Subject: [PATCH 4/4] [SYCL][PI] PI objects created with native handles takes ownership PI objects created with the associated piextCreate*WithNativeHandle function takes ownership of the native handle passed to it. For OpenCL this means that the native OpenCL object is not retained, so the PI object effectively owns the reference of the caller. The OpenCL interoperability have been changed to retain the native OpenCL handle as the implementation no longer does it. This is required by the SYCL 1.2.1 specification (Rev6, section 4.3.1.) Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/detail/pi.h | 24 +++++------------- sycl/plugins/cuda/pi_cuda.cpp | 42 +++++-------------------------- sycl/plugins/opencl/pi_opencl.cpp | 6 ----- sycl/source/device.cpp | 6 ++++- sycl/source/program.cpp | 6 ++++- 5 files changed, 22 insertions(+), 62 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index edd26a447c1c6..49e569077f9fd 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -788,9 +788,7 @@ pi_result piextDeviceGetNativeHandle(pi_device device, pi_native_handle *nativeHandle); /// Creates PI device object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI device from. /// \param device is the PI device created from the native handle. @@ -848,9 +846,7 @@ pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle); /// Creates PI context object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI context from. /// \param context is the PI context created from the native handle. @@ -881,9 +877,7 @@ pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle); /// Creates PI queue object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI queue from. /// \param queue is the PI queue created from the native handle. @@ -925,9 +919,7 @@ pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, pi_result piextMemGetNativeHandle(pi_mem mem, pi_native_handle *nativeHandle); /// Creates PI mem object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI mem from. /// \param mem is the PI mem created from the native handle. @@ -1004,9 +996,7 @@ pi_result piextProgramGetNativeHandle(pi_program program, pi_native_handle *nativeHandle); /// Creates PI program object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI program from. /// \param program is the PI program created from the native handle. @@ -1112,9 +1102,7 @@ pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle); /// Creates PI event object from a native handle. -/// NOTE: If the created PI object shares reference count with the native object -/// the reference count is increased by one. Otherwise the newly created -/// PI object has a reference count of 1. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param nativeHandle is the native handle to create PI event from. /// \param event is the PI event created from the native handle. diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 3ed2cc1dfd78d..bbe3bc4442630 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1366,12 +1366,7 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device, /// Created a PI device object from a CUDA device handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI device object from. /// \param[out] device Set to the PI device object created from native handle. @@ -1533,12 +1528,7 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context, /// Created a PI context object from a CUDA context handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI context object from. /// \param[out] context Set to the PI context object created from native handle. @@ -1747,12 +1737,7 @@ pi_result cuda_piextMemGetNativeHandle(pi_mem mem, /// Created a PI mem object from a CUDA mem handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI mem object from. /// \param[out] mem Set to the PI mem object created from native handle. @@ -1914,12 +1899,7 @@ pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, /// Created a PI queue object from a CUDA queue handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI queue object from. /// \param[out] queue Set to the PI queue object created from native handle. @@ -2533,12 +2513,7 @@ pi_result cuda_piextProgramGetNativeHandle(pi_program program, /// Created a PI program object from a CUDA program handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI program object from. /// \param[out] program Set to the PI program object created from native handle. @@ -2923,12 +2898,7 @@ pi_result cuda_piextEventGetNativeHandle(pi_event event, /// Created a PI event object from a CUDA event handle. /// TODO: Implement this. -/// NOTE: Approaches for native object lifetime: -/// 1. Make the created PI object the owner of the native object, -/// deleting it upon its own destruction. -/// 2. Make a special-case where the PI object does not destroy the -/// native object. -/// Both options may make lifetime tracking difficult for the user. +/// NOTE: The created PI object takes ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI event object from. /// \param[out] event Set to the PI event object created from native handle. diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index f0523eb2ca87c..733322fe0f577 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -273,7 +273,6 @@ pi_result OCL(piextDeviceCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_device *piDevice) { assert(piDevice != nullptr); *piDevice = reinterpret_cast(nativeHandle); - clRetainDevice(cast(*piDevice)); return PI_SUCCESS; } @@ -321,7 +320,6 @@ pi_result OCL(piextQueueCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_queue *piQueue) { assert(piQueue != nullptr); *piQueue = reinterpret_cast(nativeHandle); - clRetainCommandQueue(cast(*piQueue)); return PI_SUCCESS; } @@ -407,7 +405,6 @@ pi_result OCL(piextProgramCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_program *piProgram) { assert(piProgram != nullptr); *piProgram = reinterpret_cast(nativeHandle); - clRetainProgram(cast(*piProgram)); return PI_SUCCESS; } @@ -506,7 +503,6 @@ pi_result OCL(piextContextCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_context *piContext) { assert(piContext != nullptr); *piContext = reinterpret_cast(nativeHandle); - clRetainContext(cast(*piContext)); return PI_SUCCESS; } @@ -550,7 +546,6 @@ pi_result OCL(piextMemCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_mem *piMem) { assert(piMem != nullptr); *piMem = reinterpret_cast(nativeHandle); - clRetainMemObject(cast(*piMem)); return PI_SUCCESS; } @@ -619,7 +614,6 @@ pi_result OCL(piextEventCreateWithNativeHandle)(pi_native_handle nativeHandle, pi_event *piEvent) { assert(piEvent != nullptr); *piEvent = reinterpret_cast(nativeHandle); - clRetainEvent(cast(*piEvent)); return PI_SUCCESS; } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 1d9d6be3509fe..d08fa5a63922e 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -30,7 +30,11 @@ device::device() : impl(std::make_shared()) {} device::device(cl_device_id deviceId) : impl(std::make_shared( - detail::pi::cast(deviceId), *RT::GlobalPlugin)) {} + detail::pi::cast(deviceId), *RT::GlobalPlugin)) { + // The implementation constructor takes ownership of the native handle so we + // must retain it in order to adhere to SYCL 1.2.1 spec (Rev6, section 4.3.1.) + clRetainDevice(deviceId); +} device::device(const device_selector &deviceSelector) { *this = deviceSelector.select_device(); diff --git a/sycl/source/program.cpp b/sycl/source/program.cpp index 5d38b87d2d01e..6884e91cc566b 100644 --- a/sycl/source/program.cpp +++ b/sycl/source/program.cpp @@ -30,7 +30,11 @@ program::program(vector_class programList, string_class linkOptions) { program::program(const context &context, cl_program clProgram) : impl(std::make_shared( detail::getSyclObjImpl(context), - detail::pi::cast(clProgram))) {} + detail::pi::cast(clProgram))) { + // The implementation constructor takes ownership of the native handle so we + // must retain it in order to adhere to SYCL 1.2.1 spec (Rev6, section 4.3.1.) + clRetainProgram(clProgram); +} program::program(std::shared_ptr impl) : impl(impl) {} cl_program program::get() const { return impl->get(); }