From 444338ea55ba15057bb2bb5f295841ec78c0a055 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 10 Feb 2022 11:28:21 +0000 Subject: [PATCH 1/9] [SYCL][CUDA] Add context interop specialization --- sycl/include/CL/sycl/backend.hpp | 4 + .../backend/backend_traits_cuda.hpp | 144 ++++++++++++++++++ .../ext/oneapi/experimental/backend/cuda.hpp | 36 +++++ 3 files changed, 184 insertions(+) create mode 100644 sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp create mode 100644 sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp diff --git a/sycl/include/CL/sycl/backend.hpp b/sycl/include/CL/sycl/backend.hpp index 2954217f1ca64..2a228b66805c8 100644 --- a/sycl/include/CL/sycl/backend.hpp +++ b/sycl/include/CL/sycl/backend.hpp @@ -18,8 +18,12 @@ #include #endif #if SYCL_EXT_ONEAPI_BACKEND_CUDA +#ifdef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL +#include +#else #include #endif +#endif #if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO #include #endif diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp new file mode 100644 index 0000000000000..3d30043a3aa1e --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -0,0 +1,144 @@ +//===------- backend_traits_cuda.hpp - Backend traits for CUDA ---*-C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the specializations of the sycl::detail::interop, +// sycl::detail::BackendInput and sycl::detail::BackendReturn class templates +// for the CUDA backend but there is no sycl::detail::InteropFeatureSupportMap +// specialization for the CUDA backend. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +typedef int CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; +typedef struct CUmod_st *CUmodule; + +// As defined in the CUDA 10.1 header file. This requires CUDA version > 3.2 +#if defined(_WIN64) || defined(__LP64__) +typedef unsigned long long CUdeviceptr; +#else +typedef unsigned int CUdeviceptr; +#endif + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +// TODO the interops for context, device, event, platform and program +// may be removed after removing the deprecated 'get_native()' methods +// from the corresponding classes. The interop specialization +// is also used in the get_queue() method of the deprecated class +// interop_handler and also can be removed after API cleanup. +template <> struct interop { + using type = CUcontext; +}; + +template <> struct interop { + using type = CUdevice; +}; + +template <> struct interop { + using type = CUevent; +}; + +template <> struct interop { + using type = CUstream; +}; + +#ifdef __SYCL_INTERNAL_API +template <> struct interop { + using type = CUmodule; +}; +#endif + +// TODO the interops for accessor is used in the already deprecated class +// interop_handler and can be removed after API cleanup. +template +struct interop> { + using type = CUdeviceptr; +}; + +template +struct interop< + backend::ext_oneapi_cuda, + accessor> { + using type = CUdeviceptr; +}; + +template +struct BackendInput> { + using type = CUdeviceptr; +}; + +template +struct BackendReturn> { + using type = CUdeviceptr; +}; + +template <> struct BackendInput { + using type = CUcontext; +}; + +template <> struct BackendReturn { + using type = std::vector; +}; + +template <> struct BackendInput { + using type = CUdevice; +}; + +template <> struct BackendReturn { + using type = CUdevice; +}; + +template <> struct BackendInput { + using type = CUevent; +}; + +template <> struct BackendReturn { + using type = CUevent; +}; + +template <> struct BackendInput { + using type = CUstream; +}; + +template <> struct BackendReturn { + using type = CUstream; +}; + +#ifdef __SYCL_INTERNAL_API +template <> struct BackendInput { + using type = CUmodule; +}; + +template <> struct BackendReturn { + using type = CUmodule; +}; +#endif + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp new file mode 100644 index 0000000000000..519a0f21714ad --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -0,0 +1,36 @@ +//==--------- cuda.hpp - SYCL CUDA backend ---------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +// CUDA context specialization +template <> +auto get_native(const context &C) + -> backend_return_t { + // create a vector to be returned + backend_return_t ret; + + // get the native CUDA context from the SYCL object + auto native = reinterpret_cast< + backend_return_t::value_type>( + C.getNative()); + ret.push_back(native); + + return ret; +} + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) From 195b96916ce3ce02e88ab4cb41f7317591047185 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 17 Feb 2022 16:45:49 +0000 Subject: [PATCH 2/9] [SYCL] Unify interop implementations --- sycl/include/CL/sycl/context.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/context.hpp b/sycl/include/CL/sycl/context.hpp index a78e30f385f16..87cbc21808e03 100644 --- a/sycl/include/CL/sycl/context.hpp +++ b/sycl/include/CL/sycl/context.hpp @@ -27,6 +27,8 @@ class platform; namespace detail { class context_impl; } +template +auto get_native(const SyclT &Obj) -> backend_return_t; /// The context class represents a SYCL context on which kernel functions may /// be executed. @@ -220,7 +222,7 @@ class __SYCL_EXPORT context { template __SYCL_DEPRECATED("Use SYCL 2020 sycl::get_native free function") backend_return_t get_native() const { - return reinterpret_cast>(getNative()); + return sycl::get_native(*this); } private: @@ -230,6 +232,10 @@ class __SYCL_EXPORT context { pi_native_handle getNative() const; std::shared_ptr impl; + + template + friend auto get_native(const SyclT &Obj) -> backend_return_t; + template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); From efec76c569099228c4167dfa25baa45d0adf3de0 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Fri, 18 Feb 2022 11:55:19 +0000 Subject: [PATCH 3/9] add experimental platform and device interop --- .../backend/backend_traits_cuda.hpp | 23 +++++ .../ext/oneapi/experimental/backend/cuda.hpp | 49 ++++++++++ sycl/plugins/cuda/pi_cuda.cpp | 98 ++++++++++++++++++- sycl/source/backend.cpp | 2 + sycl/source/detail/pi.cpp | 1 + 5 files changed, 168 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp index 3d30043a3aa1e..c460d2f854f67 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -62,6 +62,10 @@ template <> struct interop { using type = CUstream; }; +template <> struct interop { + using type = std::vector; +}; + #ifdef __SYCL_INTERNAL_API template <> struct interop { using type = CUmodule; @@ -129,6 +133,14 @@ template <> struct BackendReturn { using type = CUstream; }; +template <> struct BackendInput { + using type = std::vector; +}; + +template <> struct BackendReturn { + using type = std::vector; +}; + #ifdef __SYCL_INTERNAL_API template <> struct BackendInput { using type = CUmodule; @@ -139,6 +151,17 @@ template <> struct BackendReturn { }; #endif +template <> struct InteropFeatureSupportMap { + static constexpr bool MakePlatform = true; + static constexpr bool MakeDevice = true; + static constexpr bool MakeContext = true; + static constexpr bool MakeQueue = true; + static constexpr bool MakeEvent = true; + static constexpr bool MakeBuffer = true; + static constexpr bool MakeKernel = true; + static constexpr bool MakeKernelBundle = true; +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp index 519a0f21714ad..76d92018aaf5d 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -15,6 +15,23 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +namespace ext { +namespace oneapi { +namespace cuda { + +// Implementation of cuda::make +__SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { + return detail::make_device(NativeHandle, backend::cuda); +} + +// Implementation of cuda::make +__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { + return detail::make_platform(NativeHandle, backend::cuda); +} + +} // namespace cuda +} // namespace oneapi +} // namespace ext // CUDA context specialization template <> @@ -32,5 +49,37 @@ auto get_native(const context &C) return ret; } +// CUDA device specialization +template <> +device make_device( + const backend_input_t &BackendObject) { + pi_native_handle NativeHandle = static_cast(BackendObject); + return ext::oneapi::cuda::make_device(NativeHandle); +} + +// CUDA platform specialization +template <> +auto get_native(const platform &C) + -> backend_return_t { + // get list of platform devices, and transfer to native platform type + std::vector platform_devices = C.get_devices(); + std::vector native_devices(platform_devices.size()); + + // Get the native CUdevice type for each device in platform + for (unsigned int i = 0; i < platform_devices.size(); ++i) + native_devices[i] = + get_native(platform_devices[i]); + + return native_devices; +} + +template <> +platform make_platform( + const backend_input_t &BackendObject) { + pi_native_handle NativeHandle = + detail::pi::cast(&BackendObject); + return ext::oneapi::cuda::make_platform(NativeHandle); +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 4b05475f86cc8..1f228aa1a194c 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -834,6 +834,53 @@ pi_result cuda_piDevicesGet(pi_platform platform, pi_device_type device_type, } } +pi_result cuda_piextPlatformGetNativeHandle(pi_platform platform, + pi_native_handle *nativeHandle) { + cl::sycl::detail::pi::die( + "cuda_piextPlatformGetNativeHandle not implemented"); + return {}; +} + +pi_result +cuda_piextPlatformCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_platform *platform) { + assert(platform); + assert(nativeHandle); + + auto native_platform = + reinterpret_cast *>(nativeHandle); + + // Get list of platforms + pi_uint32 num_platforms; + pi_result result = cuda_piPlatformsGet(0, nullptr, &num_platforms); + + pi_platform *plat = + static_cast(malloc(num_platforms * sizeof(pi_platform))); + result = cuda_piPlatformsGet(num_platforms, plat, nullptr); + + // Iterate through platforms to find device that matches nativeHandle + bool found_match = false; + for (pi_uint32 j = 0; j < num_platforms; ++j) { + bool is_same = true; + for (auto &dev : plat[j]->devices_) { + auto it = + find(native_platform->begin(), native_platform->end(), dev->get()); + if (it == native_platform->end()) + is_same = false; + } + if (is_same) { + found_match = true; + *platform = plat[j]; + } + } + + if (!found_match) { + return PI_INVALID_VALUE; + } + + return result; +} + /// \return PI_SUCCESS if the function is executed successfully /// CUDA devices are always root devices so retain always returns success. pi_result cuda_piDeviceRetain(pi_device) { return PI_SUCCESS; } @@ -1772,11 +1819,49 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device, /// \param[out] device Set to the PI device object created from native handle. /// /// \return TBD -pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle, pi_platform, - pi_device *) { - cl::sycl::detail::pi::die( - "Creation of PI device from native handle not implemented"); - return {}; +pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_platform platform, + pi_device *piDevice) { + assert(piDevice != nullptr); + + // If a platform is provided just check if the device is in it + if (platform) { + bool found_match = false; + for (auto &dev : platform->devices_) { + if (dev->get() == static_cast(nativeHandle)) { + *piDevice = dev.get(); + found_match = true; + } + } + if (!found_match) + return PI_INVALID_VALUE; + return PI_SUCCESS; + } + + // Get list of platforms + pi_uint32 num_platforms; + pi_result result = cuda_piPlatformsGet(0, nullptr, &num_platforms); + + pi_platform *plat = + static_cast(malloc(num_platforms * sizeof(pi_platform))); + result = cuda_piPlatformsGet(num_platforms, plat, nullptr); + + // Iterate through platforms to find device that matches nativeHandle + bool found_match = false; + for (pi_uint32 j = 0; j < num_platforms; ++j) { + for (auto &dev : plat[j]->devices_) { + if (dev->get() == static_cast(nativeHandle)) { + *piDevice = dev.get(); + found_match = true; + } + } + } + + // If the provided nativeHandle cannot be matched to an + // existing device return error + if (!found_match) + return PI_INVALID_VALUE; + return result; } /* Context APIs */ @@ -4937,6 +5022,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { // Platform _PI_CL(piPlatformsGet, cuda_piPlatformsGet) _PI_CL(piPlatformGetInfo, cuda_piPlatformGetInfo) + _PI_CL(piextPlatformGetNativeHandle, cuda_piextPlatformGetNativeHandle) + _PI_CL(piextPlatformCreateWithNativeHandle, + cuda_piextPlatformCreateWithNativeHandle) // Device _PI_CL(piDevicesGet, cuda_piDevicesGet) _PI_CL(piDeviceGetInfo, cuda_piDeviceGetInfo) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 691bdbf0ab91c..f5cbf0b5d9088 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -35,6 +35,8 @@ static const plugin &getPlugin(backend Backend) { return pi::getPlugin(); case backend::ext_oneapi_level_zero: return pi::getPlugin(); + case backend::ext_oneapi_cuda: + return pi::getPlugin(); default: throw sycl::runtime_error{"Unsupported backend", PI_INVALID_OPERATION}; } diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 34f830de3cacc..4136d6b82cf51 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -529,6 +529,7 @@ template __SYCL_EXPORT const plugin & getPlugin(); template __SYCL_EXPORT const plugin & getPlugin(); +template __SYCL_EXPORT const plugin &getPlugin(); // Report error and no return (keeps compiler from printing warnings). // TODO: Probably change that to throw a catchable exception, From 06afe8bd2757d0470e8427c0dd627474087cc878 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Fri, 18 Feb 2022 14:54:06 +0000 Subject: [PATCH 4/9] Change get_native_mem type --- .../ext/oneapi/experimental/backend/backend_traits_cuda.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp index c460d2f854f67..d6f3555757b91 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -98,7 +98,7 @@ struct BackendInput struct BackendReturn> { - using type = CUdeviceptr; + using type = void *; }; template <> struct BackendInput { From d9daac27135f78954204b1b3f52817792188abc4 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Mon, 21 Feb 2022 11:13:02 +0000 Subject: [PATCH 5/9] add cuda experimental interop testing --- .../basic_tests/interop-backend-traits.cpp | 13 ++ .../basic_tests/interop-cuda-experimental.cpp | 111 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 sycl/test/basic_tests/interop-cuda-experimental.cpp diff --git a/sycl/test/basic_tests/interop-backend-traits.cpp b/sycl/test/basic_tests/interop-backend-traits.cpp index 996b896a024e7..f3c04a05c641c 100644 --- a/sycl/test/basic_tests/interop-backend-traits.cpp +++ b/sycl/test/basic_tests/interop-backend-traits.cpp @@ -1,6 +1,7 @@ // RUN: %clangxx -fsycl -DUSE_OPENCL %s // RUN: %clangxx -fsycl -DUSE_L0 %s // RUN: %clangxx -fsycl -DUSE_CUDA %s +// RUN: %clangxx -fsycl -DUSE_CUDA_EXPERIMENTAL %s #ifdef USE_OPENCL #include @@ -24,6 +25,14 @@ constexpr auto Backend = sycl::backend::ext_oneapi_level_zero; constexpr auto Backend = sycl::backend::ext_oneapi_cuda; #endif +#ifdef USE_CUDA_EXPERIMENTAL +#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 +#include +#include + +constexpr auto Backend = sycl::backend::ext_oneapi_cuda; +#endif + #include int main() { @@ -55,9 +64,13 @@ int main() { static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); + +// CUDA experimental return type is different to inpt type +#ifndef USE_CUDA_EXPERIMENTAL static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); +#endif static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); diff --git a/sycl/test/basic_tests/interop-cuda-experimental.cpp b/sycl/test/basic_tests/interop-cuda-experimental.cpp new file mode 100644 index 0000000000000..b04a92f9cbfad --- /dev/null +++ b/sycl/test/basic_tests/interop-cuda-experimental.cpp @@ -0,0 +1,111 @@ +// REQUIRES: cuda +// RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s -o %t.out +// RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note -D__SYCL_INTERNAL_API %s -o %t.out + +// Test for experimental CUDA interop API + +#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 +#include +#include + +#include + +using namespace sycl; + +// +// 4.5.1 SYCL application interoperability may be provided for +// platform, +// device, +// context, +// queue, +// event, +// TODO: +// buffer, +// device_image, +// sampled_image, +// unsampled_image. + +int main() { + + // Create SYCL objects + device Device; + platform Platform = Device.get_info(); + context Context(Device); + queue Queue(Device); + event Event; + + // 4.5.1.1 For each SYCL runtime class T which supports SYCL application + // interoperability with the SYCL backend, a specialization of return_type + // must be defined as the type of SYCL application interoperability native + // backend object associated with T for the SYCL backend, specified in the + // SYCL backend specification. + // + // return_type is used when retrieving the backend specific native object from + // a SYCL object. See the relevant backend specification for details. + + backend_traits::return_type cu_platform; + backend_traits::return_type cu_device; + backend_traits::return_type cu_context; + backend_traits::return_type cu_queue; + backend_traits::return_type cu_event; + + // 4.5.1.2 For each SYCL runtime class T which supports SYCL application + // interoperability, a specialization of get_native must be defined, which + // takes an instance of T and returns a SYCL application interoperability + // native backend object associated with syclObject which can be used for SYCL + // application interoperability. The lifetime of the object returned are + // backend-defined and specified in the backend specification. + + cu_platform = get_native(Platform); + cu_device = get_native(Device); + cu_context = get_native(Context); + cu_queue = get_native(Queue); + cu_event = get_native(Event); + + // Check deprecated + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_platform = Platform.get_native(); + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_device = Device.get_native(); + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_context = Context.get_native(); + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_queue = Queue.get_native(); + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_event = Event.get_native(); + + // 4.5.1.1 For each SYCL runtime class T which supports SYCL application + // interoperability with the SYCL backend, a specialization of input_type must + // be defined as the type of SYCL application interoperability native backend + // object associated with T for the SYCL backend, specified in the SYCL + // backend specification. input_type is used when constructing SYCL objects + // from backend specific native objects. See the relevant backend + // specification for details. + + // 4.5.1.3 For each SYCL runtime class T which supports SYCL application + // interoperability, a specialization of the appropriate template function + // make_{sycl_class} where {sycl_class} is the class name of T, must be + // defined, which takes a SYCL application interoperability native backend + // object and constructs and returns an instance of T. The availability and + // behavior of these template functions is defined by the SYCL backend + // specification document. + + backend_input_t InteropPlatformInput{cu_platform}; + platform InteropPlatform = make_platform(InteropPlatformInput); + + backend_input_t InteropDeviceInput{cu_device}; + device InteropDevice = make_device(InteropDeviceInput); + + backend_input_t InteropContextInput{cu_context[0]}; + context InteropContext = make_context(InteropContextInput); + + queue InteropQueue = make_queue(cu_queue, Context); + event InteropEvent = make_event(cu_event, Context); + + return 0; +} From 88429feaa64604f5d8dabe2aa1d51b9f0241da64 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Mon, 21 Feb 2022 11:13:50 +0000 Subject: [PATCH 6/9] update platform --- sycl/include/CL/sycl/platform.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/platform.hpp b/sycl/include/CL/sycl/platform.hpp index e6249a4883bd5..aed37a00389e9 100644 --- a/sycl/include/CL/sycl/platform.hpp +++ b/sycl/include/CL/sycl/platform.hpp @@ -27,6 +27,8 @@ class device; namespace detail { class platform_impl; } +template +auto get_native(const SyclT &Obj) -> backend_return_t; /// Encapsulates a SYCL platform on which kernels may be executed. /// @@ -123,7 +125,7 @@ class __SYCL_EXPORT platform { template __SYCL_DEPRECATED("Use SYCL 2020 sycl::get_native free function") backend_return_t get_native() const { - return reinterpret_cast>(getNative()); + return sycl::get_native(*this); } /// Indicates if all of the SYCL devices on this platform have the @@ -147,6 +149,9 @@ class __SYCL_EXPORT platform { std::shared_ptr impl; platform(std::shared_ptr impl) : impl(impl) {} + template + friend auto get_native(const SyclT &Obj) -> backend_return_t; + template friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); template From 589ec40f1cbfe4396df1f31ae2d26faa0ab177d4 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Wed, 16 Mar 2022 09:39:46 +0000 Subject: [PATCH 7/9] add specialisation for interop_handle get_native_context --- .../ext/oneapi/experimental/backend/cuda.hpp | 13 ++++++++++++ .../basic_tests/interop-backend-traits.cpp | 2 +- .../basic_tests/interop-cuda-experimental.cpp | 20 ++++++++++++------- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp index 76d92018aaf5d..333ee5ba18b83 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -81,5 +82,17 @@ platform make_platform( return ext::oneapi::cuda::make_platform(NativeHandle); } +// Specialisation of interop_handles get_native_context +template <> +backend_return_t +interop_handle::get_native_context() const { +#ifndef __SYCL_DEVICE_ONLY__ + return std::vector{reinterpret_cast(getNativeContext())}; +#else + // we believe this won't be ever called on device side + return {}; +#endif +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/basic_tests/interop-backend-traits.cpp b/sycl/test/basic_tests/interop-backend-traits.cpp index f3c04a05c641c..d4384cde80ad0 100644 --- a/sycl/test/basic_tests/interop-backend-traits.cpp +++ b/sycl/test/basic_tests/interop-backend-traits.cpp @@ -27,8 +27,8 @@ constexpr auto Backend = sycl::backend::ext_oneapi_cuda; #ifdef USE_CUDA_EXPERIMENTAL #define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 -#include #include +#include constexpr auto Backend = sycl::backend::ext_oneapi_cuda; #endif diff --git a/sycl/test/basic_tests/interop-cuda-experimental.cpp b/sycl/test/basic_tests/interop-cuda-experimental.cpp index b04a92f9cbfad..8dcdb1de6c410 100644 --- a/sycl/test/basic_tests/interop-cuda-experimental.cpp +++ b/sycl/test/basic_tests/interop-cuda-experimental.cpp @@ -5,8 +5,8 @@ // Test for experimental CUDA interop API #define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 -#include #include +#include #include @@ -95,14 +95,20 @@ int main() { // behavior of these template functions is defined by the SYCL backend // specification document. - backend_input_t InteropPlatformInput{cu_platform}; - platform InteropPlatform = make_platform(InteropPlatformInput); + backend_input_t InteropPlatformInput{ + cu_platform}; + platform InteropPlatform = + make_platform(InteropPlatformInput); - backend_input_t InteropDeviceInput{cu_device}; - device InteropDevice = make_device(InteropDeviceInput); + backend_input_t InteropDeviceInput{ + cu_device}; + device InteropDevice = + make_device(InteropDeviceInput); - backend_input_t InteropContextInput{cu_context[0]}; - context InteropContext = make_context(InteropContextInput); + backend_input_t InteropContextInput{ + cu_context[0]}; + context InteropContext = + make_context(InteropContextInput); queue InteropQueue = make_queue(cu_queue, Context); event InteropEvent = make_event(cu_event, Context); From d17840dfe449cab90405699a1c1e8999730fd7bb Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Thu, 14 Apr 2022 10:58:47 +0100 Subject: [PATCH 8/9] Add support for kernel function interop --- sycl/include/CL/sycl/accessor.hpp | 6 +++++ sycl/include/CL/sycl/backend.hpp | 18 ++++++++++---- .../backend/backend_traits_cuda.hpp | 24 +++++++++---------- .../ext/oneapi/experimental/backend/cuda.hpp | 14 +++++------ 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index cf13264b4fca0..d21015c086f05 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -2134,6 +2134,12 @@ class __SYCL_SPECIAL_CLASS accessor +using local_accessor = + accessor; + /// Image accessors. /// /// Available only when accessTarget == access::target::image. diff --git a/sycl/include/CL/sycl/backend.hpp b/sycl/include/CL/sycl/backend.hpp index 2a228b66805c8..79009c217404e 100644 --- a/sycl/include/CL/sycl/backend.hpp +++ b/sycl/include/CL/sycl/backend.hpp @@ -115,15 +115,25 @@ inline backend_return_t get_native< } #endif -// Native handle of an accessor should be accessed through interop_handler +// Native accessor handle for kernel function interop template auto get_native(const accessor &Obj) -> - typename detail::interop< - BackendName, accessor>::type = delete; + typename detail::interop>::type { +#ifdef __SYCL_DEVICE_ONLY__ + return reinterpret_cast>::type>(Obj.get_pointer().get()); + +#else + throw runtime_error("Get native accessor is not support on host.", + PI_INVALID_VALUE); +#endif +} namespace detail { // Forward declaration diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp index d6f3555757b91..8c658106e1536 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -30,13 +30,6 @@ typedef struct CUstream_st *CUstream; typedef struct CUevent_st *CUevent; typedef struct CUmod_st *CUmodule; -// As defined in the CUDA 10.1 header file. This requires CUDA version > 3.2 -#if defined(_WIN64) || defined(__LP64__) -typedef unsigned long long CUdeviceptr; -#else -typedef unsigned int CUdeviceptr; -#endif - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -72,13 +65,11 @@ template <> struct interop { }; #endif -// TODO the interops for accessor is used in the already deprecated class -// interop_handler and can be removed after API cleanup. template struct interop> { - using type = CUdeviceptr; + using type = DataT *; }; template @@ -86,19 +77,26 @@ struct interop< backend::ext_oneapi_cuda, accessor> { - using type = CUdeviceptr; + using type = DataT *; +}; + +template +struct interop> { + using type = DataT *; }; template struct BackendInput> { - using type = CUdeviceptr; + using type = DataT *; }; template struct BackendReturn> { - using type = void *; + using type = DataT *; }; template <> struct BackendInput { diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp index 333ee5ba18b83..d3aa8c074ece5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -21,12 +21,12 @@ namespace oneapi { namespace cuda { // Implementation of cuda::make -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { +inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { return detail::make_device(NativeHandle, backend::cuda); } // Implementation of cuda::make -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { +inline __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { return detail::make_platform(NativeHandle, backend::cuda); } @@ -36,7 +36,7 @@ __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { // CUDA context specialization template <> -auto get_native(const context &C) +inline auto get_native(const context &C) -> backend_return_t { // create a vector to be returned backend_return_t ret; @@ -52,7 +52,7 @@ auto get_native(const context &C) // CUDA device specialization template <> -device make_device( +inline device make_device( const backend_input_t &BackendObject) { pi_native_handle NativeHandle = static_cast(BackendObject); return ext::oneapi::cuda::make_device(NativeHandle); @@ -60,7 +60,7 @@ device make_device( // CUDA platform specialization template <> -auto get_native(const platform &C) +inline auto get_native(const platform &C) -> backend_return_t { // get list of platform devices, and transfer to native platform type std::vector platform_devices = C.get_devices(); @@ -75,7 +75,7 @@ auto get_native(const platform &C) } template <> -platform make_platform( +inline platform make_platform( const backend_input_t &BackendObject) { pi_native_handle NativeHandle = detail::pi::cast(&BackendObject); @@ -84,7 +84,7 @@ platform make_platform( // Specialisation of interop_handles get_native_context template <> -backend_return_t +inline backend_return_t interop_handle::get_native_context() const { #ifndef __SYCL_DEVICE_ONLY__ return std::vector{reinterpret_cast(getNativeContext())}; From 80568c6d2c060039cc3bdc41f900403a8511a6c1 Mon Sep 17 00:00:00 2001 From: "aidan.belton" Date: Thu, 5 May 2022 10:17:57 +0100 Subject: [PATCH 9/9] add cuda interop context, queue, event --- .../ext/oneapi/experimental/backend/cuda.hpp | 4 +- sycl/plugins/cuda/pi_cuda.cpp | 97 +++++++++++++++---- sycl/plugins/cuda/pi_cuda.hpp | 8 ++ 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp index d3aa8c074ece5..184a109a321a9 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -22,12 +22,12 @@ namespace cuda { // Implementation of cuda::make inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { - return detail::make_device(NativeHandle, backend::cuda); + return sycl::detail::make_device(NativeHandle, backend::cuda); } // Implementation of cuda::make inline __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - return detail::make_platform(NativeHandle, backend::cuda); + return sycl::detail::make_platform(NativeHandle, backend::cuda); } } // namespace cuda diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 1f228aa1a194c..173bbee37e05b 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -344,6 +344,12 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue) cuda_piContextRetain(context_); } +_pi_event::_pi_event(pi_context context, CUevent eventNative) + : commandType_{PI_COMMAND_TYPE_USER}, refCount_{1}, hasBeenWaitedOn_{false}, + isRecorded_{false}, isStarted_{false}, evEnd_{eventNative}, + evStart_{nullptr}, evQueued_{nullptr}, queue_{nullptr}, context_{ + context} {} + _pi_event::~_pi_event() { if (queue_ != nullptr) { cuda_piQueueRelease(queue_); @@ -1977,8 +1983,6 @@ pi_result cuda_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - PI_CHECK_ERROR(cuEventDestroy(context->evBase_)); - if (!ctxt->is_primary()) { CUcontext cuCtxt = ctxt->get(); CUcontext current = nullptr; @@ -1986,6 +1990,7 @@ pi_result cuda_piContextRelease(pi_context ctxt) { if (cuCtxt != current) { PI_CHECK_ERROR(cuCtxPushCurrent(cuCtxt)); } + PI_CHECK_ERROR(cuEventDestroy(context->evBase_)); PI_CHECK_ERROR(cuCtxSynchronize()); cuCtxGetCurrent(¤t); if (cuCtxt == current) { @@ -1994,6 +1999,7 @@ pi_result cuda_piContextRelease(pi_context ctxt) { return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt)); } else { // Primary context is not destroyed, but released + PI_CHECK_ERROR(cuEventDestroy(context->evBase_)); CUdevice cuDev = ctxt->get_device()->get(); CUcontext current; cuCtxPopCurrent(¤t); @@ -2021,12 +2027,43 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context, /// \param[out] context Set to the PI context object created from native handle. /// /// \return TBD -pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle, pi_uint32, - const pi_device *, bool, - pi_context *) { - cl::sycl::detail::pi::die( - "Creation of PI context from native handle not implemented"); - return {}; +pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_uint32 num_devices, + const pi_device *devices, + bool ownNativeHandle, + pi_context *piContext) { + (void)num_devices; + (void)devices; + (void)ownNativeHandle; + assert(piContext != nullptr); + assert(ownNativeHandle == false); + + CUcontext newContext = reinterpret_cast(nativeHandle); + + // Push native context to thread + pi_result retErr = PI_CHECK_ERROR(cuCtxPushCurrent(newContext)); + + // Get context's native device + CUdevice cu_device; + retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device)); + + // Create a SYCL device from the ctx device + pi_device device = nullptr; + retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device); + + // Create sycl context + *piContext = + new _pi_context{_pi_context::kind::user_defined, newContext, device}; + + // Use default stream to record base event counter + retErr = + PI_CHECK_ERROR(cuEventCreate(&(*piContext)->evBase_, CU_EVENT_DEFAULT)); + retErr = PI_CHECK_ERROR(cuEventRecord((*piContext)->evBase_, 0)); + + // Pop native context + retErr = PI_CHECK_ERROR(cuCtxPopCurrent(nullptr)); + + return retErr; } /// Creates a PI Memory object using a CUDA memory allocation. @@ -2430,13 +2467,29 @@ pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, /// the native handle, if it can. /// /// \return TBD -pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle, pi_context, - pi_queue *, +pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context context, + pi_queue *queue, bool ownNativeHandle) { (void)ownNativeHandle; - cl::sycl::detail::pi::die( - "Creation of PI queue from native handle not implemented"); - return {}; + assert(ownNativeHandle == 1); + + unsigned int flags; + CUstream cuStream = reinterpret_cast(nativeHandle); + + auto retErr = PI_CHECK_ERROR(cuStreamGetFlags(cuStream, &flags)); + + pi_queue_properties properties = 0; + if (flags == CU_STREAM_DEFAULT) + properties = __SYCL_PI_CUDA_USE_DEFAULT_STREAM; + else if (flags == CU_STREAM_NON_BLOCKING) + properties = __SYCL_PI_CUDA_SYNC_WITH_DEFAULT; + else + cl::sycl::detail::pi::die("Unknown cuda stream"); + + *queue = new _pi_queue{cuStream, context, context->get_device(), properties}; + + return retErr; } pi_result cuda_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, @@ -3699,11 +3752,19 @@ pi_result cuda_piextEventGetNativeHandle(pi_event event, /// \param[out] event Set to the PI event object created from native handle. /// /// \return TBD -pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle, pi_context, - bool, pi_event *) { - cl::sycl::detail::pi::die( - "Creation of PI event from native handle not implemented"); - return {}; +pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_context context, + bool ownNativeHandle, + pi_event *event) { + (void)ownNativeHandle; + assert(ownNativeHandle == true); + + std::unique_ptr<_pi_event> event_ptr{nullptr}; + + *event = _pi_event::make_with_native(context, + reinterpret_cast(nativeHandle)); + + return PI_SUCCESS; } /// Creates a PI sampler object diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index aa8c8945265a2..a23a4f7ca9b9d 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -477,6 +477,10 @@ struct _pi_event { return new _pi_event(type, queue->get_context(), queue); } + static pi_event make_with_native(pi_context context, CUevent eventNative) { + return new _pi_event(context, eventNative); + } + pi_result release(); ~_pi_event(); @@ -486,6 +490,10 @@ struct _pi_event { // make_user static members in order to create a pi_event for CUDA. _pi_event(pi_command_type type, pi_context context, pi_queue queue); + // This constructor is private to force programmers to use the + // make_from_native / for event introp + _pi_event(pi_context context, CUevent eventNative); + pi_command_type commandType_; // The type of command associated with event. std::atomic_uint32_t refCount_; // Event reference count.