diff --git a/sycl/include/CL/sycl/backend.hpp b/sycl/include/CL/sycl/backend.hpp index 9eb8f6bc57ef8..40a3b76bbe235 100644 --- a/sycl/include/CL/sycl/backend.hpp +++ b/sycl/include/CL/sycl/backend.hpp @@ -18,8 +18,12 @@ #include #endif #if SYCL_EXT_ONEAPI_BACKEND_CUDA +#ifdef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL +#include +#else #include #endif +#endif #if SYCL_EXT_ONEAPI_BACKEND_HIP #include #endif diff --git a/sycl/include/CL/sycl/context.hpp b/sycl/include/CL/sycl/context.hpp index a78e30f385f16..1716f65df6b6e 100644 --- a/sycl/include/CL/sycl/context.hpp +++ b/sycl/include/CL/sycl/context.hpp @@ -27,6 +27,8 @@ class platform; namespace detail { class context_impl; } +template +auto get_native(const SyclT &Obj) -> backend_return_t; /// The context class represents a SYCL context on which kernel functions may /// be executed. @@ -230,6 +232,10 @@ class __SYCL_EXPORT context { pi_native_handle getNative() const; std::shared_ptr impl; + + template + friend auto get_native(const SyclT &Obj) -> backend_return_t; + template friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject); diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp new file mode 100644 index 0000000000000..ef7ba55b4b2f8 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -0,0 +1,143 @@ +//===------- backend_traits_cuda.hpp - Backend traits for CUDA ---*-C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the specializations of the sycl::detail::interop, +// sycl::detail::BackendInput and sycl::detail::BackendReturn class templates +// for the CUDA backend but there is no sycl::detail::InteropFeatureSupportMap +// specialization for the CUDA backend. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +typedef int CUdevice; +typedef struct CUctx_st *CUcontext; +typedef struct CUstream_st *CUstream; +typedef struct CUevent_st *CUevent; +typedef struct CUmod_st *CUmodule; + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +// TODO the interops for context, device, event, platform and program +// may be removed after removing the deprecated 'get_native()' methods +// from the corresponding classes. The interop specialization +// is also used in the get_queue() method of the deprecated class +// interop_handler and also can be removed after API cleanup. +template <> struct interop { + using type = CUcontext; +}; + +template <> struct interop { + using type = CUdevice; +}; + +template <> struct interop { + using type = CUevent; +}; + +template <> struct interop { + using type = CUstream; +}; + +template <> struct interop { + using type = std::vector; +}; + +#ifdef __SYCL_INTERNAL_API +template <> struct interop { + using type = CUmodule; +}; +#endif + +template +struct BackendInput> { + using type = DataT *; +}; + +template +struct BackendReturn> { + using type = DataT *; +}; + +template <> struct BackendInput { + using type = CUcontext; +}; + +template <> struct BackendReturn { + using type = std::vector; +}; + +template <> struct BackendInput { + using type = CUdevice; +}; + +template <> struct BackendReturn { + using type = CUdevice; +}; + +template <> struct BackendInput { + using type = CUevent; +}; + +template <> struct BackendReturn { + using type = CUevent; +}; + +template <> struct BackendInput { + using type = CUstream; +}; + +template <> struct BackendReturn { + using type = CUstream; +}; + +template <> struct BackendInput { + using type = std::vector; +}; + +template <> struct BackendReturn { + using type = std::vector; +}; + +#ifdef __SYCL_INTERNAL_API +template <> struct BackendInput { + using type = CUmodule; +}; + +template <> struct BackendReturn { + using type = CUmodule; +}; +#endif + +template <> struct InteropFeatureSupportMap { + static constexpr bool MakePlatform = false; + static constexpr bool MakeDevice = true; + static constexpr bool MakeContext = true; + static constexpr bool MakeQueue = false; + static constexpr bool MakeEvent = false; + static constexpr bool MakeBuffer = false; + static constexpr bool MakeKernel = false; + static constexpr bool MakeKernelBundle = false; +}; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp new file mode 100644 index 0000000000000..85680c989dd96 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp @@ -0,0 +1,75 @@ +//==--------- cuda.hpp - SYCL CUDA backend ---------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace oneapi { +namespace cuda { + +// Implementation of ext_oneapi_cuda::make +inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { + return sycl::detail::make_device(NativeHandle, backend::ext_oneapi_cuda); +} + +} // namespace cuda +} // namespace oneapi +} // namespace ext + +// CUDA context specialization +template <> +inline auto get_native(const context &C) + -> backend_return_t { + // create a vector to be returned + backend_return_t ret; + + // get the native CUDA context from the SYCL object + auto native = reinterpret_cast< + backend_return_t::value_type>( + C.getNative()); + ret.push_back(native); + + return ret; +} + +// Specialisation of non-free context get_native +template <> +inline backend_return_t +context::get_native() const { + return sycl::get_native(*this); +} + +// Specialisation of interop_handles get_native_context +template <> +inline backend_return_t +interop_handle::get_native_context() const { +#ifndef __SYCL_DEVICE_ONLY__ + return std::vector{reinterpret_cast(getNativeContext())}; +#else + // we believe this won't be ever called on device side + return {}; +#endif +} + +// CUDA device specialization +template <> +inline device make_device( + const backend_input_t &BackendObject) { + pi_native_handle NativeHandle = static_cast(BackendObject); + return ext::oneapi::cuda::make_device(NativeHandle); +} + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 5e8dfb0dbf6f5..3a0dc83f95f44 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -184,7 +184,15 @@ class ScopedContext { throw PI_ERROR_INVALID_CONTEXT; } - CUcontext desired = ctxt->get(); + set_context(ctxt->get()); + } + + ScopedContext(CUcontext ctxt) { set_context(ctxt); } + + ~ScopedContext() {} + +private: + void set_context(CUcontext desired) { CUcontext original = nullptr; PI_CHECK_ERROR(cuCtxGetCurrent(&original)); @@ -195,8 +203,6 @@ class ScopedContext { PI_CHECK_ERROR(cuCtxSetCurrent(desired)); } } - - ~ScopedContext() {} }; /// \cond NODOXY @@ -1845,19 +1851,59 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device, } /// Created a PI device object from a CUDA device handle. -/// TODO: Implement this. -/// NOTE: The created PI object takes ownership of the native handle. +/// NOTE: The created PI object does not take ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI device object from. /// \param[in] platform is the PI platform of the device. /// \param[out] device Set to the PI device object created from native handle. /// /// \return TBD -pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle, pi_platform, - pi_device *) { - cl::sycl::detail::pi::die( - "Creation of PI device from native handle not implemented"); - return {}; +pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_platform platform, + pi_device *piDevice) { + assert(piDevice != nullptr); + + CUdevice cu_device = static_cast(nativeHandle); + + auto is_device = [=](std::unique_ptr<_pi_device> &dev) { + return dev->get() == cu_device; + }; + + // If a platform is provided just check if the device is in it + if (platform) { + auto search_res = std::find_if(begin(platform->devices_), + end(platform->devices_), is_device); + if (search_res != end(platform->devices_)) { + *piDevice = (*search_res).get(); + return PI_SUCCESS; + } + } + + // Get list of platforms + pi_uint32 num_platforms; + pi_result result = cuda_piPlatformsGet(0, nullptr, &num_platforms); + if (result != PI_SUCCESS) + return result; + + pi_platform *plat = + static_cast(malloc(num_platforms * sizeof(pi_platform))); + result = cuda_piPlatformsGet(num_platforms, plat, nullptr); + if (result != PI_SUCCESS) + return result; + + // Iterate through platforms to find device that matches nativeHandle + for (pi_uint32 j = 0; j < num_platforms; ++j) { + auto search_res = std::find_if(begin(plat[j]->devices_), + end(plat[j]->devices_), is_device); + if (search_res != end(plat[j]->devices_)) { + *piDevice = (*search_res).get(); + return PI_SUCCESS; + } + } + + // If the provided nativeHandle cannot be matched to an + // existing device return error + return PI_ERROR_INVALID_OPERATION; } /* Context APIs */ @@ -1980,6 +2026,9 @@ pi_result cuda_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; + if (!ctxt->backend_has_ownership()) + return PI_SUCCESS; + if (!ctxt->is_primary()) { CUcontext cuCtxt = ctxt->get(); CUcontext current = nullptr; @@ -1993,13 +2042,13 @@ pi_result cuda_piContextRelease(pi_context ctxt) { PI_CHECK_ERROR(cuCtxPopCurrent(¤t)); } return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt)); - } else { - // Primary context is not destroyed, but released - CUdevice cuDev = ctxt->get_device()->get(); - CUcontext current; - cuCtxPopCurrent(¤t); - return PI_CHECK_ERROR(cuDevicePrimaryCtxRelease(cuDev)); } + + // Primary context is not destroyed, but released + CUdevice cuDev = ctxt->get_device()->get(); + CUcontext current; + cuCtxPopCurrent(¤t); + return PI_CHECK_ERROR(cuDevicePrimaryCtxRelease(cuDev)); } /// Gets the native CUDA handle of a PI context object @@ -2015,19 +2064,40 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context, } /// Created a PI context object from a CUDA context handle. -/// TODO: Implement this. -/// NOTE: The created PI object takes ownership of the native handle. +/// NOTE: The created PI object does not take ownership of the native handle. /// /// \param[in] nativeHandle The native handle to create PI context object from. /// \param[out] context Set to the PI context object created from native handle. /// /// \return TBD -pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle, pi_uint32, - const pi_device *, bool, - pi_context *) { - cl::sycl::detail::pi::die( - "Creation of PI context from native handle not implemented"); - return {}; +pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, + pi_uint32 num_devices, + const pi_device *devices, + bool ownNativeHandle, + pi_context *piContext) { + (void)num_devices; + (void)devices; + (void)ownNativeHandle; + assert(piContext != nullptr); + assert(ownNativeHandle == false); + + CUcontext newContext = reinterpret_cast(nativeHandle); + + ScopedContext active(newContext); + + // Get context's native device + CUdevice cu_device; + pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device)); + + // Create a SYCL device from the ctx device + pi_device device = nullptr; + retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device); + + // Create sycl context + *piContext = new _pi_context{_pi_context::kind::user_defined, newContext, + device, /*backend_owns*/ false}; + + return retErr; } /// Creates a PI Memory object using a CUDA memory allocation. diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 24d9ac9827d2f..b085429defd17 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -163,8 +163,10 @@ struct _pi_context { _pi_device *deviceId_; std::atomic_uint32_t refCount_; - _pi_context(kind k, CUcontext ctxt, _pi_device *devId) - : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1} { + _pi_context(kind k, CUcontext ctxt, _pi_device *devId, + bool backend_owns = true) + : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1}, + has_ownership{backend_owns} { cuda_piDeviceRetain(deviceId_); }; @@ -195,9 +197,12 @@ struct _pi_context { pi_uint32 get_reference_count() const noexcept { return refCount_; } + bool backend_has_ownership() const noexcept { return has_ownership; } + private: std::mutex mutex_; std::vector extended_deleters_; + const bool has_ownership; }; /// PI Mem mapping to CUDA memory allocations, both data and texture/surface. diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index b19cb9abb30f7..f492716c47fa0 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -35,6 +35,8 @@ static const plugin &getPlugin(backend Backend) { return pi::getPlugin(); case backend::ext_oneapi_level_zero: return pi::getPlugin(); + case backend::ext_oneapi_cuda: + return pi::getPlugin(); default: throw sycl::runtime_error{"Unsupported backend", PI_ERROR_INVALID_OPERATION}; diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index b3ee74f1c8fe4..7d212ca2282ed 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -529,6 +529,7 @@ template __SYCL_EXPORT const plugin & getPlugin(); template __SYCL_EXPORT const plugin & getPlugin(); +template __SYCL_EXPORT const plugin &getPlugin(); // Report error and no return (keeps compiler from printing warnings). // TODO: Probably change that to throw a catchable exception, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 655c608e381b7..836e0af8d8523 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3932,6 +3932,7 @@ _ZN2cl4sycl6detail2pi9assertionEbPKc _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE1EEERKNS1_6pluginEv _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE2EEERKNS1_6pluginEv _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE5EEERKNS1_6pluginEv +_ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE3EEERKNS1_6pluginEv _ZN2cl4sycl6detail36get_empty_interop_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EE _ZN2cl4sycl6detail3usm12alignedAllocEmmRKNS0_7contextERKNS0_6deviceENS0_3usm5allocE _ZN2cl4sycl6detail3usm16alignedAllocHostEmmRKNS0_7contextENS0_3usm5allocE diff --git a/sycl/test/basic_tests/interop-backend-traits.cpp b/sycl/test/basic_tests/interop-backend-traits.cpp index b392b3fb89efd..fcf5de70b1aee 100644 --- a/sycl/test/basic_tests/interop-backend-traits.cpp +++ b/sycl/test/basic_tests/interop-backend-traits.cpp @@ -2,6 +2,7 @@ // RUN: %clangxx -fsycl -DUSE_L0 %s // RUN: %clangxx -fsycl -DUSE_CUDA %s // RUN: %clangxx -fsycl -DUSE_HIP %s +// RUN: %clangxx -fsycl -DUSE_CUDA_EXPERIMENTAL %s #ifdef USE_OPENCL #include @@ -31,6 +32,14 @@ constexpr auto Backend = sycl::backend::ext_oneapi_cuda; constexpr auto Backend = sycl::backend::ext_oneapi_hip; #endif +#ifdef USE_CUDA_EXPERIMENTAL +#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 +#include +#include + +constexpr auto Backend = sycl::backend::ext_oneapi_cuda; +#endif + #include int main() { @@ -63,9 +72,12 @@ int main() { static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); +// CUDA experimental return type is different to inpt type +#ifndef USE_CUDA_EXPERIMENTAL static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); +#endif static_assert( std::is_same_v::return_type, sycl::detail::interop::type>); diff --git a/sycl/test/basic_tests/interop-cuda-experimental.cpp b/sycl/test/basic_tests/interop-cuda-experimental.cpp new file mode 100644 index 0000000000000..4c652ba9ce993 --- /dev/null +++ b/sycl/test/basic_tests/interop-cuda-experimental.cpp @@ -0,0 +1,89 @@ +// REQUIRES: cuda +// RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s -o %t.out +// RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note -D__SYCL_INTERNAL_API %s -o %t.out + +// Test for experimental CUDA interop API + +#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1 +#include +#include + +using namespace sycl; + +// +// 4.5.1 SYCL application interoperability may be provided for +// platform, +// device, +// context, +// queue, +// event, +// buffer, +// device_image, +// sampled_image, +// unsampled_image. + +int main() { + + // Create SYCL objects + device Device; + context Context(Device); + queue Queue(Device); + + // 4.5.1.1 For each SYCL runtime class T which supports SYCL application + // interoperability with the SYCL backend, a specialization of return_type + // must be defined as the type of SYCL application interoperability native + // backend object associated with T for the SYCL backend, specified in the + // SYCL backend specification. + // + // return_type is used when retrieving the backend specific native object from + // a SYCL object. See the relevant backend specification for details. + + backend_traits::return_type cu_device; + backend_traits::return_type cu_context; + + // 4.5.1.2 For each SYCL runtime class T which supports SYCL application + // interoperability, a specialization of get_native must be defined, which + // takes an instance of T and returns a SYCL application interoperability + // native backend object associated with syclObject which can be used for SYCL + // application interoperability. The lifetime of the object returned are + // backend-defined and specified in the backend specification. + + cu_device = get_native(Device); + cu_context = get_native(Context); + + // Check deprecated + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_device = Device.get_native(); + // expected-warning@+2 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + // expected-warning@+1 {{'get_native' is deprecated: Use SYCL 2020 sycl::get_native free function}} + cu_context = Context.get_native(); + + // 4.5.1.1 For each SYCL runtime class T which supports SYCL application + // interoperability with the SYCL backend, a specialization of input_type must + // be defined as the type of SYCL application interoperability native backend + // object associated with T for the SYCL backend, specified in the SYCL + // backend specification. input_type is used when constructing SYCL objects + // from backend specific native objects. See the relevant backend + // specification for details. + + // 4.5.1.3 For each SYCL runtime class T which supports SYCL application + // interoperability, a specialization of the appropriate template function + // make_{sycl_class} where {sycl_class} is the class name of T, must be + // defined, which takes a SYCL application interoperability native backend + // object and constructs and returns an instance of T. The availability and + // behavior of these template functions is defined by the SYCL backend + // specification document. + + backend_input_t InteropDeviceInput{ + cu_device}; + device InteropDevice = + make_device(InteropDeviceInput); + + backend_input_t InteropContextInput{ + cu_context[0]}; + context InteropContext = + make_context(InteropContextInput); + + return 0; +}