diff --git a/sycl/include/sycl/detail/properties_traits.def b/sycl/include/sycl/detail/properties_traits.def index 61101b9985d04..581380cc51f05 100644 --- a/sycl/include/sycl/detail/properties_traits.def +++ b/sycl/include/sycl/detail/properties_traits.def @@ -11,9 +11,9 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::no_init) __SYCL_PARAM_TRAITS_SPEC( sycl::property::context::cuda::use_primary_context) // Deprecated __SYCL_PARAM_TRAITS_SPEC( - sycl::ext::oneapi::cuda::property::context::use_primary_context) + sycl::ext::oneapi::cuda::property::context::use_primary_context) // Deprecated __SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order) __SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity) __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_low) __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_high) -__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal) \ No newline at end of file +__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal) diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp index 52d8cc81366dd..370866eb126d5 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp @@ -114,7 +114,7 @@ template <> struct BackendReturn { template <> struct InteropFeatureSupportMap { static constexpr bool MakePlatform = false; static constexpr bool MakeDevice = true; - static constexpr bool MakeContext = true; + static constexpr bool MakeContext = false; static constexpr bool MakeQueue = true; static constexpr bool MakeEvent = true; static constexpr bool MakeBuffer = false; diff --git a/sycl/include/sycl/properties/context_properties.hpp b/sycl/include/sycl/properties/context_properties.hpp index 443448684e001..d0764493fb063 100644 --- a/sycl/include/sycl/properties/context_properties.hpp +++ b/sycl/include/sycl/properties/context_properties.hpp @@ -15,8 +15,9 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext::oneapi::cuda::property::context { -class use_primary_context : public ::sycl::detail::DataLessProperty< - ::sycl::detail::UsePrimaryContext> {}; +class __SYCL_DEPRECATED("the primary contexts are now always used") + use_primary_context : public ::sycl::detail::DataLessProperty< + ::sycl::detail::UsePrimaryContext> {}; } // namespace ext::oneapi::cuda::property::context namespace property::context { diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b7c64ef9f9a58..2a1168f0b2067 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -917,8 +917,11 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, for (int i = 0; i < numDevices; ++i) { CUdevice device; err = PI_CHECK_ERROR(cuDeviceGet(&device, i)); + CUcontext context; + err = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&context, device)); + platformIds[i].devices_.emplace_back( - new _pi_device{device, &platformIds[i]}); + new _pi_device{device, context, &platformIds[i]}); { const auto &dev = platformIds[i].devices_.back().get(); @@ -1183,6 +1186,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, assert(device != nullptr); + ScopedContext active(device->get_context()); + switch (param_name) { case PI_DEVICE_INFO_TYPE: { return getInfo(param_value_size, param_value, param_value_size_ret, @@ -1961,7 +1966,6 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: { - ScopedContext active(device); size_t FreeMemory = 0; size_t TotalMemory = 0; sycl::detail::pi::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) == @@ -2121,50 +2125,10 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, assert(retcontext != nullptr); pi_result errcode_ret = PI_SUCCESS; - // Parse properties. - bool property_cuda_primary = false; - while (properties && (0 != *properties)) { - // Consume property ID. - pi_context_properties id = *properties; - ++properties; - // Consume property value. - pi_context_properties value = *properties; - ++properties; - switch (id) { - case __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY: - assert(value == PI_FALSE || value == PI_TRUE); - property_cuda_primary = static_cast(value); - break; - default: - // Unknown property. - sycl::detail::pi::die( - "Unknown piContextCreate property in property list"); - return PI_ERROR_INVALID_VALUE; - } - } - std::unique_ptr<_pi_context> piContextPtr{nullptr}; try { - CUcontext current = nullptr; - - if (property_cuda_primary) { - // Use the CUDA primary context and assume that we want to use it - // immediately as we want to forge context switches. - CUcontext Ctxt; - errcode_ret = - PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->get())); - piContextPtr = std::unique_ptr<_pi_context>( - new _pi_context{_pi_context::kind::primary, Ctxt, *devices}); - errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt)); - } else { - // Create a scoped context. - CUcontext newContext; - PI_CHECK_ERROR(cuCtxGetCurrent(¤t)); - errcode_ret = PI_CHECK_ERROR( - cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->get())); - piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ - _pi_context::kind::user_defined, newContext, *devices}); - } + piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{*devices}); + static std::once_flag initFlag; std::call_once( initFlag, @@ -2176,14 +2140,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, }, errcode_ret); - // For non-primary scoped contexts keep the last active on top of the stack - // as `cuCtxCreate` replaces it implicitly otherwise. - // Primary contexts are kept on top of the stack, so the previous context - // is not queried and therefore not recovered. - if (current != nullptr) { - PI_CHECK_ERROR(cuCtxSetCurrent(current)); - } - *retcontext = piContextPtr.release(); } catch (pi_result err) { errcode_ret = err; @@ -2194,7 +2150,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, } pi_result cuda_piContextRelease(pi_context ctxt) { - assert(ctxt != nullptr); if (ctxt->decrement_reference_count() > 0) { @@ -2204,29 +2159,7 @@ pi_result cuda_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - if (!ctxt->backend_has_ownership()) - return PI_SUCCESS; - - if (!ctxt->is_primary()) { - CUcontext cuCtxt = ctxt->get(); - CUcontext current = nullptr; - cuCtxGetCurrent(¤t); - if (cuCtxt != current) { - PI_CHECK_ERROR(cuCtxPushCurrent(cuCtxt)); - } - PI_CHECK_ERROR(cuCtxSynchronize()); - cuCtxGetCurrent(¤t); - if (cuCtxt == current) { - PI_CHECK_ERROR(cuCtxPopCurrent(¤t)); - } - return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt)); - } - - // Primary context is not destroyed, but released - CUdevice cuDev = ctxt->get_device()->get(); - CUcontext current; - cuCtxPopCurrent(¤t); - return PI_CHECK_ERROR(cuDevicePrimaryCtxRelease(cuDev)); + return PI_SUCCESS; } /// Gets the native CUDA handle of a PI context object @@ -2253,29 +2186,15 @@ pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, const pi_device *devices, bool ownNativeHandle, pi_context *piContext) { + (void)nativeHandle; (void)num_devices; (void)devices; (void)ownNativeHandle; + (void)piContext; assert(piContext != nullptr); assert(ownNativeHandle == false); - CUcontext newContext = reinterpret_cast(nativeHandle); - - ScopedContext active(newContext); - - // Get context's native device - CUdevice cu_device; - pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device)); - - // Create a SYCL device from the ctx device - pi_device device = nullptr; - retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device); - - // Create sycl context - *piContext = new _pi_context{_pi_context::kind::user_defined, newContext, - device, /*backend_owns*/ false}; - - return retErr; + return PI_ERROR_INVALID_OPERATION; } /// Creates a PI Memory object using a CUDA memory allocation. @@ -2469,8 +2388,6 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags, std::unique_ptr<_pi_mem> retMemObj{nullptr}; try { - ScopedContext active(context); - retMemObj = std::unique_ptr<_pi_mem>{new _pi_mem{ context, parent_buffer, allocMode, ptr, hostPtr, bufferRegion.size}}; } catch (pi_result err) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index b4949b03ad046..a957b8df603c7 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -86,28 +86,29 @@ struct _pi_device { using native_type = CUdevice; native_type cuDevice_; + CUcontext cuContext_; std::atomic_uint32_t refCount_; pi_platform platform_; - pi_context context_; static constexpr pi_uint32 max_work_item_dimensions = 3u; size_t max_work_item_sizes[max_work_item_dimensions]; int max_work_group_size; public: - _pi_device(native_type cuDevice, pi_platform platform) - : cuDevice_(cuDevice), refCount_{1}, platform_(platform) {} + _pi_device(native_type cuDevice, CUcontext cuContext, pi_platform platform) + : cuDevice_(cuDevice), cuContext_(cuContext), refCount_{1}, + platform_(platform) {} + + ~_pi_device() { cuDevicePrimaryCtxRelease(cuDevice_); } native_type get() const noexcept { return cuDevice_; }; + CUcontext get_context() const noexcept { return cuContext_; }; + pi_uint32 get_reference_count() const noexcept { return refCount_; } pi_platform get_platform() const noexcept { return platform_; }; - void set_context(pi_context ctx) { context_ = ctx; }; - - pi_context get_context() { return context_; }; - void save_max_work_item_sizes(size_t size, size_t *save_max_work_item_sizes) noexcept { memcpy(max_work_item_sizes, save_max_work_item_sizes, size); @@ -174,16 +175,12 @@ struct _pi_context { using native_type = CUcontext; - enum class kind { primary, user_defined } kind_; native_type cuContext_; _pi_device *deviceId_; std::atomic_uint32_t refCount_; - _pi_context(kind k, CUcontext ctxt, _pi_device *devId, - bool backend_owns = true) - : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1}, - has_ownership{backend_owns} { - deviceId_->set_context(this); + _pi_context(_pi_device *devId) + : cuContext_{devId->get_context()}, deviceId_{devId}, refCount_{1} { cuda_piDeviceRetain(deviceId_); }; @@ -206,20 +203,15 @@ struct _pi_context { native_type get() const noexcept { return cuContext_; } - bool is_primary() const noexcept { return kind_ == kind::primary; } - pi_uint32 increment_reference_count() noexcept { return ++refCount_; } pi_uint32 decrement_reference_count() noexcept { return --refCount_; } pi_uint32 get_reference_count() const noexcept { return refCount_; } - bool backend_has_ownership() const noexcept { return has_ownership; } - private: std::mutex mutex_; std::vector extended_deleters_; - const bool has_ownership; }; /// PI Mem mapping to CUDA memory allocations, both data and texture/surface. diff --git a/sycl/test/basic_tests/interop-cuda.cpp b/sycl/test/basic_tests/interop-cuda.cpp index b2e1cb3d9026f..234f1ed9e1da0 100644 --- a/sycl/test/basic_tests/interop-cuda.cpp +++ b/sycl/test/basic_tests/interop-cuda.cpp @@ -87,8 +87,6 @@ int main() { backend_input_t InteropContextInput{ cu_context[0]}; - context InteropContext = - make_context(InteropContextInput); event InteropEvent = make_event(cu_event, Context); queue InteropQueue = make_queue(cu_queue, Context); diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 15f7f7d2651b1..9bcc9e9f24d56 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -79,96 +79,6 @@ TEST_F(CudaBaseObjectsTest, piContextCreate) { cuCtxGetApiVersion(cudaContext, &version); EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - CUresult cuErr = cuCtxDestroy(cudaContext); - ASSERT_EQ(cuErr, CUDA_SUCCESS); -} - -TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryTrue) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - pi_context_properties properties[] = { - __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_TRUE, 0}; - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - properties, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - EXPECT_TRUE(ctxt->is_primary()); - - // Retrieve the cuCtxt to check information is correct - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - // Current context in the stack? - CUcontext current; - cuErr = cuCtxGetCurrent(¤t); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - ASSERT_EQ(current, cudaContext); - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} - -TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryFalse) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - pi_context_properties properties[] = { - __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_FALSE, 0}; - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - properties, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - EXPECT_FALSE(ctxt->is_primary()); - - // Retrieve the cuCtxt to check information is correct - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - // Current context in the stack? - CUcontext current; - cuErr = cuCtxGetCurrent(¤t); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - ASSERT_EQ(current, cudaContext); ASSERT_EQ((plugin->call_nocheck(ctxt)), PI_SUCCESS); }