intel · bader · Feb 9, 2023 · Feb 1, 2023 · Feb 3, 2023
@@ -11,9 +11,9 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::no_init)
 __SYCL_PARAM_TRAITS_SPEC(
     sycl::property::context::cuda::use_primary_context) // Deprecated
 __SYCL_PARAM_TRAITS_SPEC(
-    sycl::ext::oneapi::cuda::property::context::use_primary_context)
+    sycl::ext::oneapi::cuda::property::context::use_primary_context) // Deprecated
 __SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order)
 __SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity)
 __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_low)
 __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_high)
-__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal)
+__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal)
@@ -114,7 +114,7 @@ template <> struct BackendReturn<backend::ext_oneapi_cuda, platform> {
 template <> struct InteropFeatureSupportMap<backend::ext_oneapi_cuda> {
   static constexpr bool MakePlatform = false;
   static constexpr bool MakeDevice = true;
-  static constexpr bool MakeContext = true;
+  static constexpr bool MakeContext = false;
   static constexpr bool MakeQueue = true;
   static constexpr bool MakeEvent = true;
   static constexpr bool MakeBuffer = false;

@@ -15,8 +15,9 @@
 namespace sycl {
 __SYCL_INLINE_VER_NAMESPACE(_V1) {
 namespace ext::oneapi::cuda::property::context {
-class use_primary_context : public ::sycl::detail::DataLessProperty<
-                                ::sycl::detail::UsePrimaryContext> {};
+class __SYCL_DEPRECATED("the primary contexts are now always used")
+    use_primary_context : public ::sycl::detail::DataLessProperty<
+                              ::sycl::detail::UsePrimaryContext> {};
 } // namespace ext::oneapi::cuda::property::context
 
 namespace property::context {

@@ -917,8 +917,11 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms,
             for (int i = 0; i < numDevices; ++i) {
               CUdevice device;
               err = PI_CHECK_ERROR(cuDeviceGet(&device, i));
+              CUcontext context;
+              err = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&context, device));
+
               platformIds[i].devices_.emplace_back(
-                  new _pi_device{device, &platformIds[i]});
+                  new _pi_device{device, context, &platformIds[i]});
 
               {
                 const auto &dev = platformIds[i].devices_.back().get();
@@ -1183,6 +1186,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
 
   assert(device != nullptr);
 
+  ScopedContext active(device->get_context());
+
   switch (param_name) {
   case PI_DEVICE_INFO_TYPE: {
     return getInfo(param_value_size, param_value, param_value_size_ret,
@@ -1961,7 +1966,6 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
   }
 
   case PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY: {
-    ScopedContext active(device);
     size_t FreeMemory = 0;
     size_t TotalMemory = 0;
     sycl::detail::pi::assertion(cuMemGetInfo(&FreeMemory, &TotalMemory) ==
@@ -2121,50 +2125,10 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties,
   assert(retcontext != nullptr);
   pi_result errcode_ret = PI_SUCCESS;
 
-  // Parse properties.
-  bool property_cuda_primary = false;
-  while (properties && (0 != *properties)) {
-    // Consume property ID.
-    pi_context_properties id = *properties;
-    ++properties;
-    // Consume property value.
-    pi_context_properties value = *properties;
-    ++properties;
-    switch (id) {
-    case __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY:
-      assert(value == PI_FALSE || value == PI_TRUE);
-      property_cuda_primary = static_cast<bool>(value);
-      break;
-    default:
-      // Unknown property.
-      sycl::detail::pi::die(
-          "Unknown piContextCreate property in property list");
-      return PI_ERROR_INVALID_VALUE;
-    }
-  }
-
   std::unique_ptr<_pi_context> piContextPtr{nullptr};
   try {
-    CUcontext current = nullptr;
-
-    if (property_cuda_primary) {
-      // Use the CUDA primary context and assume that we want to use it
-      // immediately as we want to forge context switches.
-      CUcontext Ctxt;
-      errcode_ret =
-          PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->get()));
-      piContextPtr = std::unique_ptr<_pi_context>(
-          new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
-      errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt));
-    } else {
-      // Create a scoped context.
-      CUcontext newContext;
-      PI_CHECK_ERROR(cuCtxGetCurrent(&current));
-      errcode_ret = PI_CHECK_ERROR(
-          cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->get()));
-      piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
-          _pi_context::kind::user_defined, newContext, *devices});
-    }
+    piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{*devices});
+
     static std::once_flag initFlag;
     std::call_once(
         initFlag,
@@ -2176,14 +2140,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties,
         },
         errcode_ret);
 
-    // For non-primary scoped contexts keep the last active on top of the stack
-    // as `cuCtxCreate` replaces it implicitly otherwise.
-    // Primary contexts are kept on top of the stack, so the previous context
-    // is not queried and therefore not recovered.
-    if (current != nullptr) {
-      PI_CHECK_ERROR(cuCtxSetCurrent(current));
-    }
-
     *retcontext = piContextPtr.release();
   } catch (pi_result err) {
     errcode_ret = err;
@@ -2194,7 +2150,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties,
 }
 
 pi_result cuda_piContextRelease(pi_context ctxt) {
-
   assert(ctxt != nullptr);
 
   if (ctxt->decrement_reference_count() > 0) {
@@ -2204,29 +2159,7 @@ pi_result cuda_piContextRelease(pi_context ctxt) {
 
   std::unique_ptr<_pi_context> context{ctxt};
 
-  if (!ctxt->backend_has_ownership())
-    return PI_SUCCESS;
-
-  if (!ctxt->is_primary()) {
-    CUcontext cuCtxt = ctxt->get();
-    CUcontext current = nullptr;
-    cuCtxGetCurrent(&current);
-    if (cuCtxt != current) {
-      PI_CHECK_ERROR(cuCtxPushCurrent(cuCtxt));
-    }
-    PI_CHECK_ERROR(cuCtxSynchronize());
-    cuCtxGetCurrent(&current);
-    if (cuCtxt == current) {
-      PI_CHECK_ERROR(cuCtxPopCurrent(&current));
-    }
-    return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt));
-  }
-
-  // Primary context is not destroyed, but released
-  CUdevice cuDev = ctxt->get_device()->get();
-  CUcontext current;
-  cuCtxPopCurrent(&current);
-  return PI_CHECK_ERROR(cuDevicePrimaryCtxRelease(cuDev));
+  return PI_SUCCESS;
 }
 
 /// Gets the native CUDA handle of a PI context object
@@ -2253,29 +2186,15 @@ pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
                                                   const pi_device *devices,
                                                   bool ownNativeHandle,
                                                   pi_context *piContext) {
+  (void)nativeHandle;
   (void)num_devices;
   (void)devices;
   (void)ownNativeHandle;
+  (void)piContext;
   assert(piContext != nullptr);
   assert(ownNativeHandle == false);
 
-  CUcontext newContext = reinterpret_cast<CUcontext>(nativeHandle);
-
-  ScopedContext active(newContext);
-
-  // Get context's native device
-  CUdevice cu_device;
-  pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device));
-
-  // Create a SYCL device from the ctx device
-  pi_device device = nullptr;
-  retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device);
-
-  // Create sycl context
-  *piContext = new _pi_context{_pi_context::kind::user_defined, newContext,
-                               device, /*backend_owns*/ false};
-
-  return retErr;
+  return PI_ERROR_INVALID_OPERATION;
 }
 
 /// Creates a PI Memory object using a CUDA memory allocation.
@@ -2469,8 +2388,6 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags,
 
   std::unique_ptr<_pi_mem> retMemObj{nullptr};
   try {
-    ScopedContext active(context);
-
     retMemObj = std::unique_ptr<_pi_mem>{new _pi_mem{
         context, parent_buffer, allocMode, ptr, hostPtr, bufferRegion.size}};
   } catch (pi_result err) {

@@ -86,28 +86,29 @@ struct _pi_device {
   using native_type = CUdevice;
 
   native_type cuDevice_;
+  CUcontext cuContext_;
   std::atomic_uint32_t refCount_;
   pi_platform platform_;
-  pi_context context_;
 
   static constexpr pi_uint32 max_work_item_dimensions = 3u;
   size_t max_work_item_sizes[max_work_item_dimensions];
   int max_work_group_size;
 
 public:
-  _pi_device(native_type cuDevice, pi_platform platform)
-      : cuDevice_(cuDevice), refCount_{1}, platform_(platform) {}
+  _pi_device(native_type cuDevice, CUcontext cuContext, pi_platform platform)
+      : cuDevice_(cuDevice), cuContext_(cuContext), refCount_{1},
+        platform_(platform) {}
+
+  ~_pi_device() { cuDevicePrimaryCtxRelease(cuDevice_); }
 
   native_type get() const noexcept { return cuDevice_; };
 
+  CUcontext get_context() const noexcept { return cuContext_; };
+
   pi_uint32 get_reference_count() const noexcept { return refCount_; }
 
   pi_platform get_platform() const noexcept { return platform_; };
 
-  void set_context(pi_context ctx) { context_ = ctx; };
-
-  pi_context get_context() { return context_; };
-
   void save_max_work_item_sizes(size_t size,
                                 size_t *save_max_work_item_sizes) noexcept {
     memcpy(max_work_item_sizes, save_max_work_item_sizes, size);
@@ -174,16 +175,12 @@ struct _pi_context {
 
   using native_type = CUcontext;
 
-  enum class kind { primary, user_defined } kind_;
   native_type cuContext_;
   _pi_device *deviceId_;
   std::atomic_uint32_t refCount_;
 
-  _pi_context(kind k, CUcontext ctxt, _pi_device *devId,
-              bool backend_owns = true)
-      : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1},
-        has_ownership{backend_owns} {
-    deviceId_->set_context(this);
+  _pi_context(_pi_device *devId)
+      : cuContext_{devId->get_context()}, deviceId_{devId}, refCount_{1} {
     cuda_piDeviceRetain(deviceId_);
   };
 
@@ -206,20 +203,15 @@ struct _pi_context {
 
   native_type get() const noexcept { return cuContext_; }
 
-  bool is_primary() const noexcept { return kind_ == kind::primary; }
-
   pi_uint32 increment_reference_count() noexcept { return ++refCount_; }
 
   pi_uint32 decrement_reference_count() noexcept { return --refCount_; }
 
   pi_uint32 get_reference_count() const noexcept { return refCount_; }
 
-  bool backend_has_ownership() const noexcept { return has_ownership; }
-
 private:
   std::mutex mutex_;
   std::vector<deleter_data> extended_deleters_;
-  const bool has_ownership;
 };
 
 /// PI Mem mapping to CUDA memory allocations, both data and texture/surface.

@@ -87,8 +87,6 @@ int main() {
 
   backend_input_t<backend::ext_oneapi_cuda, context> InteropContextInput{
       cu_context[0]};
-  context InteropContext =
-      make_context<backend::ext_oneapi_cuda>(InteropContextInput);
   event InteropEvent = make_event<backend::ext_oneapi_cuda>(cu_event, Context);
 
   queue InteropQueue = make_queue<backend::ext_oneapi_cuda>(cu_queue, Context);

@@ -79,96 +79,6 @@ TEST_F(CudaBaseObjectsTest, piContextCreate) {
   cuCtxGetApiVersion(cudaContext, &version);
   EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
 
-  CUresult cuErr = cuCtxDestroy(cudaContext);
-  ASSERT_EQ(cuErr, CUDA_SUCCESS);
-}
-
-TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryTrue) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform;
-  pi_device device;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS);
-  pi_context_properties properties[] = {
-      __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_TRUE, 0};
-
-  pi_context ctxt;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                properties, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS);
-  EXPECT_NE(ctxt, nullptr);
-  EXPECT_EQ(ctxt->get_device(), device);
-  EXPECT_TRUE(ctxt->is_primary());
-
-  // Retrieve the cuCtxt to check information is correct
-  CUcontext cudaContext = ctxt->get();
-  unsigned int version = 0;
-  CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version);
-  ASSERT_EQ(cuErr, CUDA_SUCCESS);
-  EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
-
-  // Current context in the stack?
-  CUcontext current;
-  cuErr = cuCtxGetCurrent(&current);
-  ASSERT_EQ(cuErr, CUDA_SUCCESS);
-  ASSERT_EQ(current, cudaContext);
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
-            PI_SUCCESS);
-}
-
-TEST_F(CudaBaseObjectsTest, piContextCreatePrimaryFalse) {
-  pi_uint32 numPlatforms = 0;
-  pi_platform platform;
-  pi_device device;
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                0, nullptr, &numPlatforms)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
-                numPlatforms, &platform, nullptr)),
-            PI_SUCCESS)
-      << "piPlatformsGet failed.\n";
-
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
-                platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
-            PI_SUCCESS);
-  pi_context_properties properties[] = {
-      __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, PI_FALSE, 0};
-
-  pi_context ctxt;
-  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
-                properties, 1, &device, nullptr, nullptr, &ctxt)),
-            PI_SUCCESS);
-  EXPECT_NE(ctxt, nullptr);
-  EXPECT_EQ(ctxt->get_device(), device);
-  EXPECT_FALSE(ctxt->is_primary());
-
-  // Retrieve the cuCtxt to check information is correct
-  CUcontext cudaContext = ctxt->get();
-  unsigned int version = 0;
-  CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version);
-  ASSERT_EQ(cuErr, CUDA_SUCCESS);
-  EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
-
-  // Current context in the stack?
-  CUcontext current;
-  cuErr = cuCtxGetCurrent(&current);
-  ASSERT_EQ(cuErr, CUDA_SUCCESS);
-  ASSERT_EQ(current, cudaContext);
   ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
             PI_SUCCESS);
 }