From 1af5e56672b2f92552283095acb63ee7edecd4b9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 8 Nov 2022 18:05:59 -0600 Subject: [PATCH 1/3] Added support for consuming DLPack allocated on a sub-device USM allocation must be bound to the default context. When producing DLPack, device_id is populated with id of the ancestor root device. Code remains functional on systems where default_context extension support is not enabled (e.g. Windows), but DLPack sharing is limited to allocation made on root devices only. --- dpctl/tensor/_dlpack.pyx | 102 +++++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index f218ef8c8e..a58f4c29bb 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -33,12 +33,18 @@ from .._backend cimport ( ) from ._usmarray cimport usm_ndarray +from platform import system as sys_platform + import numpy as np import dpctl import dpctl.memory as dpmem +cdef bint _IS_LINUX = sys_platform() == "Linux" + +del sys_platform + cdef extern from 'dlpack/dlpack.h' nogil: cdef int DLPACK_VERSION @@ -140,6 +146,7 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: cdef c_dpctl.SyclQueue ary_sycl_queue cdef c_dpctl.SyclDevice ary_sycl_device cdef DPCTLSyclDeviceRef pDRef = NULL + cdef DPCTLSyclDeviceRef tDRef = NULL cdef DLManagedTensor *dlm_tensor = NULL cdef DLTensor *dl_tensor = NULL cdef int nd = usm_ary.get_ndim() @@ -157,19 +164,45 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: ary_sycl_queue = usm_ary.get_sycl_queue() ary_sycl_device = ary_sycl_queue.get_sycl_device() - # check that ary_sycl_device is a non-partitioned device - pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref()) - if pDRef is not NULL: - DPCTLDevice_Delete(pDRef) - raise DLPackCreationError( - "to_dlpack_capsule: DLPack can only export arrays allocated on " - "non-partitioned SYCL devices." 
- ) - default_context = dpctl.SyclQueue(ary_sycl_device).sycl_context - if not usm_ary.sycl_context == default_context: + try: + if _IS_LINUX: + default_context = ary_sycl_device.sycl_platform.default_context + else: + default_context = None + except RuntimeError: + # RT does not support default_context, e.g. Windows + default_context = None + if default_context is None: + # check that ary_sycl_device is a non-partitioned device + pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref()) + if pDRef is not NULL: + DPCTLDevice_Delete(pDRef) + raise DLPackCreationError( + "to_dlpack_capsule: DLPack can only export arrays allocated " + "on non-partitioned SYCL devices on platforms where " + "default_context oneAPI extension is not supported." + ) + else: + if not usm_ary.sycl_context == default_context: + raise DLPackCreationError( + "to_dlpack_capsule: DLPack can only export arrays based on USM " + "allocations bound to a default platform SYCL context" + ) + # Find the unpartitioned parent of the allocation device + pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref()) + if pDRef is not NULL: + tDRef = DPCTLDevice_GetParentDevice(pDRef) + while tDRef is not NULL: + DPCTLDevice_Delete(pDRef) + pDRef = tDRef + tDRef = DPCTLDevice_GetParentDevice(pDRef) + ary_sycl_device = c_dpctl.SyclDevice._create(pDRef) + + # Find ordinal number of the parent device + device_id = ary_sycl_device.get_overall_ordinal() + if device_id < 0: raise DLPackCreationError( - "to_dlpack_capsule: DLPack can only export arrays based on USM " - "allocations bound to a default platform SYCL context" + "to_dlpack_capsule: failed to determine device_id" ) dlm_tensor = stdlib.malloc( @@ -192,14 +225,6 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: for i in range(nd): shape_strides_ptr[nd + i] = strides_ptr[i] - device_id = ary_sycl_device.get_overall_ordinal() - if device_id < 0: - stdlib.free(shape_strides_ptr) - stdlib.free(dlm_tensor) - raise 
DLPackCreationError( - "to_dlpack_capsule: failed to determine device_id" - ) - ary_dt = usm_ary.dtype ary_dtk = ary_dt.kind element_offset = usm_ary.get_offset() @@ -278,8 +303,8 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: success. Raises: TypeError: if argument is not a "dltensor" capsule. - ValueError: if argument is "used_dltensor" capsule, - if the USM pointer is not bound to the reconstructed + ValueError: if argument is "used_dltensor" capsule + BufferError: if the USM pointer is not bound to the reconstructed sycl context, or the DLPack's device_type is not supported by dpctl. """ @@ -287,6 +312,7 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: cdef bytes usm_type cdef size_t sz = 1 cdef int i + cdef int device_id = -1 cdef int element_bytesize = 0 cdef Py_ssize_t offset_min = 0 cdef Py_ssize_t offset_max = 0 @@ -308,26 +334,40 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: py_caps, "dltensor") # Verify that we can work with this device if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI: - q = dpctl.SyclQueue(str(dlm_tensor.dl_tensor.device.device_id)) + device_id = dlm_tensor.dl_tensor.device.device_id + root_device = dpctl.SyclDevice(str(device_id)) + try: + if _IS_LINUX: + default_context = root_device.sycl_platform.default_context + else: + default_context = dpctl.SyclQueue(root_device).sycl_context + except RuntimeError: + default_context = dpctl.SyclQueue(root_device).sycl_context if dlm_tensor.dl_tensor.data is NULL: usm_type = b"device" + q = dpctl.SyclQueue(default_context, root_device) else: usm_type = c_dpmem._Memory.get_pointer_type( dlm_tensor.dl_tensor.data, - q.sycl_context) - if usm_type == b"unknown": - raise ValueError( - f"Data pointer in DLPack is not bound to default sycl " - "context of device '{device_id}', translated to " - "{q.sycl_device.filter_string}" + default_context) + if usm_type == b"unknown": + raise BufferError( + "Data pointer in DLPack is not bound to 
default sycl " + f"context of device '{device_id}', translated to " + f"{root_device.filter_string}" + ) + alloc_device = c_dpmem._Memory.get_pointer_device( + dlm_tensor.dl_tensor.data, + default_context ) + q = dpctl.SyclQueue(default_context, alloc_device) if dlm_tensor.dl_tensor.dtype.bits % 8: - raise ValueError( + raise BufferError( "Can not import DLPack tensor whose element's " "bitsize is not a multiple of 8" ) if dlm_tensor.dl_tensor.dtype.lanes != 1: - raise ValueError( + raise BufferError( "Can not import DLPack tensor with lanes != 1" ) if dlm_tensor.dl_tensor.strides is NULL: From 72164a0175a055186ad5fd18a9ef2bb5c5421525 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 8 Nov 2022 19:54:17 -0600 Subject: [PATCH 2/3] Raise RuntimeError if the underlying library call for default_context fails --- dpctl/_sycl_platform.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index 00c09cee6d..9356c9f8a7 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -272,7 +272,7 @@ cdef class SyclPlatform(_SyclPlatform): ) if (CRef == NULL): - raise + raise RuntimeError("Getting default error ran into a problem") else: return SyclContext._create(CRef) From 7bd938d667fbe39872cf7b0d3319342a5a09408d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 8 Nov 2022 19:56:00 -0600 Subject: [PATCH 3/3] Added entry to changelog for changes in this PR --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dec900e9e3..fc4a2d9ec8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Introduced `"syclinterface/dpctl_sycl_types_casters.hpp"` header file with declaration of conversion routines between SYCL type pointers and SyclInterface library opaque pointers [#960](https://github.com/IntelPython/dpctl/pull/960).
* Added C-API to `dpctl.program.SyclKernel` and `dpctl.program.SyclProgram`. Added type casters for new types to "dpctl4pybind11" and added an example demonstrating its use [#970](https://github.com/IntelPython/dpctl/pull/970). * Introduced "dpctl/sycl.pxd" Cython declaration file to streamline use of SYCL functions from Cython, and added an example demonstrating its use [#981](https://github.com/IntelPython/dpctl/pull/981). +* Added experimental support for sharing data allocated on sub-devices via dlpack [#984](https://github.com/IntelPython/dpctl/pull/984). ### Changed * Improved queue compatibility testing in `dpctl.tensor`'s implementation module [#900](https://github.com/IntelPython/dpctl/pull/900).