diff --git a/docs/doc_sources/api_reference/dpctl/tensor.constants.rst b/docs/doc_sources/api_reference/dpctl/tensor.constants.rst new file mode 100644 index 0000000000..2cb9f770d2 --- /dev/null +++ b/docs/doc_sources/api_reference/dpctl/tensor.constants.rst @@ -0,0 +1,35 @@ +.. _dpctl_tensor_constants: + +Constants +======================== + +The following constants are defined in :py:mod:`dpctl.tensor`: + +.. currentmodule:: dpctl.tensor + +.. autodata:: DLDeviceType + +.. data:: e + + ``float``: + IEEE 754 floating-point representation of Euler's constant. + +.. data:: inf + + ``float``: + IEEE 754 floating-point representation of (positive) infinity. + +.. data:: nan + + ``float``: + IEEE 754 floating-point representation of Not a Number (NaN). + +.. data:: newaxis + + ``NoneType``: + Alias for ``None`` which is useful for indexing. + +.. data:: pi + + ``float``: + IEEE 754 floating-point representation of the mathematical constant π. diff --git a/docs/doc_sources/api_reference/dpctl/tensor.rst b/docs/doc_sources/api_reference/dpctl/tensor.rst index d2aaa6fbc4..10e1f65d9f 100644 --- a/docs/doc_sources/api_reference/dpctl/tensor.rst +++ b/docs/doc_sources/api_reference/dpctl/tensor.rst @@ -29,6 +29,7 @@ This module contains: * :ref:`sorting functions ` * :ref:`statistical functions ` * :ref:`utility functions ` +* :ref:`constants ` .. toctree:: @@ -48,3 +49,4 @@ This module contains: tensor.sorting_functions tensor.statistical_functions tensor.utility_functions + tensor.constants diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 579b56d3a3..bcbd9d4f32 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -100,7 +100,7 @@ from dpctl.tensor._reshape import reshape from dpctl.tensor._search_functions import where from dpctl.tensor._statistical_functions import mean, std, var -from dpctl.tensor._usmarray import usm_ndarray +from dpctl.tensor._usmarray import DLDeviceType, usm_ndarray from dpctl.tensor._utility_functions import all, any, diff from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum @@ -383,5 +383,6 @@ "nextafter", "diff", "count_nonzero", + "DLDeviceType", "take_along_axis", ] diff --git a/dpctl/tensor/_dlpack.pxd b/dpctl/tensor/_dlpack.pxd index 9846f54be6..81ecf16967 100644 --- a/dpctl/tensor/_dlpack.pxd +++ b/dpctl/tensor/_dlpack.pxd @@ -18,6 +18,10 @@ # cython: language_level=3 # cython: linetrace=True +cdef extern from "numpy/npy_no_deprecated_api.h": + pass +from numpy cimport ndarray + from .._sycl_device cimport SyclDevice from ._usmarray cimport usm_ndarray @@ -40,7 +44,8 @@ cdef extern from 'dlpack/dlpack.h' nogil: cpdef object to_dlpack_capsule(usm_ndarray array) except + cpdef object to_dlpack_versioned_capsule(usm_ndarray array, bint copied) except + -cpdef usm_ndarray from_dlpack_capsule(object dltensor) except + +cpdef object numpy_to_dlpack_versioned_capsule(ndarray array, bint copied) except + +cpdef object from_dlpack_capsule(object dltensor) except + cdef int get_parent_device_ordinal_id(SyclDevice dev) except * diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index ba2283eb50..098003ead2 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -18,9 +18,13 @@ # cython: language_level=3 # cython: linetrace=True +cdef extern from "numpy/npy_no_deprecated_api.h": + pass + cimport cpython from libc cimport stdlib from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t +from numpy cimport ndarray cimport dpctl as c_dpctl cimport dpctl.memory 
as c_dpmem
@@ -34,6 +38,8 @@ from .._backend cimport (
 )
 from ._usmarray cimport USM_ARRAY_C_CONTIGUOUS, USM_ARRAY_WRITABLE, usm_ndarray
 
+import ctypes
+
 import numpy as np
 
 import dpctl
@@ -162,7 +168,7 @@ cdef void _managed_tensor_versioned_deleter(DLManagedTensorVersioned *dlmv_tenso
         stdlib.free(dlmv_tensor)
 
 
-cdef object _get_default_context(c_dpctl.SyclDevice dev) except *:
+cdef object _get_default_context(c_dpctl.SyclDevice dev):
     try:
         default_context = dev.sycl_platform.default_context
     except RuntimeError:
@@ -172,7 +178,7 @@ cdef object _get_default_context(c_dpctl.SyclDevice dev) except *:
     return default_context
 
 
-cdef int get_parent_device_ordinal_id(c_dpctl.SyclDevice dev) except *:
+cdef int get_parent_device_ordinal_id(c_dpctl.SyclDevice dev) except -1:
     cdef DPCTLSyclDeviceRef pDRef = NULL
     cdef DPCTLSyclDeviceRef tDRef = NULL
     cdef c_dpctl.SyclDevice p_dev
@@ -195,7 +201,7 @@ cdef int get_parent_device_ordinal_id(c_dpctl.SyclDevice dev) except *:
 
 cdef int get_array_dlpack_device_id(
     usm_ndarray usm_ary
-) except *:
+) except -1:
     """Finds ordinal number of the parent of device where array was
    allocated.
    """
@@ -475,6 +481,127 @@ cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied):
     return cpython.PyCapsule_New(dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter)
 
 
+cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied):
+    """
+    numpy_to_dlpack_versioned_capsule(npy_ary, copied)
+
+    Constructs a named Python capsule object referencing an
+    instance of ``DLManagedTensorVersioned`` created from a
+    :class:`numpy.ndarray` instance.
+
+    Args:
+        npy_ary: An instance of :class:`numpy.ndarray`
+        copied: A bint representing whether the data was previously
+            copied in order to set the flags with the is-copied
+            bitmask.
+    Returns:
+        A new capsule with name ``"dltensor_versioned"`` that
+        contains a pointer to ``DLManagedTensorVersioned`` struct.
+    Raises:
+        BufferError: when the array can not be represented as
+            a DLPack tensor.
+        MemoryError: when the host allocation needed for
+            ``DLManagedTensorVersioned`` did not succeed.
+        ValueError: when the array element data type could not be
+            represented in ``DLManagedTensorVersioned``.
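+
+    Example:
+        A minimal round-trip sketch; the capsule is ordinarily
+        consumed by :func:`from_dlpack_capsule`:
+
+        .. code-block:: python
+
+            import numpy as np
+
+            from dpctl.tensor._dlpack import (
+                from_dlpack_capsule,
+                numpy_to_dlpack_versioned_capsule,
+            )
+
+            x = np.arange(10, dtype="i4")
+            cap = numpy_to_dlpack_versioned_capsule(x, False)
+            y = from_dlpack_capsule(cap)  # zero-copy view of x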
+ """ + cdef DLManagedTensorVersioned *dlmv_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef uint32_t dlmv_flags = 0 + cdef int nd = npy_ary.ndim + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t *strides_ptr = NULL + cdef int64_t *shape_strides_ptr = NULL + cdef int i = 0 + cdef int device_id = -1 + cdef Py_ssize_t byte_offset = 0 + cdef int itemsize = npy_ary.itemsize + + dlmv_tensor = stdlib.malloc( + sizeof(DLManagedTensorVersioned)) + if dlmv_tensor is NULL: + raise MemoryError( + "numpy_to_dlpack_versioned_capsule: Could not allocate memory " + "for DLManagedTensorVersioned" + ) + + is_c_contiguous = npy_ary.flags["C"] + shape = npy_ary.ctypes.shape_as(ctypes.c_int64) + strides = npy_ary.ctypes.strides_as(ctypes.c_int64) + if not is_c_contiguous: + if npy_ary.size != 1: + for i in range(nd): + if shape[i] != 1 and strides[i] % itemsize != 0: + stdlib.free(dlmv_tensor) + raise BufferError( + "numpy_to_dlpack_versioned_capsule: DLPack cannot encode " + "an array if strides are not a multiple of itemsize" + ) + shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) + else: + # no need to pass strides in this case + shape_strides_ptr = stdlib.malloc(sizeof(int64_t) * nd) + if shape_strides_ptr is NULL: + stdlib.free(dlmv_tensor) + raise MemoryError( + "numpy_to_dlpack_versioned_capsule: Could not allocate memory " + "for shape/strides" + ) + for i in range(nd): + shape_strides_ptr[i] = shape[i] + if not is_c_contiguous: + shape_strides_ptr[nd + i] = strides[i] // itemsize + + writable_flag = npy_ary.flags["W"] + + ary_dt = npy_ary.dtype + ary_dtk = ary_dt.kind + + dl_tensor = &dlmv_tensor.dl_tensor + dl_tensor.data = npy_ary.data + dl_tensor.ndim = nd + dl_tensor.byte_offset = byte_offset + dl_tensor.shape = &shape_strides_ptr[0] + if is_c_contiguous: + dl_tensor.strides = NULL + else: + dl_tensor.strides = &shape_strides_ptr[nd] + dl_tensor.device.device_type = kDLCPU + dl_tensor.device.device_id = 0 + dl_tensor.dtype.lanes = 1 + dl_tensor.dtype.bits = (ary_dt.itemsize * 8) + if (ary_dtk == "b"): + dl_tensor.dtype.code = kDLBool + elif (ary_dtk == "u"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "i"): + dl_tensor.dtype.code = kDLInt + elif (ary_dtk == "f" and ary_dt.itemsize <= 8): + dl_tensor.dtype.code = kDLFloat + elif (ary_dtk == "c" and ary_dt.itemsize <= 16): + dl_tensor.dtype.code = kDLComplex + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlmv_tensor) + raise ValueError("Unrecognized array data type") + + # set flags down here + if copied: + dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED + if not writable_flag: + dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY + dlmv_tensor.flags = dlmv_flags + + dlmv_tensor.version.major = DLPACK_MAJOR_VERSION + dlmv_tensor.version.minor = DLPACK_MINOR_VERSION + + dlmv_tensor.manager_ctx = npy_ary + cpython.Py_INCREF(npy_ary) + dlmv_tensor.deleter = _managed_tensor_versioned_deleter + + return cpython.PyCapsule_New(dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter) + + cdef class _DLManagedTensorOwner: """ Helper class managing the lifetime of the DLManagedTensor struct @@ -519,9 +646,86 @@ cdef class _DLManagedTensorVersionedOwner: return res -cpdef usm_ndarray from_dlpack_capsule(object py_caps): +cdef dict _numpy_array_interface_from_dl_tensor(DLTensor *dlt, bint ro_flag): + """Constructs a NumPy `__array_interface__` dictionary from a DLTensor.""" + cdef int i = 0 + cdef int itemsize = 0 + + if dlt.dtype.lanes != 1: + raise BufferError( + "Can not import DLPack tensor with lanes != 1" + ) + itemsize 
= dlt.dtype.bits // 8 + shape = list() + if (dlt.strides is NULL): + strides = None + for dim in range(dlt.ndim): + shape.append(dlt.shape[dim]) + else: + strides = list() + for dim in range(dlt.ndim): + shape.append(dlt.shape[dim]) + # convert to byte-strides + strides.append(dlt.strides[dim] * itemsize) + strides = tuple(strides) + shape = tuple(shape) + if (dlt.dtype.code == kDLUInt): + ary_dt = "u" + str(itemsize) + elif (dlt.dtype.code == kDLInt): + ary_dt = "i" + str(itemsize) + elif (dlt.dtype.code == kDLFloat): + ary_dt = "f" + str(itemsize) + elif (dlt.dtype.code == kDLComplex): + ary_dt = "c" + str(itemsize) + elif (dlt.dtype.code == kDLBool): + ary_dt = "b" + str(itemsize) + else: + raise BufferError( + "Can not import DLPack tensor with type code {}.".format( + dlt.dtype.code + ) + ) + typestr = "|" + ary_dt + return dict( + version=3, + shape=shape, + strides=strides, + data=( dlt.data, True if ro_flag else False), + offset=dlt.byte_offset, + typestr=typestr, + ) + + +class _numpy_array_interface_wrapper: + """ + Class that wraps a Python capsule and dictionary for consumption by NumPy. + + Implementation taken from + https://github.com/dmlc/dlpack/blob/main/apps/numpy_dlpack/dlpack/to_numpy.py + + Args: + array_interface: + A dictionary describing the underlying memory. Formatted + to match `numpy.ndarray.__array_interface__`. + + pycapsule: + A Python capsule wrapping the dlpack tensor that will be + converted to numpy. + """ + + def __init__(self, array_interface, memory_owner) -> None: + self.__array_interface__ = array_interface + self._memory_owner = memory_owner + + +cdef bint _is_kdlcpu_device(DLDevice *dev): + "Check if DLTensor.DLDevice denotes (kDLCPU, 0)" + return (dev[0].device_type == kDLCPU) and (dev[0].device_id == 0) + + +cpdef object from_dlpack_capsule(object py_caps): """ - from_dlpack_capsule(caps) + from_dlpack_capsule(py_caps) Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from named Python capsule object referencing instance of ``DLManagedTensor`` @@ -545,7 +749,11 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps): sycl context, or the DLPack's device_type is not supported by :mod:`dpctl`. 
""" + cdef DLManagedTensorVersioned *dlmv_tensor = NULL cdef DLManagedTensor *dlm_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef int versioned = 0 + cdef int readonly = 0 cdef bytes usm_type cdef size_t sz = 1 cdef size_t alloc_sz = 1 @@ -560,214 +768,44 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps): cdef int64_t stride_i = -1 cdef int64_t shape_i = -1 - if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'): - if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor'): - raise ValueError( - "A DLPack tensor object can not be consumed multiple times" - ) - else: - raise TypeError( - "`from_dlpack_capsule` expects a Python 'dltensor' capsule" - ) - dlm_tensor = cpython.PyCapsule_GetPointer( - py_caps, "dltensor") - # Verify that we can work with this device - if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI: - device_id = dlm_tensor.dl_tensor.device.device_id - root_device = dpctl.SyclDevice(str(device_id)) - try: - default_context = root_device.sycl_platform.default_context - except RuntimeError: - default_context = get_device_cached_queue(root_device).sycl_context - if dlm_tensor.dl_tensor.data is NULL: - usm_type = b"device" - q = get_device_cached_queue((default_context, root_device,)) - else: - usm_type = c_dpmem._Memory.get_pointer_type( - dlm_tensor.dl_tensor.data, - default_context) - if usm_type == b"unknown": - raise BufferError( - "Data pointer in DLPack is not bound to default sycl " - f"context of device '{device_id}', translated to " - f"{root_device.filter_string}" - ) - alloc_device = c_dpmem._Memory.get_pointer_device( - dlm_tensor.dl_tensor.data, - default_context - ) - q = get_device_cached_queue((default_context, alloc_device,)) - if dlm_tensor.dl_tensor.dtype.bits % 8: - raise BufferError( - "Can not import DLPack tensor whose element's " - "bitsize is not a multiple of 8" - ) - if dlm_tensor.dl_tensor.dtype.lanes != 1: - raise BufferError( - "Can not import DLPack tensor with lanes != 1" - ) - offset_min = 0 - if dlm_tensor.dl_tensor.strides is NULL: - for i in range(dlm_tensor.dl_tensor.ndim): - sz = sz * dlm_tensor.dl_tensor.shape[i] - offset_max = sz - 1 - else: - offset_max = 0 - for i in range(dlm_tensor.dl_tensor.ndim): - stride_i = dlm_tensor.dl_tensor.strides[i] - shape_i = dlm_tensor.dl_tensor.shape[i] - if shape_i > 1: - shape_i -= 1 - if stride_i > 0: - offset_max = offset_max + stride_i * shape_i - else: - offset_min = offset_min + stride_i * shape_i - sz = offset_max - offset_min + 1 - if sz == 0: - sz = 1 - - element_bytesize = (dlm_tensor.dl_tensor.dtype.bits // 8) - sz = sz * element_bytesize - element_offset = dlm_tensor.dl_tensor.byte_offset // element_bytesize - - # transfer dlm_tensor ownership - dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor') - - if dlm_tensor.dl_tensor.data is NULL: - usm_mem = dpmem.MemoryUSMDevice(sz, q) - else: - mem_ptr_delta = dlm_tensor.dl_tensor.byte_offset - ( - element_offset * element_bytesize - ) - mem_ptr = dlm_tensor.dl_tensor.data - alloc_sz = dlm_tensor.dl_tensor.byte_offset + ( - (offset_max + 1) * element_bytesize) - tmp = c_dpmem._Memory.create_from_usm_pointer_size_qref( - mem_ptr, - max(alloc_sz, element_bytesize), - (q).get_queue_ref(), - memory_owner=dlm_holder - ) - if mem_ptr_delta == 0: - usm_mem = tmp - else: - alloc_sz = dlm_tensor.dl_tensor.byte_offset + ( - (offset_max * element_bytesize + mem_ptr_delta)) - usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref( - (mem_ptr + (element_bytesize - mem_ptr_delta)), - 
max(alloc_sz, element_bytesize), - (q).get_queue_ref(), - memory_owner=tmp - ) - py_shape = list() - for i in range(dlm_tensor.dl_tensor.ndim): - py_shape.append(dlm_tensor.dl_tensor.shape[i]) - if (dlm_tensor.dl_tensor.strides is NULL): - py_strides = None - else: - py_strides = list() - for i in range(dlm_tensor.dl_tensor.ndim): - py_strides.append(dlm_tensor.dl_tensor.strides[i]) - if (dlm_tensor.dl_tensor.dtype.code == kDLUInt): - ary_dt = np.dtype("u" + str(element_bytesize)) - elif (dlm_tensor.dl_tensor.dtype.code == kDLInt): - ary_dt = np.dtype("i" + str(element_bytesize)) - elif (dlm_tensor.dl_tensor.dtype.code == kDLFloat): - ary_dt = np.dtype("f" + str(element_bytesize)) - elif (dlm_tensor.dl_tensor.dtype.code == kDLComplex): - ary_dt = np.dtype("c" + str(element_bytesize)) - elif (dlm_tensor.dl_tensor.dtype.code == kDLBool): - ary_dt = np.dtype("?") - else: + if cpython.PyCapsule_IsValid(py_caps, 'dltensor'): + dlm_tensor = cpython.PyCapsule_GetPointer( + py_caps, "dltensor") + dl_tensor = &dlm_tensor.dl_tensor + elif cpython.PyCapsule_IsValid(py_caps, 'dltensor_versioned'): + dlmv_tensor = cpython.PyCapsule_GetPointer( + py_caps, "dltensor_versioned") + if dlmv_tensor.version.major > DLPACK_MAJOR_VERSION: raise BufferError( - "Can not import DLPack tensor with type code {}.".format( - dlm_tensor.dl_tensor.dtype.code - ) + "Can not import DLPack tensor with major version " + f"greater than {DLPACK_MAJOR_VERSION}" ) - res_ary = usm_ndarray( - py_shape, - dtype=ary_dt, - buffer=usm_mem, - strides=py_strides, - offset=element_offset + versioned = 1 + readonly = (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0 + dl_tensor = &dlmv_tensor.dl_tensor + elif cpython.PyCapsule_IsValid(py_caps, 'used_dltensor') or cpython.PyCapsule_IsValid(py_caps, 'used_dltensor_versioned'): + raise ValueError( + "A DLPack tensor object can not be consumed multiple times" ) - return res_ary else: - raise BufferError( - "The DLPack tensor resides on unsupported device." + raise TypeError( + "`from_dlpack_capsule` expects a Python 'dltensor' capsule" ) - -cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps): - """ - from_dlpack_versioned_capsule(caps) - - Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from - named Python capsule object referencing instance of - ``DLManagedTensorVersioned`` without copy. The instance forms a - view in the memory of the tensor. - - Args: - caps: - Python capsule with name ``"dltensor_versioned"`` expected - to reference an instance of ``DLManagedTensorVersioned`` - struct. - Returns: - Instance of :class:`dpctl.tensor.usm_ndarray` with a view into - memory of the tensor. Capsule is renamed to - ``"used_dltensor_versioned"`` upon success. - Raises: - TypeError: - if argument is not a ``"dltensor_versioned"`` capsule. - ValueError: - if argument is ``"used_dltensor_versioned"`` capsule - BufferError: - if the USM pointer is not bound to the reconstructed - sycl context, or the DLPack's device_type is not supported - by :mod:`dpctl`. 
- """ - cdef DLManagedTensorVersioned *dlmv_tensor = NULL - cdef bytes usm_type - cdef size_t sz = 1 - cdef size_t alloc_sz = 1 - cdef int i - cdef int device_id = -1 - cdef int element_bytesize = 0 - cdef Py_ssize_t offset_min = 0 - cdef Py_ssize_t offset_max = 0 - cdef char *mem_ptr = NULL - cdef Py_ssize_t mem_ptr_delta = 0 - cdef Py_ssize_t element_offset = 0 - cdef int64_t stride_i = -1 - cdef int64_t shape_i = -1 - - if not cpython.PyCapsule_IsValid(py_caps, 'dltensor_versioned'): - if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor_versioned'): - raise ValueError( - "A DLPack tensor object can not be consumed multiple times" - ) - else: - raise TypeError( - "`from_dlpack_versioned_capsule` expects a Python " - "'dltensor_versioned' capsule" - ) - dlmv_tensor = cpython.PyCapsule_GetPointer( - py_caps, "dltensor_versioned") # Verify that we can work with this device - if dlmv_tensor.dl_tensor.device.device_type == kDLOneAPI: - device_id = dlmv_tensor.dl_tensor.device.device_id + if dl_tensor.device.device_type == kDLOneAPI: + device_id = dl_tensor.device.device_id root_device = dpctl.SyclDevice(str(device_id)) try: default_context = root_device.sycl_platform.default_context except RuntimeError: default_context = get_device_cached_queue(root_device).sycl_context - if dlmv_tensor.dl_tensor.data is NULL: + if dl_tensor.data is NULL: usm_type = b"device" q = get_device_cached_queue((default_context, root_device,)) else: usm_type = c_dpmem._Memory.get_pointer_type( - dlmv_tensor.dl_tensor.data, + dl_tensor.data, default_context) if usm_type == b"unknown": raise BufferError( @@ -776,34 +814,29 @@ cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps): f"{root_device.filter_string}" ) alloc_device = c_dpmem._Memory.get_pointer_device( - dlmv_tensor.dl_tensor.data, + dl_tensor.data, default_context ) q = get_device_cached_queue((default_context, alloc_device,)) - if dlmv_tensor.dl_tensor.dtype.bits % 8: + if dl_tensor.dtype.bits % 8: raise BufferError( "Can not import DLPack tensor whose element's " "bitsize is not a multiple of 8" ) - if dlmv_tensor.dl_tensor.dtype.lanes != 1: + if dl_tensor.dtype.lanes != 1: raise BufferError( "Can not import DLPack tensor with lanes != 1" ) - if dlmv_tensor.version.major > DLPACK_MAJOR_VERSION: - raise BufferError( - "Can not import DLPack tensor with major version " - f"greater than {DLPACK_MAJOR_VERSION}" - ) offset_min = 0 - if dlmv_tensor.dl_tensor.strides is NULL: - for i in range(dlmv_tensor.dl_tensor.ndim): - sz = sz * dlmv_tensor.dl_tensor.shape[i] + if dl_tensor.strides is NULL: + for i in range(dl_tensor.ndim): + sz = sz * dl_tensor.shape[i] offset_max = sz - 1 else: offset_max = 0 - for i in range(dlmv_tensor.dl_tensor.ndim): - stride_i = dlmv_tensor.dl_tensor.strides[i] - shape_i = dlmv_tensor.dl_tensor.shape[i] + for i in range(dl_tensor.ndim): + stride_i = dl_tensor.strides[i] + shape_i = dl_tensor.shape[i] if shape_i > 1: shape_i -= 1 if stride_i > 0: @@ -814,33 +847,37 @@ cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps): if sz == 0: sz = 1 - element_bytesize = (dlmv_tensor.dl_tensor.dtype.bits // 8) + element_bytesize = (dl_tensor.dtype.bits // 8) sz = sz * element_bytesize - element_offset = dlmv_tensor.dl_tensor.byte_offset // element_bytesize + element_offset = dl_tensor.byte_offset // element_bytesize - # transfer dlmv_tensor ownership - dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned') + # transfer ownership + if not versioned: + 
dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) + cpython.PyCapsule_SetName(py_caps, 'used_dltensor') + else: + dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) + cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned') - if dlmv_tensor.dl_tensor.data is NULL: + if dl_tensor.data is NULL: usm_mem = dpmem.MemoryUSMDevice(sz, q) else: - mem_ptr_delta = dlmv_tensor.dl_tensor.byte_offset - ( + mem_ptr_delta = dl_tensor.byte_offset - ( element_offset * element_bytesize ) - mem_ptr = dlmv_tensor.dl_tensor.data - alloc_sz = dlmv_tensor.dl_tensor.byte_offset + ( + mem_ptr = dl_tensor.data + alloc_sz = dl_tensor.byte_offset + ( (offset_max + 1) * element_bytesize) tmp = c_dpmem._Memory.create_from_usm_pointer_size_qref( mem_ptr, max(alloc_sz, element_bytesize), (q).get_queue_ref(), - memory_owner=dlmv_holder + memory_owner=dlmv_holder if versioned else dlm_holder ) if mem_ptr_delta == 0: usm_mem = tmp else: - alloc_sz = dlmv_tensor.dl_tensor.byte_offset + ( + alloc_sz = dl_tensor.byte_offset + ( (offset_max * element_bytesize + mem_ptr_delta)) usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref( (mem_ptr + (element_bytesize - mem_ptr_delta)), @@ -849,28 +886,28 @@ cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps): memory_owner=tmp ) py_shape = list() - for i in range(dlmv_tensor.dl_tensor.ndim): - py_shape.append(dlmv_tensor.dl_tensor.shape[i]) - if (dlmv_tensor.dl_tensor.strides is NULL): + for i in range(dl_tensor.ndim): + py_shape.append(dl_tensor.shape[i]) + if (dl_tensor.strides is NULL): py_strides = None else: py_strides = list() - for i in range(dlmv_tensor.dl_tensor.ndim): - py_strides.append(dlmv_tensor.dl_tensor.strides[i]) - if (dlmv_tensor.dl_tensor.dtype.code == kDLUInt): + for i in range(dl_tensor.ndim): + py_strides.append(dl_tensor.strides[i]) + if (dl_tensor.dtype.code == kDLUInt): ary_dt = np.dtype("u" + str(element_bytesize)) - elif (dlmv_tensor.dl_tensor.dtype.code == kDLInt): + elif (dl_tensor.dtype.code == kDLInt): ary_dt = np.dtype("i" + str(element_bytesize)) - elif (dlmv_tensor.dl_tensor.dtype.code == kDLFloat): + elif (dl_tensor.dtype.code == kDLFloat): ary_dt = np.dtype("f" + str(element_bytesize)) - elif (dlmv_tensor.dl_tensor.dtype.code == kDLComplex): + elif (dl_tensor.dtype.code == kDLComplex): ary_dt = np.dtype("c" + str(element_bytesize)) - elif (dlmv_tensor.dl_tensor.dtype.code == kDLBool): + elif (dl_tensor.dtype.code == kDLBool): ary_dt = np.dtype("?") else: raise BufferError( "Can not import DLPack tensor with type code {}.".format( - dlmv_tensor.dl_tensor.dtype.code + dl_tensor.dtype.code ) ) res_ary = usm_ndarray( @@ -880,14 +917,50 @@ cpdef usm_ndarray from_dlpack_versioned_capsule(object py_caps): strides=py_strides, offset=element_offset ) - if (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY): + if readonly: res_ary.flags_ = (res_ary.flags_ & ~USM_ARRAY_WRITABLE) return res_ary + elif _is_kdlcpu_device(&dl_tensor.device): + ary_iface = _numpy_array_interface_from_dl_tensor(dl_tensor, readonly) + if not versioned: + dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) + cpython.PyCapsule_SetName(py_caps, 'used_dltensor') + return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, dlm_holder)) + else: + dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) + cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned') + return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, dlmv_holder)) else: raise BufferError( "The DLPack tensor resides on unsupported device." 
) +cdef usm_ndarray _to_usm_ary_from_host_blob(object host_blob, dev : Device): + q = dev.sycl_queue + np_ary = np.asarray(host_blob) + dt = np_ary.dtype + if dt.char in "dD" and q.sycl_device.has_aspect_fp64 is False: + Xusm_dtype = ( + "float32" if dt.char == "d" else "complex64" + ) + else: + Xusm_dtype = dt + usm_mem = dpmem.MemoryUSMDevice(np_ary.nbytes, queue=q) + usm_ary = usm_ndarray(np_ary.shape, dtype=Xusm_dtype, buffer=usm_mem) + usm_mem.copy_from_host(np.reshape(np_ary.view(dtype="u1"), -1)) + return usm_ary + + +# only cdef to make it private +cdef object _create_device(object device, object dl_device): + if isinstance(device, Device): + return device + elif isinstance(device, dpctl.SyclDevice): + return Device.create_device(device) + else: + root_device = dpctl.SyclDevice(str(dl_device[1])) + return Device.create_device(root_device) + def from_dlpack(x, /, *, device=None, copy=None): """ from_dlpack(x, /, *, device=None, copy=None) @@ -896,7 +969,7 @@ def from_dlpack(x, /, *, device=None, copy=None): object ``x`` that implements ``__dlpack__`` protocol. Args: - x (Python object): + x (object): A Python object representing an array that supports ``__dlpack__`` protocol. device (Optional[str, @@ -912,7 +985,8 @@ def from_dlpack(x, /, *, device=None, copy=None): returned by :attr:`dpctl.tensor.usm_ndarray.device`, or a 2-tuple matching the format of the output of the ``__dlpack_device__`` method, an integer enumerator representing the device type followed by - an integer representing the index of the device. + an integer representing the index of the device. The only supported + :enum:`dpctl.tensor.DLDeviceType` types are "kDLCPU" and "kDLOneAPI". Default: ``None``. copy (bool, optional) Boolean indicating whether or not to copy the input. @@ -961,33 +1035,130 @@ def from_dlpack(x, /, *, device=None, copy=None): C = Container(dpt.linspace(0, 100, num=20, dtype="int16")) X = dpt.from_dlpack(C) + Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0)) """ - if not hasattr(x, "__dlpack__"): - raise TypeError( - f"The argument of type {type(x)} does not implement " - "`__dlpack__` method." - ) - dlpack_attr = getattr(x, "__dlpack__") - if not callable(dlpack_attr): + dlpack_attr = getattr(x, "__dlpack__", None) + dlpack_dev_attr = getattr(x, "__dlpack_device__", None) + if not callable(dlpack_attr) or not callable(dlpack_dev_attr): raise TypeError( f"The argument of type {type(x)} does not implement " - "`__dlpack__` method." + "`__dlpack__` and `__dlpack_device__` methods." 
) - try: - # device is converted to a dlpack_device if necessary - dl_device = None - if device: - if isinstance(device, tuple): - dl_device = device + # device is converted to a dlpack_device if necessary + dl_device = None + if device: + if isinstance(device, tuple): + dl_device = device + if len(dl_device) != 2: + raise ValueError( + "Argument `device` specified as a tuple must have length 2" + ) + else: + if not isinstance(device, dpctl.SyclDevice): + device = Device.create_device(device) + d = device.sycl_device else: - if not isinstance(device, dpctl.SyclDevice): - d = Device.create_device(device).sycl_device - dl_device = (device_OneAPI, get_parent_device_ordinal_id(d)) - else: - dl_device = (device_OneAPI, get_parent_device_ordinal_id(device)) - dlpack_capsule = dlpack_attr(max_version=get_build_dlpack_version(), dl_device=dl_device, copy=copy) - return from_dlpack_versioned_capsule(dlpack_capsule) + d = device + dl_device = (device_OneAPI, get_parent_device_ordinal_id(d)) + if dl_device is not None: + if (dl_device[0] not in [device_OneAPI, device_CPU]): + raise ValueError( + f"Argument `device`={device} is not supported." + ) + got_type_error = False + got_buffer_error = False + got_other_error = False + saved_exception = None + # First DLPack version supporting dl_device, and copy + requested_ver = (1, 0) + cpu_dev = (device_CPU, 0) + try: + # setting max_version to minimal version that supports dl_device/copy keywords + dlpack_capsule = dlpack_attr( + max_version=requested_ver, + dl_device=dl_device, + copy=copy + ) except TypeError: - dlpack_capsule = dlpack_attr() + # exporter does not support max_version keyword + got_type_error = True + except (BufferError, NotImplementedError): + # Either dl_device, or copy can be satisfied + got_buffer_error = True + except Exception as e: + got_other_error = True + saved_exception = e + else: + # execution did not raise exceptions return from_dlpack_capsule(dlpack_capsule) + finally: + if got_type_error: + # max_version/dl_device, copy keywords are not supported by __dlpack__ + x_dldev = dlpack_dev_attr() + if (dl_device is None) or (dl_device == x_dldev): + dlpack_capsule = dlpack_attr() + return from_dlpack_capsule(dlpack_capsule) + # must copy via host + if copy is False: + raise BufferError( + "Importing data via DLPack requires copying, but copy=False was provided" + ) + # when max_version/dl_device/copy are not supported + # we can only support importing to OneAPI devices + # from host, or from another oneAPI device + is_supported_x_dldev = ( + x_dldev == cpu_dev or + (x_dldev[0] == device_OneAPI) + ) + is_supported_dl_device = ( + dl_device == cpu_dev or + dl_device[0] == device_OneAPI + ) + if is_supported_x_dldev and is_supported_dl_device: + dlpack_capsule = dlpack_attr() + blob = from_dlpack_capsule(dlpack_capsule) + else: + raise BufferError(f"Can not import to requested device {dl_device}") + dev = _create_device(device, dl_device) + if x_dldev == cpu_dev and dl_device == cpu_dev: + # both source and destination are CPU + return blob + elif x_dldev == cpu_dev: + # source is CPU, destination is oneAPI + return _to_usm_ary_from_host_blob(blob, dev) + elif dl_device == cpu_dev: + # source is oneAPI, destination is CPU + cpu_caps = blob.__dlpack__( + max_version=get_build_dlpack_version(), + dl_device=cpu_dev + ) + return from_dlpack_capsule(cpu_caps) + else: + import dpctl.tensor as dpt + return dpt.asarray(blob, device=dev) + elif got_buffer_error: + # we are here, because dlpack_attr could not deal with requested dl_device, + 
# or copying was required + if copy is False: + raise BufferError( + "Importing data via DLPack requires copying, but copy=False was provided" + ) + # must copy via host + if dl_device[0] != device_OneAPI: + raise BufferError(f"Can not import to requested device {dl_device}") + x_dldev = dlpack_dev_attr() + if x_dldev == cpu_dev: + dlpack_capsule = dlpack_attr() + host_blob = from_dlpack_capsule(dlpack_capsule) + else: + dlpack_capsule = dlpack_attr( + max_version=requested_ver, + dl_device=cpu_dev, + copy=copy + ) + host_blob = from_dlpack_capsule(dlpack_capsule) + dev = _create_device(device, dl_device) + return _to_usm_ary_from_host_blob(host_blob, dev) + elif got_other_error: + raise saved_exception diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 5b0c00bbbe..e806dcc956 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -54,6 +54,40 @@ include "_slicing.pxi" class DLDeviceType(IntEnum): + """ + An ``IntEnum`` for the types of DLDevices supported by the DLPack + protocol. + ``kDLCPU``: + CPU (host) device + ``kDLCUDA``: + CUDA GPU device + ``kDLCUDAHost``: + Pinned CUDA CPU memory by cudaMallocHost + ``kDLOpenCL``: + OpenCL device + ``kDLVulkan``: + Vulkan buffer + ``kDLMetal``: + Metal for Apple GPU + ``kDLVPI``: + Verilog simulator buffer + ``kDLROCM``: + ROCm GPU device + ``kDLROCMHost``: + Pinned ROCm CPU memory allocated by hipMallocHost + ``kDLExtDev``: + Reserved extension device type used to test new devices + ``kDLCUDAManaged``: + CUDA managed/unified memory allocated by cudaMallocManaged + ``kDLOneAPI``: + Unified shared memory allocated on a oneAPI non-partitioned device + ``kDLWebGPU``: + Device support for WebGPU standard + ``kDLHexagon``: + Qualcomm Hexagon DSP + ``kDLMAIA``: + Microsoft MAIA device + """ kDLCPU = c_dlpack.device_CPU kDLCUDA = c_dlpack.device_CUDA kDLCUDAHost = c_dlpack.device_CUDAHost @@ -87,10 +121,34 @@ cdef object _as_zero_dim_ndarray(object usm_ary): view.shape = tuple() return view + cdef int _copy_writable(int lhs_flags, int rhs_flags): "Copy the WRITABLE flag to lhs_flags from rhs_flags" return (lhs_flags & ~USM_ARRAY_WRITABLE) | (rhs_flags & USM_ARRAY_WRITABLE) + +cdef bint _is_host_cpu(object dl_device): + "Check if dl_device denotes (kDLCPU, 0)" + cdef object dl_type + cdef object dl_id + cdef Py_ssize_t n_elems = -1 + + try: + n_elems = len(dl_device) + except TypeError: + pass + + if n_elems != 2: + return False + + dl_type = dl_device[0] + dl_id = dl_device[1] + if isinstance(dl_type, str): + return (dl_type == "kDLCPU" and dl_id == 0) + + return (dl_type == DLDeviceType.kDLCPU) and (dl_id == 0) + + cdef class usm_ndarray: """ usm_ndarray(shape, dtype=None, strides=None, buffer="device", \ offset=0, order="C", buffer_ctor_kwargs=dict(), \ @@ -1157,18 +1215,37 @@ cdef class usm_ndarray: raise TypeError( "`__dlpack__` expects `max_version` to be a " "2-tuple of integers `(major, minor)`, instead " - f"got {type(max_version)}" + f"got {max_version}" ) dpctl_dlpack_version = get_build_dlpack_version() if max_version[0] >= dpctl_dlpack_version[0]: # DLManagedTensorVersioned path - # TODO: add logic for targeting a device if dl_device is not None: - if dl_device != self.__dlpack_device__(): - raise NotImplementedError( - "targeting a device with `__dlpack__` is not " - "currently implemented" + if not isinstance(dl_device, tuple) or len(dl_device) != 2: + raise TypeError( + "`__dlpack__` expects `dl_device` to be a 2-tuple " + "of `(device_type, device_id)`, instead " + f"got {dl_device}" ) + if 
dl_device != self.__dlpack_device__(): + if copy == False: + raise BufferError( + "array cannot be placed on the requested device without a copy" + ) + if _is_host_cpu(dl_device): + if stream is not None: + raise ValueError( + "`stream` must be `None` when `dl_device` is of type `kDLCPU`" + ) + from ._copy_utils import _copy_to_numpy + _arr = _copy_to_numpy(self) + _arr.flags["W"] = self.flags["W"] + return c_dlpack.numpy_to_dlpack_versioned_capsule(_arr, True) + else: + raise BufferError( + f"targeting `dl_device` {dl_device} with `__dlpack__` is not " + "yet implemented" + ) if copy is None: copy = False # TODO: strategy for handling stream on different device from dl_device @@ -1221,6 +1298,8 @@ cdef class usm_ndarray: The tuple describes the non-partitioned device where the array has been allocated, or the non-partitioned parent device of the allocation device. + See ``DLDeviceType`` for a list of devices supported by the DLPack protocol. + Raises: DLPackCreationError: when the ``device_id`` could not be determined. diff --git a/dpctl/tests/test_usm_ndarray_dlpack.py b/dpctl/tests/test_usm_ndarray_dlpack.py index a4994f01e6..2f07abf12a 100644 --- a/dpctl/tests/test_usm_ndarray_dlpack.py +++ b/dpctl/tests/test_usm_ndarray_dlpack.py @@ -17,14 +17,17 @@ import collections import ctypes +import numpy as np import pytest -from helper import skip_if_dtype_not_supported +from helper import get_queue_or_skip, skip_if_dtype_not_supported import dpctl import dpctl.tensor as dpt import dpctl.tensor._dlpack as _dlp +import dpctl.tensor._usmarray as dpt_arr -device_oneAPI = 14 # DLDeviceType.kDLOneAPI +device_CPU = dpt_arr.DLDeviceType.kDLCPU +device_oneAPI = dpt_arr.DLDeviceType.kDLOneAPI _usm_types_list = ["shared", "device", "host"] @@ -301,20 +304,20 @@ def test_versioned_dlpack_capsule(): max_supported_ver = _dlp.get_build_dlpack_version() cap = x.__dlpack__(max_version=max_supported_ver) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) del cap assert x._pointer == y._pointer x2 = dpt.asarray(dpt.reshape(x, (10, 10)), order="F") cap = x2.__dlpack__(max_version=max_supported_ver) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) del cap assert x2._pointer == y._pointer del x2 x3 = x[::-2] cap = x3.__dlpack__(max_version=max_supported_ver) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert x3._pointer == y._pointer del x3, y, x del cap @@ -323,13 +326,13 @@ def test_versioned_dlpack_capsule(): x = dpt.arange(100, dtype="i4") x.flags["W"] = False cap = x.__dlpack__(max_version=max_supported_ver) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert x._pointer == y._pointer assert not y.flags.writable # read-only array, and copy cap = x.__dlpack__(max_version=max_supported_ver, copy=True) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert x._pointer != y._pointer assert not y.flags.writable @@ -399,12 +402,12 @@ def test_used_dlpack_capsule(): max_supported_ver = _dlp.get_build_dlpack_version() cap = x.__dlpack__(max_version=max_supported_ver) - _dlp.from_dlpack_versioned_capsule(cap) + _dlp.from_dlpack_capsule(cap) with pytest.raises( ValueError, match="A DLPack tensor object can not be consumed multiple times", ): - _dlp.from_dlpack_versioned_capsule(cap) + _dlp.from_dlpack_capsule(cap) del cap @@ -421,7 +424,7 @@ def test_dlpack_size_0(): max_supported_ver = _dlp.get_build_dlpack_version() cap = 
x.__dlpack__(max_version=max_supported_ver) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert y._pointer == x._pointer @@ -459,14 +462,366 @@ def test_dlpack_kwargs(): x1 = dpt.arange(100, dtype="i4", sycl_queue=q1) max_supported_ver = _dlp.get_build_dlpack_version() cap = x1.__dlpack__(stream=q2, max_version=max_supported_ver, copy=False) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert y._pointer == x1._pointer del x1, y del cap x2 = dpt.arange(100, dtype="i4", sycl_queue=q1) cap = x2.__dlpack__(stream=q2, max_version=max_supported_ver, copy=True) - y = _dlp.from_dlpack_versioned_capsule(cap) + y = _dlp.from_dlpack_capsule(cap) assert y._pointer != x2._pointer del x2, y del cap + + +def _is_capsule(o): + t = type(o) + return t.__module__ == "builtins" and t.__name__ == "PyCapsule" + + +def test_dlpack_dl_device(): + try: + x = dpt.arange(100, dtype="i4") + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + max_supported_ver = _dlp.get_build_dlpack_version() + cap1 = x.__dlpack__( + dl_device=x.__dlpack_device__(), max_version=max_supported_ver + ) + assert _is_capsule(cap1) + cap2 = x.__dlpack__(dl_device=(1, 0), max_version=max_supported_ver) + assert _is_capsule(cap2) + cap3 = x.__dlpack__( + dl_device=(device_CPU, 0), + max_version=max_supported_ver, + ) + assert _is_capsule(cap3) + cap4 = x.__dlpack__(dl_device=("kDLCPU", 0), max_version=max_supported_ver) + assert _is_capsule(cap4) + with pytest.raises(TypeError): + # pass method instead of return of its __call__ invocation + x.__dlpack__( + dl_device=x.__dlpack_device__, max_version=max_supported_ver + ) + with pytest.raises(TypeError): + # exercise check for length + x.__dlpack__(dl_device=(3,), max_version=max_supported_ver) + + +def test_from_dlpack_kdlcpu_interop_numpy(): + """ + Basic test that usm_ndarray can interoperate with NumPy ndarray + `__dlpack_device__`. + """ + get_queue_or_skip() + + sh = 5 + dt = dpt.int32 + + X = dpt.empty(sh, dtype=dt) + dl_device_np = np.empty(()).__dlpack_device__() + + Y = dpt.from_dlpack(X, device=dl_device_np) + assert isinstance(Y, np.ndarray) + assert X.shape == Y.shape + assert X.dtype == Y.dtype + + V = dpt.from_dlpack(Y) + assert isinstance(V, np.ndarray) + assert Y.shape == V.shape + assert Y.dtype == V.dtype + + +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack_to_kdlcpu(shape, typestr): + q = get_queue_or_skip() + skip_if_dtype_not_supported(typestr, q.sycl_device) + + X = dpt.empty(shape, dtype=typestr, sycl_queue=q) + Y = dpt.from_dlpack(X, device=(device_CPU, 0)) + assert isinstance(Y, np.ndarray) + assert X.shape == Y.shape + assert X.dtype == Y.dtype + # NumPy does not treat size 0 arrays consistently + # w.r.t. 
strides, so skip these cases + if X.ndim and X.size != 0: + V = Y[::-1] + W = dpt.from_dlpack(V) + assert V.strides == W.strides + + +@pytest.mark.parametrize("mod", [2, 5]) +def test_from_dlpack_to_kdlcpu_strides(mod, typestr): + q = get_queue_or_skip() + skip_if_dtype_not_supported(typestr, q.sycl_device) + + X0 = dpt.empty(3 * mod, dtype=typestr, sycl_queue=q) + for start in range(mod): + X = X0[slice(-start - 1, None, -mod)] + Y = dpt.from_dlpack(X, device=(device_CPU, 0)) + assert X.shape == Y.shape + assert X.dtype == Y.dtype + if Y.ndim: + V = Y[::-1] + W = dpt.from_dlpack(V) + assert V.strides == W.strides + + +def test_dlpack_from_subdevice_to_kdlcpu(): + """ + Check that array allocated on a sub-device can be + imported via DLPack to kDLCPU device (as a NumPy array). + """ + n = 64 + try: + dev = dpctl.SyclDevice() + except dpctl.SyclDeviceCreationError: + pytest.skip("No default device available") + try: + sdevs = dev.create_sub_devices(partition="next_partitionable") + except dpctl.SyclSubDeviceCreationError: + sdevs = None + try: + if sdevs is None: + sdevs = dev.create_sub_devices(partition=[1, 1]) + except dpctl.SyclSubDeviceCreationError: + pytest.skip("Default device can not be partitioned") + assert isinstance(sdevs, list) and len(sdevs) > 0 + try: + ctx = sdevs[0].sycl_platform.default_context + except dpctl.SyclContextCreationError: + pytest.skip("Platform's default_context is not available") + try: + q = dpctl.SyclQueue(ctx, sdevs[0]) + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + ar = dpt.arange(n, dtype=dpt.int32, sycl_queue=q) + ar2 = dpt.from_dlpack(ar, dl_device=(device_CPU, 0)) + assert isinstance(ar2, np.ndarray) + + +def test_legacy_dlpack_capsule_from_numpy(): + """ + Check that NumPy's exported legacy DLPack capsule + will interoperate with from_dlpack_capsule, + especially with zero-copy. 
+    """
+    x = np.arange(100, dtype="i4")
+    cap = x.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    del cap
+    assert x.ctypes.data == y.ctypes.data
+
+    x = np.arange(100, dtype="u4").reshape((10, 10)).T
+    cap = x.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    del cap
+    assert x.ctypes.data == y.ctypes.data
+    del x
+
+    x = np.arange(100, dtype="f4").reshape((10, 10), order="F")
+    cap = x.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    del cap
+    assert x.ctypes.data == y.ctypes.data
+
+    x = np.arange(100, dtype="c8")
+    x1 = x[::-2]
+    cap = x1.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    assert x1.ctypes.data == y.ctypes.data
+    del x1, y, x
+    del cap
+
+    x = np.ones(100, dtype="?")
+    x1 = x[::-2]
+    cap = x1.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    assert x1.ctypes.data == y.ctypes.data
+    del x1, y, x
+    del cap
+
+
+def test_dlpack_capsule_readonly_array_to_kdlcpu():
+    try:
+        x = dpt.arange(100, dtype="i4")
+    except dpctl.SyclDeviceCreationError:
+        pytest.skip("No default device available")
+
+    max_supported_ver = _dlp.get_build_dlpack_version()
+    # read-only array
+    x.flags["W"] = False
+    cap = x.__dlpack__(
+        max_version=max_supported_ver, dl_device=(device_CPU, 0)
+    )
+    y = _dlp.from_dlpack_capsule(cap)
+    assert dpt.all(x == dpt.asarray(y))
+    assert not y.flags["W"]
+
+    cap1 = _dlp.numpy_to_dlpack_versioned_capsule(y, not y.flags["W"])
+    y1 = _dlp.from_dlpack_capsule(cap1)
+    assert not y1.flags["W"]
+
+
+def test_used_dlpack_capsule_from_numpy():
+    get_queue_or_skip()
+
+    x_np = np.arange(100, dtype="i4")
+
+    cap = x_np.__dlpack__()
+    _dlp.from_dlpack_capsule(cap)
+    with pytest.raises(
+        ValueError,
+        match="A DLPack tensor object can not be consumed multiple times",
+    ):
+        _dlp.from_dlpack_capsule(cap)
+    del cap
+
+    x = dpt.asarray(x_np)
+    max_supported_ver = _dlp.get_build_dlpack_version()
+    cap = x.__dlpack__(
+        max_version=max_supported_ver, dl_device=(device_CPU, 0)
+    )
+    _dlp.from_dlpack_capsule(cap)
+    with pytest.raises(
+        ValueError,
+        match="A DLPack tensor object can not be consumed multiple times",
+    ):
+        _dlp.from_dlpack_capsule(cap)
+    del cap
+
+
+def test_dlpack_size_0_on_kdlcpu():
+    get_queue_or_skip()
+    x_np = np.ones(0, dtype="i4")
+
+    cap = x_np.__dlpack__()
+    y = _dlp.from_dlpack_capsule(cap)
+    assert y.ctypes.data == x_np.ctypes.data
+
+
+def test_copy_via_host():
+    get_queue_or_skip()
+    x = dpt.ones(1, dtype="i4")
+    x_np = np.ones(1, dtype="i4")
+    x_dl_dev = x.__dlpack_device__()
+    y = dpt.from_dlpack(x_np, device=x_dl_dev)
+    assert isinstance(y, dpt.usm_ndarray)
+    assert y.sycl_device == x.sycl_device
+    assert y.usm_type == "device"
+
+    with pytest.raises(ValueError):
+        # incorrect length of tuple
+        dpt.from_dlpack(x_np, device=(1, 0, 0))
+    with pytest.raises(ValueError):
+        # only kDLCPU and kDLOneAPI are supported
+        dpt.from_dlpack(x, device=(2, 0))
+
+    num_devs = dpctl.get_num_devices()
+    if num_devs > 1:
+        j = [i for i in range(num_devs) if i != x_dl_dev[1]][0]
+        z = dpt.from_dlpack(x, device=(x_dl_dev[0], j))
+        assert isinstance(z, dpt.usm_ndarray)
+        assert z.usm_type == "device"
+
+
+def test_copy_via_host_gh_1789():
+    "Test based on review example from gh-1789"
+    get_queue_or_skip()
+    x_np = np.ones((10, 10), dtype="i4")
+    # strides are no longer multiple of itemsize
+    x_np.strides = (x_np.strides[0] - 1, x_np.strides[1])
+    with pytest.raises(BufferError):
+        dpt.from_dlpack(x_np)
+    with pytest.raises(BufferError):
+        dpt.from_dlpack(x_np, device=(14, 0))
+
+
+class LegacyContainer:
+    "Helper class implementing legacy `__dlpack__` protocol"
+
+    def __init__(self, array):
+        self._array = array
+
+    def __dlpack__(self, stream=None):
+        return self._array.__dlpack__(stream=stream)
+
+    def __dlpack_device__(self):
+        return self._array.__dlpack_device__()
+
+
+class Container:
+    "Helper class implementing the versioned `__dlpack__` protocol"
+
+    def __init__(self, array):
+        self._array = array
+
+    def __dlpack__(
+        self, max_version=None, dl_device=None, copy=None, stream=None
+    ):
+        return self._array.__dlpack__(
+            max_version=max_version,
+            dl_device=dl_device,
+            copy=copy,
+            stream=stream,
+        )
+
+    def __dlpack_device__(self):
+        return self._array.__dlpack_device__()
+
+
+def test_generic_container_legacy():
+    get_queue_or_skip()
+    C = LegacyContainer(dpt.linspace(0, 100, num=20, dtype="int16"))
+
+    X = dpt.from_dlpack(C)
+    assert isinstance(X, dpt.usm_ndarray)
+    assert X._pointer == C._array._pointer
+    assert X.sycl_device == C._array.sycl_device
+    assert X.dtype == C._array.dtype
+
+    Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0))
+    assert isinstance(Y, np.ndarray)
+    assert Y.dtype == X.dtype
+
+    Z = dpt.from_dlpack(C, device=X.device)
+    assert isinstance(Z, dpt.usm_ndarray)
+    assert Z._pointer == X._pointer
+    assert Z.device == X.device
+
+
+def test_generic_container_legacy_np():
+    get_queue_or_skip()
+    C = LegacyContainer(np.linspace(0, 100, num=20, dtype="int16"))
+
+    X = dpt.from_dlpack(C)
+    assert isinstance(X, np.ndarray)
+    assert X.ctypes.data == C._array.ctypes.data
+    assert X.dtype == C._array.dtype
+
+    Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0))
+    assert isinstance(Y, np.ndarray)
+    assert Y.dtype == X.dtype
+
+    dev = dpt.Device.create_device()
+    Z = dpt.from_dlpack(C, device=dev)
+    assert isinstance(Z, dpt.usm_ndarray)
+    assert Z.device == dev
+
+
+def test_generic_container():
+    get_queue_or_skip()
+    C = Container(dpt.linspace(0, 100, num=20, dtype="int16"))
+
+    X = dpt.from_dlpack(C)
+    assert isinstance(X, dpt.usm_ndarray)
+    assert X._pointer == C._array._pointer
+    assert X.sycl_device == C._array.sycl_device
+    assert X.dtype == C._array.dtype
+
+    Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0))
+    assert isinstance(Y, np.ndarray)
+    assert Y.dtype == X.dtype
+
+    Z = dpt.from_dlpack(C, device=X.device)
+    assert isinstance(Z, dpt.usm_ndarray)
+    assert Z._pointer == X._pointer
+    assert Z.device == X.device
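
Taken together, these changes let ``dpt.from_dlpack`` move data in both
directions between the host (``kDLCPU``) and SYCL devices (``kDLOneAPI``).
A minimal round-trip sketch, assuming dpctl is built with this patch and
at least one SYCL device is available:

    import numpy as np

    import dpctl.tensor as dpt

    x = dpt.arange(10, dtype="i4")

    # device-to-host: requesting (kDLCPU, 0) exports through a DLPack
    # capsule and yields a NumPy array backed by host memory
    y = dpt.from_dlpack(x, device=(dpt.DLDeviceType.kDLCPU, 0))
    assert isinstance(y, np.ndarray)

    # host-to-device: the NumPy array is imported back into USM memory,
    # copying via host onto the device that produced x
    z = dpt.from_dlpack(y, device=x.__dlpack_device__())
    assert isinstance(z, dpt.usm_ndarray)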