diff --git a/.flake8 b/.flake8
index 6e9c78b236..c4dc50e407 100644
--- a/.flake8
+++ b/.flake8
@@ -23,6 +23,7 @@ per-file-ignores =
     dpctl/memory/_memory.pyx: E999, E225, E226, E227
     dpctl/program/_program.pyx: E999, E225, E226, E227
     dpctl/tensor/_usmarray.pyx: E999, E225, E226, E227
+    dpctl/tensor/_dlpack.pyx: E999, E225, E226, E227
     dpctl/tensor/numpy_usm_shared.py: F821
     dpctl/tests/_cython_api.pyx: E999, E225, E227, E402
     dpctl/utils/_compute_follows_data.pyx: E999, E225, E227
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c09fea7af3..371ff0a0f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - `dpctl.tensor.asarray`, `dpctl.tensor.empty` implemented (#646).
+- `dpctl.tensor.usm_ndarray` now supports the DLPack protocol; `dpctl.tensor.from_dlpack` implemented (#682).
 
 ### Changed
 - dpctl-capi is now renamed to `libsyclinterface` (#666).
diff --git a/MANIFEST.in b/MANIFEST.in
index 19f37e5a30..5ce66287a8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include versioneer.py
 recursive-include dpctl/include *.h
+recursive-include dpctl/tensor/include *
 recursive-include dpctl *.pxd
 include dpctl/_sycl_context.h
 include dpctl/_sycl_context_api.h
diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
index 6036099c62..ab2bf72ebb 100644
--- a/dpctl/tensor/__init__.py
+++ b/dpctl/tensor/__init__.py
@@ -34,6 +34,7 @@
 from dpctl.tensor._copy_utils import copy_to_numpy as asnumpy
 from dpctl.tensor._copy_utils import copy_to_numpy as to_numpy
 from dpctl.tensor._ctors import asarray, empty
+from dpctl.tensor._dlpack import from_dlpack
 from dpctl.tensor._reshape import reshape
 from dpctl.tensor._usmarray import usm_ndarray
 
@@ -47,4 +48,5 @@
     "from_numpy",
     "to_numpy",
     "asnumpy",
+    "from_dlpack",
 ]
diff --git a/dpctl/tensor/_dlpack.pxd b/dpctl/tensor/_dlpack.pxd
new file mode 100644
index 0000000000..439880f7d2
--- /dev/null
+++ b/dpctl/tensor/_dlpack.pxd
@@ -0,0 +1,41 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+from ._usmarray cimport usm_ndarray
+
+
+cdef extern from './include/dlpack/dlpack.h' nogil:
+    int device_CPU 'kDLCPU'
+    int device_oneAPI 'kDLOneAPI'
+    int device_OpenCL 'kDLOpenCL'
+
+
+cpdef object to_dlpack_capsule(usm_ndarray array) except +
+cpdef usm_ndarray from_dlpack_capsule(object dltensor) except +
+
+cpdef from_dlpack(array)
+
+cdef class DLPackCreationError(Exception):
+    """
+    A DLPackCreationError exception is raised when constructing
+    a DLPack capsule from `usm_ndarray` based on a USM allocation
+    on a partitioned SYCL device.
+    """
+    pass
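Taken together, `_dlpack.pxd` declares the public surface that the modules below wire up. A minimal sketch of the intended round trip (assumes a working SYCL device; it mirrors the tests added at the end of this patch):

```python
import dpctl.tensor as dpt

x = dpt.empty((4, 4), dtype="f4")   # exporter side: usm_ndarray
y = dpt.from_dlpack(x)              # consumer side: zero-copy view
assert y.sycl_device == x.sycl_device and y.usm_type == x.usm_type
```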
+ """ + pass diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx new file mode 100644 index 0000000000..7d4ebfa0c3 --- /dev/null +++ b/dpctl/tensor/_dlpack.pyx @@ -0,0 +1,437 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True + +cimport cpython +from libc cimport stdlib +from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t + +cimport dpctl as c_dpctl +cimport dpctl.memory as c_dpmem + +from .._backend cimport ( + DPCTLDevice_Delete, + DPCTLDevice_GetParentDevice, + DPCTLSyclDeviceRef, + DPCTLSyclUSMRef, +) +from ._usmarray cimport usm_ndarray + +import numpy as np + +import dpctl +import dpctl.memory as dpmem + + +cdef extern from './include/dlpack/dlpack.h' nogil: + cdef int DLPACK_VERSION + + cdef enum DLDeviceType: + kDLCPU + kDLCUDA + kDLCUDAHost + kDLCUDAManaged + kDLROCM + kDLROCMHost + kDLOpenCL + kDLVulkan + kDLMetal + kDLVPI + kDLOneAPI + + ctypedef struct DLDevice: + DLDeviceType device_type + int device_id + + cdef enum DLDataTypeCode: + kDLInt + kDLUInt + kDLFloat + kDLBfloat + kDLComplex + + ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + + ctypedef struct DLTensor: + void *data + DLDevice device + int ndim + DLDataType dtype + int64_t *shape + int64_t *strides + uint64_t byte_offset + + ctypedef struct DLManagedTensor: + DLTensor dl_tensor + void *manager_ctx + void (*deleter)(DLManagedTensor *) # noqa: E211 + + +def get_build_dlpack_version(): + """ + Returns the string value of DLPACK_VERSION from dlpack.h + `dpctl.tensor` was built with. + + Returns: + A string value of the version of DLPack used to build + `dpctl`. + """ + return str(DLPACK_VERSION) + + +cdef void _pycapsule_deleter(object dlt_capsule): + cdef DLManagedTensor *dlm_tensor = NULL + if cpython.PyCapsule_IsValid(dlt_capsule, 'dltensor'): + dlm_tensor = cpython.PyCapsule_GetPointer( + dlt_capsule, 'dltensor') + dlm_tensor.deleter(dlm_tensor) + + +cdef void _managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil: + if dlm_tensor is not NULL: + stdlib.free(dlm_tensor.dl_tensor.shape) + cpython.Py_DECREF(dlm_tensor.manager_ctx) + dlm_tensor.manager_ctx = NULL + stdlib.free(dlm_tensor) + + +cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: + """ + to_dlpack_capsule(usm_ary) + + Constructs named Python capsule object referencing + instance of `DLManagerTensor` from + :class:`dpctl.tensor.usm_ndarray` instance. + + Args: + usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray` + Returns: + Python a new capsule with name "dltensor" that contains + a pointer to `DLManagedTensor` struct. + Raises: + DLPackCreationError: when array can be represented as + DLPack tensor. This may happen when array was allocated + on a partitioned sycl device, or its USM allocation is + not bound to the platform default SYCL context. 
+
+
+cpdef to_dlpack_capsule(usm_ndarray usm_ary) except +:
+    """
+    to_dlpack_capsule(usm_ary)
+
+    Constructs a named Python capsule object referencing an
+    instance of `DLManagedTensor` from a
+    :class:`dpctl.tensor.usm_ndarray` instance.
+
+    Args:
+        usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray`
+    Returns:
+        A new Python capsule with the name "dltensor" that contains
+        a pointer to a `DLManagedTensor` struct.
+    Raises:
+        DLPackCreationError: when the array can not be represented as a
+            DLPack tensor. This may happen when the array was allocated
+            on a partitioned SYCL device, or its USM allocation is
+            not bound to the platform default SYCL context.
+        MemoryError: when the host allocation needed for the
+            `DLManagedTensor` did not succeed.
+        ValueError: when the array element data type could not be
+            represented in `DLManagedTensor`.
+    """
+    cdef c_dpctl.SyclQueue ary_sycl_queue
+    cdef c_dpctl.SyclDevice ary_sycl_device
+    cdef DPCTLSyclDeviceRef pDRef = NULL
+    cdef DLManagedTensor *dlm_tensor = NULL
+    cdef DLTensor *dl_tensor = NULL
+    cdef int nd = usm_ary.get_ndim()
+    cdef char *data_ptr = usm_ary.get_data()
+    cdef Py_ssize_t *shape_ptr = NULL
+    cdef Py_ssize_t *strides_ptr = NULL
+    cdef int64_t *shape_strides_ptr = NULL
+    cdef int i = 0
+    cdef int device_id = -1
+    cdef char *base_ptr = NULL
+    cdef Py_ssize_t element_offset = 0
+    cdef Py_ssize_t byte_offset = 0
+
+    ary_base = usm_ary.get_base()
+    ary_sycl_queue = usm_ary.get_sycl_queue()
+    ary_sycl_device = ary_sycl_queue.get_sycl_device()
+
+    # check that ary_sycl_device is a non-partitioned device
+    pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref())
+    if pDRef is not NULL:
+        DPCTLDevice_Delete(pDRef)
+        raise DLPackCreationError(
+            "to_dlpack_capsule: DLPack can only export arrays allocated on "
+            "non-partitioned SYCL devices."
+        )
+    default_context = dpctl.SyclQueue(ary_sycl_device).sycl_context
+    if not usm_ary.sycl_context == default_context:
+        raise DLPackCreationError(
+            "to_dlpack_capsule: DLPack can only export arrays based on USM "
+            "allocations bound to a default platform SYCL context"
+        )
+
+    dlm_tensor = <DLManagedTensor *>stdlib.malloc(
+        sizeof(DLManagedTensor))
+    if dlm_tensor is NULL:
+        raise MemoryError(
+            "to_dlpack_capsule: Could not allocate memory for DLManagedTensor"
+        )
+    shape_strides_ptr = <int64_t *>stdlib.malloc((sizeof(int64_t) * 2) * nd)
+    if shape_strides_ptr is NULL:
+        stdlib.free(dlm_tensor)
+        raise MemoryError(
+            "to_dlpack_capsule: Could not allocate memory for shape/strides"
+        )
+    shape_ptr = usm_ary.get_shape()
+    for i in range(nd):
+        shape_strides_ptr[i] = shape_ptr[i]
+    strides_ptr = usm_ary.get_strides()
+    if strides_ptr:
+        for i in range(nd):
+            shape_strides_ptr[nd + i] = strides_ptr[i]
+
+    device_id = ary_sycl_device.get_overall_ordinal()
+    if device_id < 0:
+        stdlib.free(shape_strides_ptr)
+        stdlib.free(dlm_tensor)
+        raise DLPackCreationError(
+            "to_dlpack_capsule: failed to determine device_id"
+        )
+
+    ary_dt = usm_ary.dtype
+    ary_dtk = ary_dt.kind
+    element_offset = usm_ary.get_offset()
+    byte_offset = element_offset * (<Py_ssize_t>ary_dt.itemsize)
+
+    dl_tensor = &dlm_tensor.dl_tensor
+    dl_tensor.data = <void *>(data_ptr - byte_offset)
+    dl_tensor.ndim = nd
+    dl_tensor.byte_offset = byte_offset
+    dl_tensor.shape = &shape_strides_ptr[0]
+    if strides_ptr is NULL:
+        dl_tensor.strides = NULL
+    else:
+        dl_tensor.strides = &shape_strides_ptr[nd]
+    dl_tensor.device.device_type = kDLOneAPI
+    dl_tensor.device.device_id = device_id
+    dl_tensor.dtype.lanes = 1
+    dl_tensor.dtype.bits = <uint8_t>(ary_dt.itemsize * 8)
+    if (ary_dtk == "b"):
+        dl_tensor.dtype.code = kDLUInt
+    elif (ary_dtk == "u"):
+        dl_tensor.dtype.code = kDLUInt
+    elif (ary_dtk == "i"):
+        dl_tensor.dtype.code = kDLInt
+    elif (ary_dtk == "f"):
+        dl_tensor.dtype.code = kDLFloat
+    elif (ary_dtk == "c"):
+        dl_tensor.dtype.code = kDLComplex
+    else:
+        stdlib.free(shape_strides_ptr)
+        stdlib.free(dlm_tensor)
+        raise ValueError("Unrecognized array data type")
+
+    dlm_tensor.manager_ctx = <void *>usm_ary
+    cpython.Py_INCREF(usm_ary)
+    dlm_tensor.deleter = _managed_tensor_deleter
+
+    return cpython.PyCapsule_New(dlm_tensor, 'dltensor', _pycapsule_deleter)
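+
+# Example (a sketch, mirroring test_dlpack_exporter below): the capsule
+# produced above can be validated from Python with ctypes; `x` is an
+# illustrative name.
+#
+#     import ctypes
+#     import dpctl.tensor as dpt
+#
+#     x = dpt.empty((64,), dtype="i4")
+#     caps = x.__dlpack__()        # capsule named "dltensor"
+#
+#     caps_fn = ctypes.pythonapi.PyCapsule_IsValid
+#     caps_fn.restype = bool
+#     caps_fn.argtypes = [ctypes.py_object, ctypes.c_char_p]
+#     assert caps_fn(caps, b"dltensor")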
+
+
+cdef class _DLManagedTensorOwner:
+    """
+    Helper class managing the lifetime of the DLManagedTensor struct
+    transferred from a 'dltensor' capsule.
+    """
+    cdef DLManagedTensor *dlm_tensor
+
+    def __cinit__(self):
+        self.dlm_tensor = NULL
+
+    def __dealloc__(self):
+        if self.dlm_tensor:
+            self.dlm_tensor.deleter(self.dlm_tensor)
+
+    @staticmethod
+    cdef _DLManagedTensorOwner _create(DLManagedTensor *dlm_tensor_src):
+        cdef _DLManagedTensorOwner res
+        res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner)
+        res.dlm_tensor = dlm_tensor_src
+        return res
+
+
+cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
+    """
+    from_dlpack_capsule(caps)
+
+    Reconstructs an instance of :class:`dpctl.tensor.usm_ndarray` from
+    a named Python capsule object referencing an instance of
+    `DLManagedTensor` without copy. The instance forms a view into the
+    memory of the tensor.
+
+    Args:
+        caps: Python capsule with the name "dltensor" expected to
+            reference an instance of `DLManagedTensor` struct.
+    Returns:
+        Instance of :class:`dpctl.tensor.usm_ndarray` with a view into
+        the memory of the tensor. The capsule is renamed to
+        "used_dltensor" upon success.
+    Raises:
+        TypeError: if the argument is not a "dltensor" capsule.
+        ValueError: if the argument is a "used_dltensor" capsule,
+            if the USM pointer is not bound to the reconstructed
+            sycl context, or if the DLPack's device_type is not supported
+            by dpctl.
+    """
+    cdef DLManagedTensor *dlm_tensor = NULL
+    cdef bytes usm_type
+    cdef size_t sz = 1
+    cdef int i
+    cdef int element_bytesize = 0
+    cdef Py_ssize_t offset_min = 0
+    cdef Py_ssize_t offset_max = 0
+    cdef int64_t stride_i
+    cdef char *mem_ptr = NULL
+    cdef Py_ssize_t element_offset = 0
+
+    if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'):
+        if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor'):
+            raise ValueError(
+                "A DLPack tensor object can not be consumed multiple times"
+            )
+        else:
+            raise TypeError(
+                "A Python 'dltensor' capsule was expected, "
+                f"got {type(py_caps)}"
+            )
+    dlm_tensor = <DLManagedTensor *>cpython.PyCapsule_GetPointer(
+        py_caps, "dltensor")
+    # Verify that we can work with this device
+    if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI:
+        q = dpctl.SyclQueue(str(dlm_tensor.dl_tensor.device.device_id))
+        if dlm_tensor.dl_tensor.data is NULL:
+            usm_type = b"device"
+        else:
+            usm_type = c_dpmem._Memory.get_pointer_type(
+                <DPCTLSyclUSMRef>dlm_tensor.dl_tensor.data,
+                <c_dpctl.SyclContext>q.sycl_context)
+            if usm_type == b"unknown":
+                raise ValueError(
+                    "Data pointer in DLPack is not bound to default sycl "
+                    "context of device "
+                    f"'{dlm_tensor.dl_tensor.device.device_id}', "
+                    f"translated to {q.sycl_device.filter_string}"
+                )
+        if dlm_tensor.dl_tensor.dtype.bits % 8:
+            raise ValueError(
+                "Can not import DLPack tensor whose element's "
+                "bitsize is not a multiple of 8"
+            )
+        if dlm_tensor.dl_tensor.dtype.lanes != 1:
+            raise ValueError(
+                "Can not import DLPack tensor with lanes != 1"
+            )
+        if dlm_tensor.dl_tensor.strides is NULL:
+            for i in range(dlm_tensor.dl_tensor.ndim):
+                sz = sz * dlm_tensor.dl_tensor.shape[i]
+        else:
+            offset_min = 0
+            offset_max = 0
+            for i in range(dlm_tensor.dl_tensor.ndim):
+                stride_i = dlm_tensor.dl_tensor.strides[i]
+                if stride_i > 0:
+                    offset_max = offset_max + stride_i * (
+                        dlm_tensor.dl_tensor.shape[i] - 1
+                    )
+                else:
+                    offset_min = offset_min + stride_i * (
+                        dlm_tensor.dl_tensor.shape[i] - 1
+                    )
+            sz = offset_max - offset_min + 1
+        if sz == 0:
+            sz = 1
+
+        element_bytesize = (dlm_tensor.dl_tensor.dtype.bits // 8)
+        sz = sz * element_bytesize
+        element_offset = dlm_tensor.dl_tensor.byte_offset // element_bytesize
+
+        # transfer dlm_tensor ownership
+        dlm_holder = _DLManagedTensorOwner._create(dlm_tensor)
+        cpython.PyCapsule_SetName(py_caps, 'used_dltensor')
+
+        if dlm_tensor.dl_tensor.data is NULL:
+            usm_mem = dpmem.MemoryUSMDevice(sz, q)
+        else:
+            mem_ptr = <char *>dlm_tensor.dl_tensor.data + (
+                dlm_tensor.dl_tensor.byte_offset
+            )
+            mem_ptr = mem_ptr - (element_offset * element_bytesize)
+            usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+                <DPCTLSyclUSMRef>mem_ptr,
+                sz,
+                (<c_dpctl.SyclQueue>q).get_queue_ref(),
+                memory_owner=dlm_holder
+            )
+        py_shape = list()
+        for i in range(dlm_tensor.dl_tensor.ndim):
+            py_shape.append(dlm_tensor.dl_tensor.shape[i])
+        if (dlm_tensor.dl_tensor.strides is NULL):
+            py_strides = None
+        else:
+            py_strides = list()
+            for i in range(dlm_tensor.dl_tensor.ndim):
+                py_strides.append(dlm_tensor.dl_tensor.strides[i])
+        if (dlm_tensor.dl_tensor.dtype.code == kDLUInt):
+            ary_dt = np.dtype("u" + str(element_bytesize))
+        elif (dlm_tensor.dl_tensor.dtype.code == kDLInt):
+            ary_dt = np.dtype("i" + str(element_bytesize))
+        elif (dlm_tensor.dl_tensor.dtype.code == kDLFloat):
+            ary_dt = np.dtype("f" + str(element_bytesize))
+        elif (dlm_tensor.dl_tensor.dtype.code == kDLComplex):
+            ary_dt = np.dtype("c" + str(element_bytesize))
+        else:
+            raise ValueError(
+                "Can not import DLPack tensor with type code {}.".format(
+                    dlm_tensor.dl_tensor.dtype.code
+                )
+            )
+        res_ary = usm_ndarray(
+            py_shape,
+            dtype=ary_dt,
+            buffer=usm_mem,
+            strides=py_strides,
+            offset=element_offset
+        )
+        return res_ary
+    else:
+        raise ValueError(
+            "The DLPack tensor resides on unsupported device."
+        )
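+
+# Example (a sketch): a capsule is single-use. from_dlpack_capsule renames
+# it to "used_dltensor" and takes over the DLManagedTensor through
+# _DLManagedTensorOwner, so a second import attempt raises ValueError.
+#
+#     import dpctl.tensor as dpt
+#
+#     x = dpt.empty((10,), dtype="f4")
+#     caps = x.__dlpack__()
+#     y = dpt._dlpack.from_dlpack_capsule(caps)   # zero-copy view
+#     assert y._pointer == x._pointer
+#     dpt._dlpack.from_dlpack_capsule(caps)       # ValueError: consumed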
+
+
+cpdef from_dlpack(array):
    """
    dpctl.tensor.from_dlpack(obj)

    Constructs a :class:`dpctl.tensor.usm_ndarray` instance from a Python
    object `obj` that implements the `__dlpack__` protocol. The output
    array is always a zero-copy view of the input.

    Args:
        obj: A Python object representing an array that supports the
            `__dlpack__` protocol.
    Raises:
        TypeError: if `obj` does not implement a `__dlpack__` method.
        ValueError: if a zero-copy view can not be constructed because
            the input array resides on an unsupported device.
    """
+    if not hasattr(array, "__dlpack__"):
+        raise TypeError(
+            f"The argument of type {type(array)} does not implement "
+            "`__dlpack__` method."
+        )
+    dlpack_attr = getattr(array, "__dlpack__")
+    if not callable(dlpack_attr):
+        raise TypeError(
+            f"The argument of type {type(array)} does not implement "
+            "`__dlpack__` method."
+        )
+    dlpack_capsule = dlpack_attr()
+    return from_dlpack_capsule(dlpack_capsule)
diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx
index 57c5225a3e..75131fd916 100644
--- a/dpctl/tensor/_usmarray.pyx
+++ b/dpctl/tensor/_usmarray.pyx
@@ -32,6 +32,7 @@ from cpython.tuple cimport PyTuple_New, PyTuple_SetItem
 cimport dpctl as c_dpctl
 cimport dpctl.memory as c_dpmem
+cimport dpctl.tensor._dlpack as c_dlpack
 
 include "_stride_utils.pxi"
 include "_types.pxi"
@@ -738,10 +739,46 @@ cdef class usm_ndarray:
         return NotImplemented
 
     def __dlpack__(self, stream=None):
-        return NotImplemented
+        """
+        Produces a DLPack capsule.
+
+        Raises:
+            MemoryError: when host memory can not be allocated.
+            DLPackCreationError: when the array is allocated on a
+                partitioned SYCL device, or with a non-default context.
+            NotImplementedError: when a non-default value of the `stream`
+                keyword is used.
+        """
+        if stream is None:
+            return c_dlpack.to_dlpack_capsule(self)
+        else:
+            raise NotImplementedError(
+                "Only stream=None is supported. "
+                "Use `dpctl.SyclQueue.submit_barrier` to synchronize queues."
+            )
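+
+    # Example (a sketch): when producer and consumer work on different
+    # queues, synchronize explicitly before exporting, as the message
+    # above suggests; `q` and `x` are illustrative names.
+    #
+    #     ev = q.submit_barrier()   # q: queue with pending writes to x
+    #     ev.wait()
+    #     caps = x.__dlpack__()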
 
     def __dlpack_device__(self):
-        return NotImplemented
+        """
+        Gives a tuple (`device_type`, `device_id`) corresponding to the
+        `DLDevice` entry in `DLTensor` in the DLPack protocol.
+
+        The tuple describes the non-partitioned device where the array
+        has been allocated.
+
+        Raises:
+            DLPackCreationError: when the array is allocated on a
+                partitioned SYCL device.
+        """
+        cdef int dev_id = (
+            <c_dpctl.SyclDevice>self.sycl_device
+        ).get_overall_ordinal()
+        if dev_id < 0:
+            raise c_dlpack.DLPackCreationError(
+                "DLPack protocol is only supported for non-partitioned "
+                "devices"
+            )
+        else:
+            return (
+                c_dlpack.device_oneAPI,
+                dev_id,
+            )
 
     def __eq__(self, other):
         return _dispatch_binary_elementwise(self, "equal", other)
diff --git a/dpctl/tensor/include/dlpack/.clang-format b/dpctl/tensor/include/dlpack/.clang-format
new file mode 100644
index 0000000000..9d159247d5
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/.clang-format
@@ -0,0 +1,2 @@
+DisableFormat: true
+SortIncludes: false
diff --git a/dpctl/tensor/include/dlpack/LICENSE.third-party b/dpctl/tensor/include/dlpack/LICENSE.third-party
new file mode 100644
index 0000000000..20a9c8a7b4
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/LICENSE.third-party
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2017 by Contributors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/dpctl/tensor/include/dlpack/README.md b/dpctl/tensor/include/dlpack/README.md
new file mode 100644
index 0000000000..5d9bf51177
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/README.md
@@ -0,0 +1,7 @@
+# DLPack header
+
+The header `dlpack.h` was downloaded from the `https://github.com/dmlc/dlpack.git` remote at commit [`98861a50e5`](https://github.com/dmlc/dlpack/commit/98861a50e5ade5a6b2df388b12d67b418e3baebe).
+
+The file can also be viewed using the GitHub web interface at https://github.com/dmlc/dlpack/blob/98861a50e5ade5a6b2df388b12d67b418e3baebe/include/dlpack/dlpack.h
+
+The license file was retrieved from https://github.com/dmlc/dlpack/blob/main/LICENSE
diff --git a/dpctl/tensor/include/dlpack/dlpack.h b/dpctl/tensor/include/dlpack/dlpack.h
new file mode 100644
index 0000000000..afbac0573a
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/dlpack.h
@@ -0,0 +1,213 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 60
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+typedef enum {
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLCUDA = 2,
+  /*!
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
+   */
+  kDLCUDAHost = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
+  /*!
+   * \brief Reserved extension device type,
+   * used for quickly test extension device
+   * The semantics can differ depending on the implementation.
+   */
+  kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
+  /*!
+   * \brief Unified shared memory allocated on a oneAPI non-partititioned
+   * device. Call to oneAPI runtime is required to determine the device
+   * type, the USM allocation type and the sycl context it is bound to.
+   *
+   */
+  kDLOneAPI = 14,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
+  int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+  /*! \brief signed integer */
+  kDLInt = 0U,
+  /*! \brief unsigned integer */
+  kDLUInt = 1U,
+  /*! \brief IEEE floating point */
+  kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
+  kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ *  Examples
+ *   - float: type_code = 2, bits = 32, lanes=1
+ *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ *   - int8: type_code = 0, bits = 8, lanes=1
+ *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ */
+typedef struct {
+  /*!
+   * \brief Type code of base types.
+   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+   * footprint, but the value should be one of DLDataTypeCode enum values.
+   * */
+  uint8_t code;
+  /*!
+   * \brief Number of bits, common choices are 8, 16, 32.
+   */
+  uint8_t bits;
+  /*! \brief Number of lanes in the type, used for vector types. */
+  uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+  /*!
+   * \brief The data pointer points to the allocated data. This will be CUDA
+   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
+   * types.
This pointer is always aligned to 256 bytes as in CUDA. The + * `byte_offset` field should be used to point to the beginning of the data. + * + * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed + * (after which this note will be updated); at the moment it is recommended + * to not rely on the data pointer being correctly aligned. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void * manager_ctx; + /*! \brief Destructor signature void (*)(void*) - this should be called + * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL + * if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor * self); +} DLManagedTensor; +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/dpctl/tests/test_usm_ndarray_dlpack.py b/dpctl/tests/test_usm_ndarray_dlpack.py new file mode 100644 index 0000000000..a40688965b --- /dev/null +++ b/dpctl/tests/test_usm_ndarray_dlpack.py @@ -0,0 +1,120 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ctypes + +import pytest + +import dpctl +import dpctl.tensor as dpt + +device_oneAPI = 14 # DLDeviceType.kDLOneAPI + +_usm_types_list = ["shared", "device", "host"] + + +@pytest.fixture(params=_usm_types_list) +def usm_type(request): + return request.param + + +_typestrs_list = [ + "b1", + "u1", + "i1", + "u2", + "i2", + "u4", + "i4", + "u8", + "i8", + "f2", + "f4", + "f8", + "c8", + "c16", +] + + +@pytest.fixture(params=_typestrs_list) +def typestr(request): + return request.param + + +def test_dlpack_device(usm_type): + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty((64,), dtype="u1", usm_type=usm_type, device=sycl_dev) + dev = X.__dlpack_device__() + assert type(dev) is tuple + assert len(dev) == 2 + assert dev[0] == device_oneAPI + assert sycl_dev == all_root_devices[dev[1]] + + +def test_dlpack_exporter(typestr, usm_type): + caps_fn = ctypes.pythonapi.PyCapsule_IsValid + caps_fn.restype = bool + caps_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty((64,), dtype=typestr, usm_type=usm_type, device=sycl_dev) + caps = X.__dlpack__() + assert caps_fn(caps, b"dltensor") + Y = X[::2] + caps2 = Y.__dlpack__() + assert caps_fn(caps2, b"dltensor") + + +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack(shape, typestr, usm_type): + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty(shape, dtype=typestr, usm_type=usm_type, device=sycl_dev) + Y = dpt.from_dlpack(X) + assert X.shape == Y.shape + assert X.dtype == Y.dtype or ( + str(X.dtype) == "bool" and str(Y.dtype) == "uint8" + ) + assert X.sycl_device == Y.sycl_device + assert X.usm_type == Y.usm_type + assert X._pointer == Y._pointer + if Y.ndim: + V = Y[::-1] + W = dpt.from_dlpack(V) + assert V.strides == W.strides + + +def test_from_dlpack_input_validation(): + vstr = dpt._dlpack.get_build_dlpack_version() + assert type(vstr) is str + with pytest.raises(TypeError): + dpt.from_dlpack(None) + + class DummyWithProperty: + @property + def __dlpack__(self): + return None + + with pytest.raises(TypeError): + dpt.from_dlpack(DummyWithProperty()) + + class DummyWithMethod: + def __dlpack__(self): + return None + + with pytest.raises(TypeError): + dpt.from_dlpack(DummyWithMethod()) diff --git a/setup.py b/setup.py index 928a8e0010..5435f3d489 100644 --- a/setup.py +++ b/setup.py @@ -240,6 +240,24 @@ def extensions(): runtime_library_dirs=extension_args["runtime_library_dirs"], define_macros=extension_args["define_macros"], ), + Extension( + "dpctl.tensor._dlpack", + [ + os.path.join("dpctl", "tensor", "_dlpack.pyx"), + ], + depends=extension_args["depends"], + language="c++", + include_dirs=extension_args["include_dirs"] + + [ + os.path.join("dpctl", "tensor"), + ], + extra_compile_args=extension_args["extra_compile_args"], + extra_link_args=extension_args["extra_link_args"], + libraries=extension_args["libraries"], + library_dirs=extension_args["library_dirs"], + runtime_library_dirs=extension_args["runtime_library_dirs"], + define_macros=extension_args["define_macros"], + ), ] return extensions
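With the new extension registered in `setup.py`, a rebuild picks up `dpctl.tensor._dlpack`. A quick end-to-end smoke test (a sketch; assumes at least one SYCL device and mirrors the assertions in `test_usm_ndarray_dlpack.py`):

```python
import dpctl
import dpctl.tensor as dpt

x = dpt.empty((2, 2, 2), dtype="c8", usm_type="shared")
dev_type, dev_id = x.__dlpack_device__()
assert dev_type == 14                               # kDLOneAPI
assert dpctl.get_devices()[dev_id] == x.sycl_device

y = dpt.from_dlpack(x[::2])                         # strided views round-trip
assert y.strides == x[::2].strides
```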