From c2abd2c2716fecfdb520b950e82f2f5b4ef5d111 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 11 Nov 2021 08:20:50 -0600 Subject: [PATCH 01/12] Added tensor._dlpack extension, vendored dlpack header Added dlpack's license file in include/dlpack/ Added note in README.md about the location of the origin LICENSE file from which the copy was made Add _dlpack.pyx to flake exception file Implemented to_dlpack_capsule/from_dlpack_capsule functions --- .flake8 | 1 + dpctl/tensor/_dlpack.pxd | 33 ++ dpctl/tensor/_dlpack.pyx | 309 ++++++++++++++++++ dpctl/tensor/include/dlpack/.clang-format | 2 + .../tensor/include/dlpack/LICENSE.third-party | 201 ++++++++++++ dpctl/tensor/include/dlpack/README.md | 7 + dpctl/tensor/include/dlpack/dlpack.h | 213 ++++++++++++ setup.py | 18 + 8 files changed, 784 insertions(+) create mode 100644 dpctl/tensor/_dlpack.pxd create mode 100644 dpctl/tensor/_dlpack.pyx create mode 100644 dpctl/tensor/include/dlpack/.clang-format create mode 100644 dpctl/tensor/include/dlpack/LICENSE.third-party create mode 100644 dpctl/tensor/include/dlpack/README.md create mode 100644 dpctl/tensor/include/dlpack/dlpack.h diff --git a/.flake8 b/.flake8 index 6e9c78b236..c4dc50e407 100644 --- a/.flake8 +++ b/.flake8 @@ -23,6 +23,7 @@ per-file-ignores = dpctl/memory/_memory.pyx: E999, E225, E226, E227 dpctl/program/_program.pyx: E999, E225, E226, E227 dpctl/tensor/_usmarray.pyx: E999, E225, E226, E227 + dpctl/tensor/_dlpack.pyx: E999, E225, E226, E227 dpctl/tensor/numpy_usm_shared.py: F821 dpctl/tests/_cython_api.pyx: E999, E225, E227, E402 dpctl/utils/_compute_follows_data.pyx: E999, E225, E227 diff --git a/dpctl/tensor/_dlpack.pxd b/dpctl/tensor/_dlpack.pxd new file mode 100644 index 0000000000..2a8ff3dda6 --- /dev/null +++ b/dpctl/tensor/_dlpack.pxd @@ -0,0 +1,33 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True + +from ._usmarray cimport usm_ndarray + + +cdef extern from './include/dlpack/dlpack.h' nogil: + int device_CPU 'kDLCPU' + int device_oneAPI 'kDLOneAPI' + int device_OpenCL 'kDLOpenCL' + + +cpdef object to_dlpack_capsule(usm_ndarray array) except + +cpdef usm_ndarray from_dlpack_capsule(object dltensor) except + + +cpdef from_dlpack(array) diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx new file mode 100644 index 0000000000..09887dcb75 --- /dev/null +++ b/dpctl/tensor/_dlpack.pyx @@ -0,0 +1,309 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+cimport cpython
+from libc cimport stdlib
+from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t
+
+cimport dpctl as c_dpctl
+cimport dpctl.memory as c_dpmem
+
+from .._backend cimport (
+    DPCTLDevice_Delete,
+    DPCTLDevice_GetParentDevice,
+    DPCTLSyclDeviceRef,
+    DPCTLSyclUSMRef,
+)
+from ._usmarray cimport usm_ndarray
+
+import numpy as np
+
+import dpctl
+
+
+cdef extern from './include/dlpack/dlpack.h' nogil:
+    cdef int DLPACK_VERSION
+
+    cdef enum DLDeviceType:
+        kDLCPU
+        kDLCUDA
+        kDLCUDAHost
+        kDLCUDAManaged
+        kDLROCM
+        kDLROCMHost
+        kDLOpenCL
+        kDLVulkan
+        kDLMetal
+        kDLVPI
+        kDLOneAPI
+
+    ctypedef struct DLDevice:
+        DLDeviceType device_type
+        int device_id
+
+    cdef enum DLDataTypeCode:
+        kDLInt
+        kDLUInt
+        kDLFloat
+        kDLBfloat
+        kDLComplex
+
+    ctypedef struct DLDataType:
+        uint8_t code
+        uint8_t bits
+        uint16_t lanes
+
+    ctypedef struct DLTensor:
+        void* data
+        DLDevice device
+        int ndim
+        DLDataType dtype
+        int64_t* shape
+        int64_t* strides
+        uint64_t byte_offset
+
+    ctypedef struct DLManagedTensor:
+        DLTensor dl_tensor
+        void* manager_ctx
+        void (*deleter)(DLManagedTensor*)  # noqa: E211
+
+
+def get_build_dlpack_version():
+    return str(DLPACK_VERSION)
+
+
+cdef void pycapsule_deleter(object dlt_capsule):
+    cdef DLManagedTensor *dlm_tensor = NULL
+    if cpython.PyCapsule_IsValid(dlt_capsule, 'dltensor'):
+        dlm_tensor = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
+            dlt_capsule, 'dltensor')
+        dlm_tensor.deleter(dlm_tensor)
+    elif cpython.PyCapsule_IsValid(dlt_capsule, 'used_dltensor'):
+        dlm_tensor = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
+            dlt_capsule, 'used_dltensor')
+        dlm_tensor.deleter(dlm_tensor)
+
+
+cdef void managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil:
+    if dlm_tensor is not NULL:
+        stdlib.free(dlm_tensor.dl_tensor.shape)
+        cpython.Py_DECREF(<usm_ndarray>dlm_tensor.manager_ctx)
+        dlm_tensor.manager_ctx = NULL
+        stdlib.free(dlm_tensor)
+
+
+cdef class DLPackCreationError(Exception):
+    """
+    A DLPackCreationError exception is raised when constructing
+    DLPack capsule from `usm_ndarray` based on a USM allocation
+    on a partitioned SYCL device.
+ """ + pass + + +cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: + """Constructs named Python capsule object referencing + instance of `DLManagerTensor` from `usm_ndarray` instance""" + cdef c_dpctl.SyclQueue ary_sycl_queue + cdef c_dpctl.SyclDevice ary_sycl_device + cdef DPCTLSyclDeviceRef pDRef = NULL + cdef DLManagedTensor *dlm_tensor = NULL + cdef DLTensor* dl_tensor = NULL + cdef int nd = usm_ary.get_ndim() + cdef char* data_ptr = usm_ary.get_data() + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t *strides_ptr = NULL + cdef int64_t *shape_strides_ptr = NULL + cdef int i = 0 + cdef int device_id = -1 + + ary_sycl_queue = usm_ary.get_sycl_queue() + ary_sycl_device = ary_sycl_queue.get_sycl_device() + + # check that ary_sycl_device is a non-partitioned device + pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref()) + if pDRef is not NULL: + DPCTLDevice_Delete(pDRef) + raise DLPackCreationError( + "to_dlpack_capsule: DLPack can only export arrays allocated on " + "non-partitioned SYCL devices." + ) + + dlm_tensor = stdlib.malloc( + sizeof(DLManagedTensor)) + if dlm_tensor is NULL: + raise MemoryError( + "to_dlpack_capsule: Could not allocate memory for DLManagedTensor" + ) + shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) + if shape_strides_ptr is NULL: + stdlib.free(dlm_tensor) + raise MemoryError( + "to_dlpack_capsule: Could not allocate memory for shape/strides" + ) + shape_ptr = usm_ary.get_shape() + for i in range(nd): + shape_strides_ptr[i] = shape_ptr[i] + strides_ptr = usm_ary.get_strides() + if strides_ptr: + for i in range(nd): + shape_strides_ptr[nd + i] = strides_ptr[i] + + device_id = ary_sycl_device.get_overall_ordinal() + if device_id < 0: + stdlib.free(shape_strides_ptr) + stdlib.free(dlm_tensor) + raise DLPackCreationError( + "to_dlpack_capsule: failed to determine device_id" + ) + + ary_dt = usm_ary.dtype + ary_dtk = ary_dt.kind + + dl_tensor = &dlm_tensor.dl_tensor + dl_tensor.data = data_ptr + dl_tensor.ndim = nd + dl_tensor.byte_offset = 0 + dl_tensor.shape = &shape_strides_ptr[0] + if strides_ptr is NULL: + dl_tensor.strides = NULL + else: + dl_tensor.strides = &shape_strides_ptr[nd] + dl_tensor.device.device_type = kDLOneAPI + dl_tensor.device.device_id = device_id + dl_tensor.dtype.lanes = 1 + dl_tensor.dtype.bits = (ary_dt.itemsize * 8) + if (ary_dtk == "b"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "u"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "i"): + dl_tensor.dtype.code = kDLInt + elif (ary_dtk == "f"): + dl_tensor.dtype.code = kDLFloat + elif (ary_dtk == "c"): + dl_tensor.dtype.code = kDLComplex + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlm_tensor) + raise ValueError("Unrecognized array data type") + + dlm_tensor.manager_ctx = usm_ary + cpython.Py_INCREF(usm_ary) + dlm_tensor.deleter = managed_tensor_deleter + + return cpython.PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) + + +cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: + """Reconstructs instance of usm_ndarray from named Python + capsule object referencing instance of `DLManagedTensor` without + a copy""" + cdef DLManagedTensor *dlm_tensor = NULL + cdef bytes usm_type + cdef size_t sz = 1 + cdef int i + cdef int element_bytesize = 0 + + if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'): + if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor'): + raise ValueError( + "A DLPack tensor object can not be consumed multiple times" + ) + else: + raise TypeError( + f"A Python 'dltensor' capsule was expected, 
" + "got {type(dlm_tensor)}" + ) + dlm_tensor = cpython.PyCapsule_GetPointer( + py_caps, "dltensor") + # Verify that we can work with this device + if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI: + q = dpctl.SyclQueue(str(dlm_tensor.dl_tensor.device.device_id)) + usm_type = c_dpmem._Memory.get_pointer_type( + dlm_tensor.dl_tensor.data, + q.sycl_context) + if usm_type == b"unknown": + raise ValueError( + f"Data pointer in DLPack is not bound to default sycl " + "context of device '{device_id}', translated to " + "{q.sycl_device.filter_string}" + ) + if dlm_tensor.dl_tensor.dtype.bits % 8: + raise ValueError( + "Can not import DLPack tensor whose element's " + "bitsize is not a multiple of 8" + ) + if dlm_tensor.dl_tensor.dtype.lanes != 1: + raise ValueError( + "Can not import DLPack tensor with lanes != 1" + ) + for i in range(dlm_tensor.dl_tensor.ndim): + sz = sz * dlm_tensor.dl_tensor.shape[i] + + element_bytesize = (dlm_tensor.dl_tensor.dtype.bits // 8) + sz = sz * element_bytesize + usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref( + dlm_tensor.dl_tensor.data, + sz, + (q).get_queue_ref(), + memory_owner=py_caps + ) + py_shape = list() + for i in range(dlm_tensor.dl_tensor.ndim): + py_shape.append(dlm_tensor.dl_tensor.shape[i]) + if (dlm_tensor.dl_tensor.strides is NULL): + py_strides = None + else: + py_strides = list() + for i in range(dlm_tensor.dl_tensor.ndim): + py_strides.append(dlm_tensor.dl_tensor.strides[i]) + if (dlm_tensor.dl_tensor.dtype.code == kDLUInt): + ary_dt = np.dtype("u" + str(element_bytesize)) + elif (dlm_tensor.dl_tensor.dtype.code == kDLInt): + ary_dt = np.dtype("i" + str(element_bytesize)) + elif (dlm_tensor.dl_tensor.dtype.code == kDLFloat): + ary_dt = np.dtype("f" + str(element_bytesize)) + elif (dlm_tensor.dl_tensor.dtype.code == kDLComplex): + ary_dt = np.dtype("c" + str(element_bytesize)) + else: + raise ValueError( + "Can not import DLPack tensor with type code {}.".format( + dlm_tensor.dl_tensor.dtype.code + ) + ) + res_ary = usm_ndarray( + py_shape, + dtype=ary_dt, + buffer=usm_mem, + strides=py_strides + ) + cpython.PyCapsule_SetName(py_caps, 'used_dltensor') + return res_ary + else: + raise ValueError( + "The DLPack tensor resides on unsupported device." + ) + + +cpdef from_dlpack(array): + """Constructs `usm_ndarray` from a Python object that implements + `__dlpack__` protocol.""" + pass diff --git a/dpctl/tensor/include/dlpack/.clang-format b/dpctl/tensor/include/dlpack/.clang-format new file mode 100644 index 0000000000..9d159247d5 --- /dev/null +++ b/dpctl/tensor/include/dlpack/.clang-format @@ -0,0 +1,2 @@ +DisableFormat: true +SortIncludes: false diff --git a/dpctl/tensor/include/dlpack/LICENSE.third-party b/dpctl/tensor/include/dlpack/LICENSE.third-party new file mode 100644 index 0000000000..20a9c8a7b4 --- /dev/null +++ b/dpctl/tensor/include/dlpack/LICENSE.third-party @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/dpctl/tensor/include/dlpack/README.md b/dpctl/tensor/include/dlpack/README.md
new file mode 100644
index 0000000000..5d9bf51177
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/README.md
@@ -0,0 +1,7 @@
+# DLPack header
+
+The header `dlpack.h` was downloaded from the `https://github.com/dmlc/dlpack.git` remote at commit [`98861a50e5`](https://github.com/dmlc/dlpack/commit/98861a50e5ade5a6b2df388b12d67b418e3baebe).
+
+The file can also be viewed using the GitHub web interface at https://github.com/dmlc/dlpack/blob/98861a50e5ade5a6b2df388b12d67b418e3baebe/include/dlpack/dlpack.h
+
+The license file was retrieved from https://github.com/dmlc/dlpack/blob/main/LICENSE
diff --git a/dpctl/tensor/include/dlpack/dlpack.h b/dpctl/tensor/include/dlpack/dlpack.h
new file mode 100644
index 0000000000..afbac0573a
--- /dev/null
+++ b/dpctl/tensor/include/dlpack/dlpack.h
@@ -0,0 +1,213 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 60
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+typedef enum {
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLCUDA = 2,
+  /*!
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
+   */
+  kDLCUDAHost = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
+  /*!
+   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+   */
+  kDLROCMHost = 11,
+  /*!
+   * \brief Reserved extension device type,
+   * used for quickly test extension device
+   * The semantics can differ depending on the implementation.
+   */
+  kDLExtDev = 12,
+  /*!
+   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+   */
+  kDLCUDAManaged = 13,
+  /*!
+   * \brief Unified shared memory allocated on a oneAPI non-partititioned
+   * device. Call to oneAPI runtime is required to determine the device
+   * type, the USM allocation type and the sycl context it is bound to.
+   *
+   */
+  kDLOneAPI = 14,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*!
+   * \brief The device index.
+   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+   */
+  int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+  /*! \brief signed integer */
+  kDLInt = 0U,
+  /*! \brief unsigned integer */
+  kDLUInt = 1U,
+  /*! \brief IEEE floating point */
+  kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
+  kDLBfloat = 4U,
+  /*!
+ * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. + * + * Examples + * - float: type_code = 2, bits = 32, lanes=1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4 + * - int8: type_code = 0, bits = 8, lanes=1 + * - std::complex: type_code = 5, bits = 64, lanes = 1 + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The data pointer points to the allocated data. This will be CUDA + * device pointer or cl_mem handle in OpenCL. It may be opaque on some device + * types. This pointer is always aligned to 256 bytes as in CUDA. The + * `byte_offset` field should be used to point to the beginning of the data. + * + * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed + * (after which this note will be updated); at the moment it is recommended + * to not rely on the data pointer being correctly aligned. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void * manager_ctx; + /*! \brief Destructor signature void (*)(void*) - this should be called + * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL + * if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. 
+ */ + void (*deleter)(struct DLManagedTensor * self); +} DLManagedTensor; +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/setup.py b/setup.py index 928a8e0010..5435f3d489 100644 --- a/setup.py +++ b/setup.py @@ -240,6 +240,24 @@ def extensions(): runtime_library_dirs=extension_args["runtime_library_dirs"], define_macros=extension_args["define_macros"], ), + Extension( + "dpctl.tensor._dlpack", + [ + os.path.join("dpctl", "tensor", "_dlpack.pyx"), + ], + depends=extension_args["depends"], + language="c++", + include_dirs=extension_args["include_dirs"] + + [ + os.path.join("dpctl", "tensor"), + ], + extra_compile_args=extension_args["extra_compile_args"], + extra_link_args=extension_args["extra_link_args"], + libraries=extension_args["libraries"], + library_dirs=extension_args["library_dirs"], + runtime_library_dirs=extension_args["runtime_library_dirs"], + define_macros=extension_args["define_macros"], + ), ] return extensions From fff470dbf64aff05141bdd44e92dfb7fc99a1836 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Nov 2021 15:20:48 -0600 Subject: [PATCH 02/12] Implemented usm_ndarray.__dlpack__, usm_ndarray.__dlpack_device__ --- dpctl/tensor/_usmarray.pyx | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 57c5225a3e..4b4d41dd87 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -32,6 +32,7 @@ from cpython.tuple cimport PyTuple_New, PyTuple_SetItem cimport dpctl as c_dpctl cimport dpctl.memory as c_dpmem +cimport dpctl.tensor._dlpack as c_dlpack include "_stride_utils.pxi" include "_types.pxi" @@ -738,10 +739,20 @@ cdef class usm_ndarray: return NotImplemented def __dlpack__(self, stream=None): - return NotImplemented + """Produce DLPack capsule""" + if stream is None: + return c_dlpack.to_dlpack_capsule(self) + else: + raise NotImplementedError( + "Only stream=None is supported. " + "Use `dpctl.SyclQueue.submit_barrier` to synchronize queues." + ) def __dlpack_device__(self): - return NotImplemented + return ( + c_dlpack.device_oneAPI, + (self.sycl_device).get_overall_ordinal(), + ) def __eq__(self, other): return _dispatch_binary_elementwise(self, "equal", other) From 3a96289d8a16065ed5d87f3f01ff31f242fadb3e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Nov 2021 15:46:23 -0600 Subject: [PATCH 03/12] Implemented from_dlpack(array) --- dpctl/tensor/_dlpack.pyx | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index 09887dcb75..35cb8140fa 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -305,5 +305,18 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: cpdef from_dlpack(array): """Constructs `usm_ndarray` from a Python object that implements - `__dlpack__` protocol.""" - pass + `__dlpack__` protocol. + """ + if not hasattr(array, "__dlpack__"): + raise TypeError( + "The argument of type {type(array)} does not implement " + "`__dlpack__` method." + ) + dlpack_attr = getattr(array, "__dlpack__") + if not callable(dlpack_attr): + raise TypeError( + "The argument of type {type(array)} does not implement " + "`__dlpack__` method." 
+ ) + dlpack_capsule = dlpack_attr() + return from_dlpack_capsule(dlpack_capsule) From 96511159128bca68c417205251443130abda8249 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Nov 2021 15:50:43 -0600 Subject: [PATCH 04/12] Exported dpctl.tensor.from_dlpack --- dpctl/tensor/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 6036099c62..ab2bf72ebb 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -34,6 +34,7 @@ from dpctl.tensor._copy_utils import copy_to_numpy as asnumpy from dpctl.tensor._copy_utils import copy_to_numpy as to_numpy from dpctl.tensor._ctors import asarray, empty +from dpctl.tensor._dlpack import from_dlpack from dpctl.tensor._reshape import reshape from dpctl.tensor._usmarray import usm_ndarray @@ -47,4 +48,5 @@ "from_numpy", "to_numpy", "asnumpy", + "from_dlpack", ] From a2bf38f787b85e4e1bc82ec6408a0e70181e98d0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 16 Nov 2021 09:48:36 -0600 Subject: [PATCH 05/12] DLManagedTensor lifetime management implemented per array-API specs 1. The pycapsule destructor only calls DLManagedTensor.deleter is the name is "dltensor" 2. Code consuming the DLPack capsule renamed the capsule (to avoid destructor calling the deleter) and instead creates an internal object to do that and uses that internal object as the base of _Memory object `from_dlpack_capsule` function should handle NULL data field For zero-elements arrays in DLPack, allocate 1 element Proper support for strides added. Expanded docstring of `dpctl.tensor.from_dlpack` --- dpctl/tensor/_dlpack.pyx | 107 +++++++++++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 21 deletions(-) diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index 35cb8140fa..86daf8b72b 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -36,6 +36,7 @@ from ._usmarray cimport usm_ndarray import numpy as np import dpctl +import dpctl.memory as dpmem cdef extern from './include/dlpack/dlpack.h' nogil: @@ -95,10 +96,6 @@ cdef void pycapsule_deleter(object dlt_capsule): dlm_tensor = cpython.PyCapsule_GetPointer( dlt_capsule, 'dltensor') dlm_tensor.deleter(dlm_tensor) - elif cpython.PyCapsule_IsValid(dlt_capsule, 'used_dltensor'): - dlm_tensor = cpython.PyCapsule_GetPointer( - dlt_capsule, 'used_dltensor') - dlm_tensor.deleter(dlm_tensor) cdef void managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil: @@ -133,7 +130,11 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: cdef int64_t *shape_strides_ptr = NULL cdef int i = 0 cdef int device_id = -1 + cdef char* base_ptr = NULL + cdef Py_ssize_t element_offset = 0 + cdef Py_ssize_t byte_offset = 0 + ary_base = usm_ary.get_base() ary_sycl_queue = usm_ary.get_sycl_queue() ary_sycl_device = ary_sycl_queue.get_sycl_device() @@ -176,11 +177,13 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: ary_dt = usm_ary.dtype ary_dtk = ary_dt.kind + element_offset = usm_ary.get_offset() + byte_offset = element_offset * (ary_dt.itemsize) dl_tensor = &dlm_tensor.dl_tensor - dl_tensor.data = data_ptr + dl_tensor.data = (data_ptr - byte_offset) dl_tensor.ndim = nd - dl_tensor.byte_offset = 0 + dl_tensor.byte_offset = byte_offset dl_tensor.shape = &shape_strides_ptr[0] if strides_ptr is NULL: dl_tensor.strides = NULL @@ -212,6 +215,24 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: return cpython.PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter) +cdef class _DLManagedTensorOwner: + """Helper 
+    cdef DLManagedTensor *dlm_tensor
+
+    def __cinit__(self):
+        self.dlm_tensor = NULL
+
+    def __dealloc__(self):
+        if self.dlm_tensor:
+            self.dlm_tensor.deleter(self.dlm_tensor)
+
+    @staticmethod
+    cdef _DLManagedTensorOwner _create(DLManagedTensor *dlm_tensor_src):
+        cdef _DLManagedTensorOwner res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner)
+        res.dlm_tensor = dlm_tensor_src
+        return res
+
+
 cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
     """Reconstructs instance of usm_ndarray from named Python
     capsule object referencing instance of `DLManagedTensor` without
     a copy"""
     cdef DLManagedTensor *dlm_tensor = NULL
     cdef bytes usm_type
     cdef size_t sz = 1
     cdef int i
     cdef int element_bytesize = 0
+    cdef Py_ssize_t offset_min = 0
+    cdef Py_ssize_t offset_max = 0
+    cdef int64_t stride_i
+    cdef char* mem_ptr = NULL
+    cdef Py_ssize_t element_offset = 0
 
     if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'):
         if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor'):
@@ -237,9 +263,12 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
         py_caps, "dltensor")
     # Verify that we can work with this device
     if dlm_tensor.dl_tensor.device.device_type == kDLOneAPI:
         q = dpctl.SyclQueue(str(dlm_tensor.dl_tensor.device.device_id))
-        usm_type = c_dpmem._Memory.get_pointer_type(
-            <DPCTLSyclUSMRef>dlm_tensor.dl_tensor.data,
-            q.sycl_context)
+        if dlm_tensor.dl_tensor.data is NULL:
+            usm_type = b"device"
+        else:
+            usm_type = c_dpmem._Memory.get_pointer_type(
+                <DPCTLSyclUSMRef>dlm_tensor.dl_tensor.data,
+                q.sycl_context)
         if usm_type == b"unknown":
             raise ValueError(
                 "Data pointer in DLPack is not bound to default sycl "
@@ -255,17 +284,45 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
             raise ValueError(
                 "Can not import DLPack tensor with lanes != 1"
             )
-        for i in range(dlm_tensor.dl_tensor.ndim):
-            sz = sz * dlm_tensor.dl_tensor.shape[i]
+        if dlm_tensor.dl_tensor.strides is NULL:
+            for i in range(dlm_tensor.dl_tensor.ndim):
+                sz = sz * dlm_tensor.dl_tensor.shape[i]
+        else:
+            offset_min = 0
+            offset_max = 0
+            for i in range(dlm_tensor.dl_tensor.ndim):
+                stride_i = dlm_tensor.dl_tensor.strides[i]
+                if stride_i > 0:
+                    offset_max = offset_max + stride_i * (
+                        dlm_tensor.dl_tensor.shape[i] - 1
+                    )
+                else:
+                    offset_min = offset_min + stride_i * (
+                        dlm_tensor.dl_tensor.shape[i] - 1
+                    )
+            sz = offset_max - offset_min + 1
+        if sz == 0:
+            sz = 1
 
         element_bytesize = (dlm_tensor.dl_tensor.dtype.bits // 8)
         sz = sz * element_bytesize
-        usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
-            <DPCTLSyclUSMRef>dlm_tensor.dl_tensor.data,
-            sz,
-            (<c_dpctl.SyclQueue>q).get_queue_ref(),
-            memory_owner=py_caps
-        )
+        element_offset = dlm_tensor.dl_tensor.byte_offset // element_bytesize
+
+        # transfer dlm_tensor ownership
+        dlm_holder = _DLManagedTensorOwner._create(dlm_tensor)
+        cpython.PyCapsule_SetName(py_caps, 'used_dltensor')
+
+        if dlm_tensor.dl_tensor.data is NULL:
+            usm_mem = dpmem.MemoryUSMDevice(sz, q)
+        else:
+            mem_ptr = <char *>dlm_tensor.dl_tensor.data + dlm_tensor.dl_tensor.byte_offset
+            mem_ptr = mem_ptr - (element_offset * element_bytesize)
+            usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+                <DPCTLSyclUSMRef>mem_ptr,
+                sz,
+                (<c_dpctl.SyclQueue>q).get_queue_ref(),
+                memory_owner=dlm_holder
+            )
         py_shape = list()
         for i in range(dlm_tensor.dl_tensor.ndim):
             py_shape.append(dlm_tensor.dl_tensor.shape[i])
@@ -296,9 +353,9 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
             py_shape,
             dtype=ary_dt,
             buffer=usm_mem,
-            strides=py_strides
+            strides=py_strides,
+            offset=element_offset
         )
-        cpython.PyCapsule_SetName(py_caps, 'used_dltensor')
         return res_ary
     else:
         raise ValueError(
             "The DLPack tensor resides on unsupported device."
         )
 
 
 cpdef from_dlpack(array):
-    """Constructs `usm_ndarray` from a Python object that implements
-    `__dlpack__` protocol.
+    """dpctl.tensor.from_dlpack(obj)
+
+    Constructs :class:`dpctl.tensor.usm_ndarray` instance from a Python
+    object `obj` that implements `__dlpack__` protocol. The output
+    array is always a zero-copy view of the input.
+
+    Raises:
+        TypeError: if `obj` does not implement `__dlpack__` method.
+        ValueError: if zero copy view can not be constructed because
+            the input array resides on an unsupported device.
     """
     if not hasattr(array, "__dlpack__"):
         raise TypeError(
             f"The argument of type {type(array)} does not implement "

From ee4dcc6567cbc896f8df80be2487ec8a213e4aa4 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 16 Nov 2021 09:50:15 -0600
Subject: [PATCH 06/12] Adding test_usm_ndarray_dlpack

Test should anticipate that dlpack roundtripping changes bool dtype to uint8

Adding test for `from_dlpack` input validation
---
 dpctl/tests/test_usm_ndarray_dlpack.py | 120 +++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 dpctl/tests/test_usm_ndarray_dlpack.py

diff --git a/dpctl/tests/test_usm_ndarray_dlpack.py b/dpctl/tests/test_usm_ndarray_dlpack.py
new file mode 100644
index 0000000000..a40688965b
--- /dev/null
+++ b/dpctl/tests/test_usm_ndarray_dlpack.py
@@ -0,0 +1,120 @@
+# Data Parallel Control (dpctl)
+#
+# Copyright 2020-2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
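+
+# The tests below exercise the DLPack support added by the preceding
+# patches: capsule export via `usm_ndarray.__dlpack__`, device reporting
+# via `usm_ndarray.__dlpack_device__`, and zero-copy construction of
+# views via `dpctl.tensor.from_dlpack`.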
+ +import ctypes + +import pytest + +import dpctl +import dpctl.tensor as dpt + +device_oneAPI = 14 # DLDeviceType.kDLOneAPI + +_usm_types_list = ["shared", "device", "host"] + + +@pytest.fixture(params=_usm_types_list) +def usm_type(request): + return request.param + + +_typestrs_list = [ + "b1", + "u1", + "i1", + "u2", + "i2", + "u4", + "i4", + "u8", + "i8", + "f2", + "f4", + "f8", + "c8", + "c16", +] + + +@pytest.fixture(params=_typestrs_list) +def typestr(request): + return request.param + + +def test_dlpack_device(usm_type): + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty((64,), dtype="u1", usm_type=usm_type, device=sycl_dev) + dev = X.__dlpack_device__() + assert type(dev) is tuple + assert len(dev) == 2 + assert dev[0] == device_oneAPI + assert sycl_dev == all_root_devices[dev[1]] + + +def test_dlpack_exporter(typestr, usm_type): + caps_fn = ctypes.pythonapi.PyCapsule_IsValid + caps_fn.restype = bool + caps_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty((64,), dtype=typestr, usm_type=usm_type, device=sycl_dev) + caps = X.__dlpack__() + assert caps_fn(caps, b"dltensor") + Y = X[::2] + caps2 = Y.__dlpack__() + assert caps_fn(caps2, b"dltensor") + + +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack(shape, typestr, usm_type): + all_root_devices = dpctl.get_devices() + for sycl_dev in all_root_devices: + X = dpt.empty(shape, dtype=typestr, usm_type=usm_type, device=sycl_dev) + Y = dpt.from_dlpack(X) + assert X.shape == Y.shape + assert X.dtype == Y.dtype or ( + str(X.dtype) == "bool" and str(Y.dtype) == "uint8" + ) + assert X.sycl_device == Y.sycl_device + assert X.usm_type == Y.usm_type + assert X._pointer == Y._pointer + if Y.ndim: + V = Y[::-1] + W = dpt.from_dlpack(V) + assert V.strides == W.strides + + +def test_from_dlpack_input_validation(): + vstr = dpt._dlpack.get_build_dlpack_version() + assert type(vstr) is str + with pytest.raises(TypeError): + dpt.from_dlpack(None) + + class DummyWithProperty: + @property + def __dlpack__(self): + return None + + with pytest.raises(TypeError): + dpt.from_dlpack(DummyWithProperty()) + + class DummyWithMethod: + def __dlpack__(self): + return None + + with pytest.raises(TypeError): + dpt.from_dlpack(DummyWithMethod()) From ea71e66e0f62caebae954373e96059f126007d48 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 20 Nov 2021 08:15:35 -0600 Subject: [PATCH 07/12] dlpack support noted in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c09fea7af3..371ff0a0f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `dpctl.tensor.asarray`, `dpctl.tensor.empty` implemented (#646). +- `dpctl.tensor.usm_ndarray` adds support for DLPack protocol. `dpctl.tensor.from_dlpack` implemented (#682). ### Changed - dpctl-capi is now renamed to `libsyclinterface` (#666). From b6b6b5555263b9f8d0fc80cc9363317ec116e23b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 24 Nov 2021 10:33:33 -0600 Subject: [PATCH 08/12] Addressed PR feedback 1. Added docstrings 2. Exported DLPackCreationError in `_dlpack.pxd` 3. Added validation in `__dlpack_device__` to raise an error if device_id came back -1 (not-found) 4. 
`to_dlpack_capsule(usm_ary)` raises DLPackCreationError if the array's
   context is not the default context (the one created in the
   dpctl.SyclQueue(dev) call)
---
 dpctl/tensor/_dlpack.pxd   |  8 ++++++
 dpctl/tensor/_dlpack.pyx   | 50 +++++++++++++++++++++++---------------
 dpctl/tensor/_usmarray.pyx | 36 +++++++++++++++++++++++----
 3 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/dpctl/tensor/_dlpack.pxd b/dpctl/tensor/_dlpack.pxd
index 2a8ff3dda6..439880f7d2 100644
--- a/dpctl/tensor/_dlpack.pxd
+++ b/dpctl/tensor/_dlpack.pxd
@@ -31,3 +31,11 @@ cpdef object to_dlpack_capsule(usm_ndarray array) except +
 cpdef usm_ndarray from_dlpack_capsule(object dltensor) except +
 
 cpdef from_dlpack(array)
+
+cdef class DLPackCreationError(Exception):
+    """
+    A DLPackCreationError exception is raised when constructing
+    DLPack capsule from `usm_ndarray` based on a USM allocation
+    on a partitioned SYCL device.
+    """
+    pass
diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx
index 86daf8b72b..5cf799521d 100644
--- a/dpctl/tensor/_dlpack.pyx
+++ b/dpctl/tensor/_dlpack.pyx
@@ -87,10 +87,14 @@ cdef extern from './include/dlpack/dlpack.h' nogil:
 
 
 def get_build_dlpack_version():
+    """
+    Returns the string value of DLPACK_VERSION from dlpack.h
+    `dpcl.tensor` was built with.
+    """
     return str(DLPACK_VERSION)
 
 
-cdef void pycapsule_deleter(object dlt_capsule):
+cdef void _pycapsule_deleter(object dlt_capsule):
     cdef DLManagedTensor *dlm_tensor = NULL
     if cpython.PyCapsule_IsValid(dlt_capsule, 'dltensor'):
         dlm_tensor = <DLManagedTensor*>cpython.PyCapsule_GetPointer(
@@ -98,7 +102,7 @@ cdef void pycapsule_deleter(object dlt_capsule):
         dlm_tensor.deleter(dlm_tensor)
 
 
-cdef void managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil:
+cdef void _managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil:
     if dlm_tensor is not NULL:
         stdlib.free(dlm_tensor.dl_tensor.shape)
         cpython.Py_DECREF(<usm_ndarray>dlm_tensor.manager_ctx)
@@ -106,18 +110,12 @@ cdef void managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil:
         stdlib.free(dlm_tensor)
 
 
-cdef class DLPackCreationError(Exception):
+cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
     """
-    A DLPackCreationError exception is raised when constructing
-    DLPack capsule from `usm_ndarray` based on a USM allocation
-    on a partitioned SYCL device.
+    Constructs named Python capsule object referencing
+    instance of `DLManagedTensor` from
+    :class:`dpctl.tensor.usm_ndarray` instance.
     """
-    pass
-
-
-cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
-    """Constructs named Python capsule object referencing
-    instance of `DLManagedTensor` from `usm_ndarray` instance"""
     cdef c_dpctl.SyclQueue ary_sycl_queue
     cdef c_dpctl.SyclDevice ary_sycl_device
     cdef DPCTLSyclDeviceRef pDRef = NULL
@@ -146,6 +144,14 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
     if pDRef is not NULL:
         DPCTLDevice_Delete(pDRef)
         raise DLPackCreationError(
             "to_dlpack_capsule: DLPack can only export arrays allocated on "
             "non-partitioned SYCL devices."
         )
+    # TODO: check that ary_sycl_context is the default context
+    default_context = dpctl.SyclQueue(ary_sycl_device).sycl_context
+    if not usm_ary.sycl_context == default_context:
+        raise DLPackCreationError(
+            "to_dlpack_capsule: DLPack can only export arrays based on USM "
+            "allocations bound to a default platform SYCL context"
+        )
+
 
     dlm_tensor = <DLManagedTensor *>stdlib.malloc(
         sizeof(DLManagedTensor))
@@ -210,13 +216,16 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
 
     dlm_tensor.manager_ctx = <void*>usm_ary
     cpython.Py_INCREF(usm_ary)
-    dlm_tensor.deleter = managed_tensor_deleter
+    dlm_tensor.deleter = _managed_tensor_deleter
 
-    return cpython.PyCapsule_New(dlm_tensor, 'dltensor', pycapsule_deleter)
+    return cpython.PyCapsule_New(dlm_tensor, 'dltensor', _pycapsule_deleter)
 
 
 cdef class _DLManagedTensorOwner:
-    """Helper class managing lifetimes of the DLManagedTensor struct"""
+    """
+    Helper class managing the lifetime of the DLManagedTensor struct
+    transferred from a 'dlpack' capsule.
+    """
     cdef DLManagedTensor *dlm_tensor
 
     def __cinit__(self):
@@ -233,9 +239,11 @@ cdef class _DLManagedTensorOwner:
 
 
 cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
-    """Reconstructs instance of usm_ndarray from named Python
-    capsule object referencing instance of `DLManagedTensor` without
-    a copy"""
+    """
+    Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from
+    named Python capsule object referencing instance of `DLManagedTensor`
+    without copy. The instance forms a view in the memory of the tensor.
+    """
     cdef DLManagedTensor *dlm_tensor = NULL
     cdef bytes usm_type
     cdef size_t sz = 1
diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx
index 4b4d41dd87..75131fd916 100644
--- a/dpctl/tensor/_usmarray.pyx
+++ b/dpctl/tensor/_usmarray.pyx
@@ -739,7 +739,16 @@ cdef class usm_ndarray:
         return NotImplemented
 
     def __dlpack__(self, stream=None):
-        """Produce DLPack capsule"""
+        """
+        Produces DLPack capsule.
+
+        Raises:
+            MemoryError: when host memory can not be allocated.
+            DLPackCreationError: when array is allocated on a partitioned
+                SYCL device, or with a non-default context.
+            NotImplementedError: when non-default value of `stream` keyword
+                is used.
+        """
         if stream is None:
             return c_dlpack.to_dlpack_capsule(self)
         else:
@@ -749,10 +758,27 @@ cdef class usm_ndarray:
             )
 
     def __dlpack_device__(self):
-        return (
-            c_dlpack.device_oneAPI,
-            (<c_dpctl.SyclDevice>self.sycl_device).get_overall_ordinal(),
-        )
+        """
+        Gives a tuple (`device_type`, `device_id`) corresponding to `DLDevice`
+        entry in `DLTensor` in DLPack protocol.
+
+        The tuple describes the non-partitioned device where the array
+        has been allocated.
+
+        Raises:
+            DLPackCreationError: when array is allocated on a partitioned
+            SYCL device
+        """
+        cdef int dev_id = (<c_dpctl.SyclDevice>self.sycl_device).get_overall_ordinal()
+        if dev_id < 0:
+            raise c_dlpack.DLPackCreationError(
+                "DLPack protocol is only supported for non-partitioned devices"
+            )
+        else:
+            return (
+                c_dlpack.device_oneAPI,
+                dev_id,
+            )
 
     def __eq__(self, other):
         return _dispatch_binary_elementwise(self, "equal", other)

From 4c23947a85a3ce897e303dc44c98928cfced9d3f Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Thu, 25 Nov 2021 11:35:03 -0600
Subject: [PATCH 09/12] Make sure that dpctl/tensor/include/dlpack is included
 in the layout

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 19f37e5a30..5ce66287a8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include versioneer.py
 recursive-include dpctl/include *.h
+recursive-include dpctl/tensor/include *
 recursive-include dpctl *.pxd
 include dpctl/_sycl_context.h
 include dpctl/_sycl_context_api.h

From f276b8d0fe325ffe2a0475a18a9963aa34d2b702 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 30 Nov 2021 09:51:33 -0600
Subject: [PATCH 10/12] fixed typo in docstring

---
 dpctl/tensor/_dlpack.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx
index 5cf799521d..741ca9d4c4 100644
--- a/dpctl/tensor/_dlpack.pyx
+++ b/dpctl/tensor/_dlpack.pyx
@@ -89,7 +89,7 @@ cdef extern from './include/dlpack/dlpack.h' nogil:
 def get_build_dlpack_version():
     """
     Returns the string value of DLPACK_VERSION from dlpack.h
-    `dpcl.tensor` was built with.
+    `dpctl.tensor` was built with.
     """
     return str(DLPACK_VERSION)

From 96636b20920a725b8798cffa912aa04ecf3456a7 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Tue, 30 Nov 2021 13:30:17 -0600
Subject: [PATCH 11/12] Addressed docstrings/formatting PR feedback

---
 dpctl/tensor/_dlpack.pyx | 44 +++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx
index 741ca9d4c4..2647904ef5 100644
--- a/dpctl/tensor/_dlpack.pyx
+++ b/dpctl/tensor/_dlpack.pyx
@@ -90,6 +90,10 @@ def get_build_dlpack_version():
     """
     Returns the string value of DLPACK_VERSION from dlpack.h
     `dpctl.tensor` was built with.
+
+    Returns:
+        A string value of the version of DLPack used to build
+        `dpctl`.
     """
     return str(DLPACK_VERSION)
 
@@ -112,9 +116,26 @@ cdef void _managed_tensor_deleter(DLManagedTensor *dlm_tensor) with gil:
 
 cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
     """
+    to_dlpack_capsule(usm_ary)
+
     Constructs named Python capsule object referencing
     instance of `DLManagedTensor` from
     :class:`dpctl.tensor.usm_ndarray` instance.
+
+    Args:
+        usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray`
+    Returns:
+        A new Python capsule with name "dltensor" that contains
+        a pointer to `DLManagedTensor` struct.
+    Raises:
+        DLPackCreationError: when array can not be represented as
+        DLPack tensor. This may happen when array was allocated
+        on a partitioned sycl device, or its USM allocation is
+        not bound to the platform default SYCL context.
+        MemoryError: when host allocation needed for `DLManagedTensor`
+        did not succeed.
+        ValueError: when array elements data type could not be represented
+        in `DLManagedTensor`.
""" cdef c_dpctl.SyclQueue ary_sycl_queue cdef c_dpctl.SyclDevice ary_sycl_device @@ -144,7 +165,6 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: "to_dlpack_capsule: DLPack can only export arrays allocated on " "non-partitioned SYCL devices." ) - # TODO: check that ary_sycl_context is the default context default_context = dpctl.SyclQueue(ary_sycl_device).sycl_context if not usm_ary.sycl_context == default_context: raise DLPackCreationError( @@ -152,7 +172,6 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: "allocations bound to a default platform SYCL context" ) - dlm_tensor = stdlib.malloc( sizeof(DLManagedTensor)) if dlm_tensor is NULL: @@ -244,9 +263,25 @@ cdef class _DLManagedTensorOwner: cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: """ + from_dlpack_capsule(caps) + Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from named Python capsule object referencing instance of `DLManagedTensor` without copy. The instance forms a view in the memory of the tensor. + + Args: + caps: Python capsule with name "dltensor" expected to reference + an instance of `DLManagedTensor` struct. + Returns: + Instance of :class:`dpctl.tensor.usm_ndarray` with a view into + memory of the tensor. Capsule is renamed to "used_dltensor" upon + success. + Raises: + TypeError: if argument is not a "dltensor" capsule. + ValueError: if argument is "used_dltensor" capsule, + if the USM pointer is not bound to the reconstructed + sycl context, or the DLPack's device_type is not supported + by dpctl. """ cdef DLManagedTensor *dlm_tensor = NULL cdef bytes usm_type @@ -355,7 +390,7 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: raise ValueError( "Can not import DLPack tensor with type code {}.".format( dlm_tensor.dl_tensor.dtype.code - ) + ) ) res_ary = usm_ndarray( py_shape, @@ -379,6 +414,9 @@ cpdef from_dlpack(array): object `obj` that implements `__dlpack__` protocol. The output array is always a zero-copy view of the input. + Args: + A Python object representing an array that supports `__dlpack__` + protocol. Raises: TypeError: if `obj` does not implement `__dlpack__` method. 
ValueError: if zero copy view can not be constructed because From 49293af463bfd2f0e085f073bd149c297b0de130 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 30 Nov 2021 16:45:32 -0600 Subject: [PATCH 12/12] Applied consistent style for references --- dpctl/tensor/_dlpack.pyx | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index 2647904ef5..7d4ebfa0c3 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -72,18 +72,18 @@ cdef extern from './include/dlpack/dlpack.h' nogil: uint16_t lanes ctypedef struct DLTensor: - void* data + void *data DLDevice device int ndim DLDataType dtype - int64_t* shape - int64_t* strides + int64_t *shape + int64_t *strides uint64_t byte_offset ctypedef struct DLManagedTensor: DLTensor dl_tensor - void* manager_ctx - void (*deleter)(DLManagedTensor*) # noqa: E211 + void *manager_ctx + void (*deleter)(DLManagedTensor *) # noqa: E211 def get_build_dlpack_version(): @@ -141,15 +141,15 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+: cdef c_dpctl.SyclDevice ary_sycl_device cdef DPCTLSyclDeviceRef pDRef = NULL cdef DLManagedTensor *dlm_tensor = NULL - cdef DLTensor* dl_tensor = NULL + cdef DLTensor *dl_tensor = NULL cdef int nd = usm_ary.get_ndim() - cdef char* data_ptr = usm_ary.get_data() + cdef char *data_ptr = usm_ary.get_data() cdef Py_ssize_t *shape_ptr = NULL cdef Py_ssize_t *strides_ptr = NULL cdef int64_t *shape_strides_ptr = NULL cdef int i = 0 cdef int device_id = -1 - cdef char* base_ptr = NULL + cdef char *base_ptr = NULL cdef Py_ssize_t element_offset = 0 cdef Py_ssize_t byte_offset = 0 @@ -291,7 +291,7 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +: cdef Py_ssize_t offset_min = 0 cdef Py_ssize_t offset_max = 0 cdef int64_t stride_i - cdef char* mem_ptr = NULL + cdef char *mem_ptr = NULL cdef Py_ssize_t element_offset = 0 if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'):
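For illustration, the round trip exercised by the new tests can be driven as
follows. This is a minimal sketch, not part of the patch series itself; it
assumes at least one SYCL device is available and uses only names introduced
above (`dpctl.tensor.empty`, `usm_ndarray.__dlpack_device__`,
`dpctl.tensor.from_dlpack`, and the `_pointer` attribute used by the tests):

    import dpctl.tensor as dpt

    # Allocate a small USM-backed array on the default-selected device.
    X = dpt.empty((16,), dtype="i4", usm_type="device")

    # The array reports itself as residing on a oneAPI device
    # (DLDeviceType.kDLOneAPI == 14), identified by its overall ordinal.
    device_type, device_id = X.__dlpack_device__()
    assert device_type == 14

    # from_dlpack consumes the capsule produced by X.__dlpack__() and
    # returns a zero-copy view: same data pointer, same SYCL device.
    Y = dpt.from_dlpack(X)
    assert X._pointer == Y._pointer
    assert X.sycl_device == Y.sycl_device

A capsule can be consumed only once: `from_dlpack_capsule` renames it to
"used_dltensor", and a second attempt to consume the same capsule raises
ValueError.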