From a0e32c0cea94c742d87fed56aad85f351d1d8931 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 12 Aug 2024 16:47:02 +0200
Subject: [PATCH 1/2] Adopt dpnp to DLPack v1.0

---
 dpnp/dpnp_array.py | 50 ++++++++++++++++++++++++++++++++++++++--------
 dpnp/dpnp_iface.py | 35 +++++++++++++++++++++++++++++---
 2 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index f7b0085b3d9b..7fa2803b5cef 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -184,27 +184,61 @@ def __copy__(self):
     # '__divmod__',
     # '__doc__',

-    def __dlpack__(self, stream=None):
+    def __dlpack__(
+        self, *, stream=None, max_version=None, dl_device=None, copy=None
+    ):
         """
         Produces DLPack capsule.

         Parameters
         ----------
         stream : {:class:`dpctl.SyclQueue`, None}, optional
-            Execution queue to synchronize with. If ``None``,
-            synchronization is not performed.
+            Execution queue to synchronize with. If ``None``, synchronization
+            is not performed.
+            Default: ``None``.
+        max_version : {tuple of ints, None}, optional
+            The maximum DLPack version the consumer (caller of ``__dlpack__``)
+            supports. As ``__dlpack__`` may not always return a DLPack capsule
+            with version `max_version`, the consumer must verify the version
+            even if this argument is passed.
+            Default: ``None``.
+        dl_device : {tuple, None}, optional
+            The device the returned DLPack capsule will be placed on. The
+            device must be a 2-tuple matching the format of the output of the
+            ``__dlpack_device__`` method: an integer enumerator representing
+            the device type followed by an integer representing the index of
+            the device.
+            Default: ``None``.
+        copy : {bool, None}, optional
+            Boolean indicating whether or not to copy the input.
+
+            * If `copy` is ``True``, the input will always be copied.
+            * If ``False``, a ``BufferError`` will be raised if a copy is
+              deemed necessary.
+            * If ``None``, a copy will be made only if deemed necessary,
+              otherwise, the existing memory buffer will be reused.
+
+            Default: ``None``.

         Raises
         ------
-        MemoryError
+        MemoryError:
             when host memory can not be allocated.
-        DLPackCreationError - when array is allocated on a partitioned
-            SYCL device, or with a non-default context.
+        DLPackCreationError:
+            when array is allocated on a partitioned SYCL device, or with
+            a non-default context.
+        BufferError:
+            when a copy is deemed necessary but `copy` is ``False`` or when
+            the provided `dl_device` cannot be handled.

         """
-        return self._array_obj.__dlpack__(stream=stream)
+        return self._array_obj.__dlpack__(
+            stream=stream,
+            max_version=max_version,
+            dl_device=dl_device,
+            copy=copy,
+        )

     def __dlpack_device__(self):
         """
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index b2891c06adc3..15b68aa658b4 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -464,7 +464,7 @@ def default_float_type(device=None, sycl_queue=None):
     return map_dtype_to_device(float64, _sycl_queue.sycl_device)


-def from_dlpack(obj, /):
+def from_dlpack(obj, /, *, device=None, copy=None):
     """
     Create a dpnp array from a Python object implementing the
     ``__dlpack__`` protocol.
@@ -476,6 +476,28 @@ def from_dlpack(obj, /):
     obj : object
         A Python object representing an array that implements the
         ``__dlpack__`` and ``__dlpack_device__`` methods.
+    device : {:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`,
+              :class:`dpctl.tensor.Device`, tuple, None}, optional
+        Array API concept of a device where the output array is to be placed.
+        ``device`` can be ``None``, a oneAPI filter selector string,
+        an instance of :class:`dpctl.SyclDevice` corresponding to
+        a non-partitioned SYCL device, an instance of :class:`dpctl.SyclQueue`,
+        a :class:`dpctl.tensor.Device` object returned by
+        :attr:`dpctl.tensor.usm_ndarray.device`, or a 2-tuple matching
+        the format of the output of the ``__dlpack_device__`` method:
+        an integer enumerator representing the device type followed by
+        an integer representing the index of the device.
+        Default: ``None``.
+    copy : {bool, None}, optional
+        Boolean indicating whether or not to copy the input.
+
+        * If `copy` is ``True``, the input will always be copied.
+        * If ``False``, a ``BufferError`` will be raised if a copy is deemed
+          necessary.
+        * If ``None``, a copy will be made only if deemed necessary, otherwise,
+          the existing memory buffer will be reused.
+
+        Default: ``None``.

     Returns
     -------
     out : dpnp_array
         Returns a new dpnp array containing the data from another array
         (obj) with the ``__dlpack__`` method on the same device as object.

+    Raises
+    ------
+    TypeError:
+        if `obj` does not implement the ``__dlpack__`` method.
+    ValueError:
+        if the input array resides on an unsupported device.
+
     """

-    usm_ary = dpt.from_dlpack(obj)
-    return dpnp_array._create_from_usm_ndarray(usm_ary)
+    usm_res = dpt.from_dlpack(obj, device=device, copy=copy)
+    return dpnp_array._create_from_usm_ndarray(usm_res)


 def get_dpnp_descriptor(

From c9d2f5bcd64a119983358a66b84eae3c72978120 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 12 Aug 2024 16:47:36 +0200
Subject: [PATCH 2/2] Add more tests to cover different use cases

---
 tests/test_dlpack.py                  |  74 +++++++++++
 .../cupy/core_tests/test_dlpack.py    | 120 ++++++++++++++++++
 2 files changed, 194 insertions(+)
 create mode 100644 tests/test_dlpack.py
 create mode 100644 tests/third_party/cupy/core_tests/test_dlpack.py

diff --git a/tests/test_dlpack.py b/tests/test_dlpack.py
new file mode 100644
index 000000000000..25090d397cb4
--- /dev/null
+++ b/tests/test_dlpack.py
@@ -0,0 +1,74 @@
+import numpy
+import pytest
+from numpy.testing import assert_array_equal
+
+import dpnp
+
+from .helper import (
+    get_all_dtypes,
+)
+
+device_oneAPI = 14  # DLDeviceType.kDLOneAPI
+
+
+class TestDLPack:
+    @pytest.mark.parametrize("stream", [None, 1])
+    def test_stream(self, stream):
+        x = dpnp.arange(5)
+        x.__dlpack__(stream=stream)
+
+    @pytest.mark.parametrize("copy", [True, None, False])
+    def test_copy(self, copy):
+        x = dpnp.arange(5)
+        x.__dlpack__(copy=copy)
+
+    def test_wrong_copy(self):
+        x = dpnp.arange(5)
+        x.__dlpack__(copy=dpnp.array([1, 2, 3]))
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True))
+    def test_dtype_passthrough(self, xp, dt):
+        x = xp.arange(5).astype(dt)
+        y = xp.from_dlpack(x)
+
+        assert y.dtype == x.dtype
+        assert_array_equal(x, y)
+
+    @pytest.mark.parametrize("xp", [dpnp, numpy])
+    def test_non_contiguous(self, xp):
+        x = xp.arange(25).reshape((5, 5))
+
+        y1 = x[0]
+        assert_array_equal(y1, xp.from_dlpack(y1))
+
+        y2 = x[:, 0]
+        assert_array_equal(y2, xp.from_dlpack(y2))
+
+        y3 = x[1, :]
+        assert_array_equal(y3, xp.from_dlpack(y3))
+
+        y4 = x[1]
+        assert_array_equal(y4, xp.from_dlpack(y4))
+
+        y5 = xp.diagonal(x).copy()
+        assert_array_equal(y5, xp.from_dlpack(y5))
+
+    def test_device(self):
+        x = dpnp.arange(5)
+        assert x.__dlpack_device__()[0] == device_oneAPI
+        y = dpnp.from_dlpack(x)
+        assert y.__dlpack_device__()[0] == device_oneAPI
+        z = y[::2]
+        assert z.__dlpack_device__()[0] == device_oneAPI
+
+    def test_ndim0(self):
+        x = dpnp.array(1.0)
+        y = dpnp.from_dlpack(x)
+        assert_array_equal(x, y)
+
+    def test_from_dlpack_device(self):
+        x = dpnp.arange(5)
+        y = dpnp.from_dlpack(x, device=x.__dlpack_device__())
+        assert x.device == y.device
+        assert x.get_array()._pointer == y.get_array()._pointer
diff --git a/tests/third_party/cupy/core_tests/test_dlpack.py b/tests/third_party/cupy/core_tests/test_dlpack.py
new file mode 100644
index 000000000000..18eba5574093
--- /dev/null
+++ b/tests/third_party/cupy/core_tests/test_dlpack.py
@@ -0,0 +1,120 @@
+import unittest
+
+import dpctl
+import dpctl.tensor._dlpack as dlp
+import numpy
+import pytest
+
+import dpnp as cupy
+from tests.third_party.cupy import testing
+
+
+def _gen_array(dtype, alloc_q=None):
+    if cupy.issubdtype(dtype, numpy.unsignedinteger):
+        array = cupy.random.randint(
+            0, 10, size=(2, 3), sycl_queue=alloc_q
+        ).astype(dtype)
+    elif cupy.issubdtype(dtype, cupy.integer):
+        array = cupy.random.randint(
+            -10, 10, size=(2, 3), sycl_queue=alloc_q
+        ).astype(dtype)
+    elif cupy.issubdtype(dtype, cupy.floating):
+        array = cupy.random.rand(2, 3, sycl_queue=alloc_q).astype(dtype)
+    elif cupy.issubdtype(dtype, cupy.complexfloating):
+        array = cupy.random.random((2, 3), sycl_queue=alloc_q).astype(dtype)
+    elif dtype == cupy.bool_:
+        array = cupy.random.randint(
+            0, 2, size=(2, 3), sycl_queue=alloc_q
+        ).astype(cupy.bool_)
+    else:
+        assert False, f"unrecognized dtype: {dtype}"
+    return array
+
+
+class TestDLPackConversion(unittest.TestCase):
+    @testing.for_all_dtypes(no_bool=False)
+    def test_conversion(self, dtype):
+        orig_array = _gen_array(dtype)
+        tensor = orig_array.__dlpack__()
+        out_array = dlp.from_dlpack_capsule(tensor)
+        testing.assert_array_equal(orig_array, out_array)
+        assert orig_array.get_array()._pointer == out_array._pointer
+
+
+@testing.parameterize(*testing.product({"memory": ("device", "managed")}))
+class TestNewDLPackConversion(unittest.TestCase):
+    def _get_stream(self, stream_name):
+        if stream_name == "null":
+            return dpctl.SyclQueue()
+        return dpctl.SyclQueue()
+
+    @testing.for_all_dtypes(no_bool=False)
+    def test_conversion(self, dtype):
+        orig_array = _gen_array(dtype)
+        out_array = cupy.from_dlpack(orig_array)
+        testing.assert_array_equal(orig_array, out_array)
+        assert orig_array.get_array()._pointer == out_array.get_array()._pointer
+
+    def test_stream(self):
+        allowed_streams = ["null", True]
+
+        # stream order is automatically established via DLPack protocol
+        for src_s in [self._get_stream(s) for s in allowed_streams]:
+            for dst_s in [self._get_stream(s) for s in allowed_streams]:
+                orig_array = _gen_array(cupy.float32, alloc_q=src_s)
+                dltensor = orig_array.__dlpack__(stream=dst_s)
+
+                out_array = dlp.from_dlpack_capsule(dltensor)
+                out_array = cupy.from_dlpack(out_array, device=dst_s)
+                testing.assert_array_equal(orig_array, out_array)
+                assert (
+                    orig_array.get_array()._pointer
+                    == out_array.get_array()._pointer
+                )
+
+
+class TestDLTensorMemory(unittest.TestCase):
+    # def setUp(self):
+    #     self.old_pool = cupy.get_default_memory_pool()
+    #     self.pool = cupy.cuda.MemoryPool()
+    #     cupy.cuda.set_allocator(self.pool.malloc)
+
+    # def tearDown(self):
+    #     self.pool.free_all_blocks()
+    #     cupy.cuda.set_allocator(self.old_pool.malloc)
+
+    def test_deleter(self):
+        # memory is freed when tensor is deleted, as it's not consumed
+        array = cupy.empty(10)
+        tensor = array.__dlpack__()
+        # str(tensor): <capsule object "dltensor" at 0x...>
+        assert '"dltensor"' in str(tensor)
+        # assert self.pool.n_free_blocks() == 0
+        # del array
+        # assert self.pool.n_free_blocks() == 0
+        # del tensor
+        # assert self.pool.n_free_blocks() == 1
+
+    def test_deleter2(self):
+        # memory is freed when array2 is deleted, as tensor is consumed
+        array = cupy.empty(10)
+        tensor = array.__dlpack__()
+        assert '"dltensor"' in str(tensor)
+        array2 = dlp.from_dlpack_capsule(tensor)
+        assert '"used_dltensor"' in str(tensor)
+        # assert self.pool.n_free_blocks() == 0
+        # del array
+        # assert self.pool.n_free_blocks() == 0
+        # del array2
+        # assert self.pool.n_free_blocks() == 1
+        # del tensor
+        # assert self.pool.n_free_blocks() == 1
+
+    def test_multiple_consumption_error(self):
+        # Prevent segfault, see #3611
+        array = cupy.empty(10)
+        tensor = array.__dlpack__()
+        array2 = dlp.from_dlpack_capsule(tensor)
+        with pytest.raises(ValueError) as e:
+            array3 = dlp.from_dlpack_capsule(tensor)
+        assert "consumed multiple times" in str(e.value)
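
A minimal usage sketch of the keywords added by this series (not part of the patch; it assumes a default SYCL device is available and a dpctl build with DLPack v1.0 support, and it reuses the same private ``get_array()._pointer`` check as the new tests):

    import dpnp

    x = dpnp.arange(5)

    # Producer side: request a capsule with the DLPack v1.0 keywords.  The
    # consumer must still check which capsule version it actually received.
    capsule = x.__dlpack__(max_version=(1, 0), copy=None)

    # Consumer side: with copy=None the existing memory buffer is reused
    # whenever possible, so the result aliases the input.
    y = dpnp.from_dlpack(x, copy=None)
    assert x.get_array()._pointer == y.get_array()._pointer

    # The target device may be given as the 2-tuple reported by
    # __dlpack_device__ (device type enumerator, device index).
    z = dpnp.from_dlpack(x, device=x.__dlpack_device__())
    assert x.device == z.device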