From 7f0f48ac3f3cf1e52c2059eef072d743459bb2ee Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 26 Mar 2023 17:16:17 -0500 Subject: [PATCH 1/9] First implementation to resolve gh-1134 Examples: ``` import dpctl.tensor as dpt m = dpt.ones((2,4), dtype='i4') w = dpt.zeros(4) v = dpt.full(4, -1) ar = dpt.asarray([m, [w, v]]) ar2 = dpt.asarray([m, [w, v]], device='cpu') ``` --- dpctl/tensor/_ctors.py | 132 ++++++++++++++++++++++++++++++++----- dpctl/tensor/_usmarray.pyx | 5 ++ 2 files changed, 122 insertions(+), 15 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index cecbdadb58..284e6a401f 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -24,6 +24,7 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils from dpctl.tensor._device import normalize_queue_device +from dpctl.tensor._usmarray import _is_object_with_buffer_protocol __doc__ = "Implementation of creation functions in :module:`dpctl.tensor`" @@ -276,17 +277,6 @@ def _asarray_from_numpy_ndarray( return res -def _is_object_with_buffer_protocol(obj): - "Returns `True` if object support Python buffer protocol" - try: - # use context manager to ensure - # buffer is instantly released - with memoryview(obj): - return True - except TypeError: - return False - - def _ensure_native_dtype_device_support(dtype, dev) -> None: """Check that dtype is natively supported by device. 
@@ -318,6 +308,106 @@ def _ensure_native_dtype_device_support(dtype, dev) -> None: ) +def _usm_types_walker(o, usm_types_list): + if isinstance(o, dpt.usm_ndarray): + usm_types_list.append(o.usm_type) + return + if isinstance(o, (list, tuple)): + for el in o: + _usm_types_walker(el, usm_types_list) + return + raise TypeError + + +def _device_copy_walker(seq_o, res, events): + if isinstance(seq_o, dpt.usm_ndarray): + exec_q = res.sycl_queue + ht_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=seq_o, dst=res, sycl_queue=exec_q + ) + events.append(ht_ev) + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _device_copy_walker(el, res[i], events) + return + raise TypeError + + +def _copy_through_host_walker(seq_o, usm_res): + if isinstance(seq_o, dpt.usm_ndarray): + usm_res[...] = dpt.asnumpy(seq_o).copy() + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _copy_through_host_walker(el, usm_res[i]) + return + usm_res[...] = np.asarray(seq_o) + + +def _asarray_from_seq( + seq_obj, + seq_shape, + seq_dt, + seq_dev, + dtype=None, + usm_type=None, + sycl_queue=None, + order="C", +): + "`obj` is a sequence" + if usm_type is None: + usm_types_in_seq = [] + _usm_types_walker(seq_obj, usm_types_in_seq) + usm_type = dpctl.utils.get_coerced_usm_type(usm_types_in_seq) + dpctl.utils.validate_usm_type(usm_type) + if sycl_queue is None: + exec_q = seq_dev + alloc_q = seq_dev + else: + exec_q = dpctl.utils.get_execution_queue( + ( + sycl_queue, + seq_dev, + ) + ) + alloc_q = sycl_queue + if dtype is None: + dtype = _map_to_device_dtype(seq_dt, alloc_q) + else: + _mapped_dt = _map_to_device_dtype(dtype, alloc_q) + if _mapped_dt != dtype: + raise ValueError( + f"Device {sycl_queue.sycl_device} " + f"does not support {dtype} natively." 
+ ) + dtype = _mapped_dt + if order in "KA": + order = "C" + if isinstance(exec_q, dpctl.SyclQueue): + res = dpt.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + ht_events = [] + _device_copy_walker(seq_obj, res, ht_events) + dpctl.SyclEvent.wait_for(ht_events) + return res + else: + res = dpt.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + _copy_through_host_walker(seq_obj, res) + return res + + def asarray( obj, dtype=None, @@ -327,7 +417,9 @@ def asarray( sycl_queue=None, order="K", ): - """ + """ asarray(obj, dtype=None, copy=None, device=None, \ + usm_type=None, sycl_queue=None, order="K") + Converts `obj` to :class:`dpctl.tensor.usm_ndarray`. Args: @@ -347,7 +439,7 @@ def asarray( allocations if possible, but allowed to perform a copy otherwise. Default: `None`. order ("C","F","A","K", optional): memory layout of the output array. - Default: "C" + Default: "K" device (optional): array API concept of device where the output array is created. 
`device` can be `None`, a oneAPI filter selector string, an instance of :class:`dpctl.SyclDevice` corresponding to a @@ -452,7 +544,7 @@ def asarray( raise ValueError( "Converting Python sequence to usm_ndarray requires a copy" ) - _, _, devs = _array_info_sequence(obj) + seq_shape, seq_dt, devs = _array_info_sequence(obj) if devs == _host_set: return _asarray_from_numpy_ndarray( np.asarray(obj, dtype=dtype, order=order), @@ -461,7 +553,17 @@ def asarray( sycl_queue=sycl_queue, order=order, ) - # for sequences + elif len(devs) == 1: + return _asarray_from_seq( + obj, + seq_shape, + seq_dt, + list(devs)[0], + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) raise NotImplementedError( "Converting Python sequences is not implemented" ) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 570490dd8d..d04da080e5 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1629,3 +1629,8 @@ cdef api object UsmNDArray_MakeFromPtr( offset=offset ) return arr + + +def _is_object_with_buffer_protocol(o): + "Returns True if object support Python buffer protocol" + return _is_buffer(o) From 5ef475ee35c137e6f02a8442cab02d9e22273016 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 27 Mar 2023 05:54:49 -0500 Subject: [PATCH 2/9] Added tests for asarray of sequence of usm_arrays --- dpctl/tests/test_tensor_asarray.py | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index a309b4e3a4..83c10606af 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -234,3 +234,40 @@ def test_asarray_cross_device(): x = dpt.empty(10, dtype="i8", sycl_queue=q) y = dpt.asarray(x, sycl_queue=qprof) assert y.sycl_queue == qprof + + +def test_asarray_seq_of_arrays_simple(): + get_queue_or_skip() + r = dpt.arange(10) + m = dpt.asarray( + [ + r, + ] + * 4 + ) + assert m.shape == (4,) + 
r.shape + assert m.dtype == r.dtype + assert m.device == r.device + + +def test_asarray_seq_of_arrays(): + get_queue_or_skip() + m = dpt.ones((2, 4), dtype="i4") + w = dpt.zeros(4) + v = dpt.full(4, -1) + ar = dpt.asarray([m, [w, v]]) + assert ar.shape == (2, 2, 4) + assert ar.device == m.device + assert ar.device == w.device + assert ar.device == v.device + + +def test_asarray_seq_of_array_different_queue(): + get_queue_or_skip() + m = dpt.ones((2, 4), dtype="i4") + w = dpt.zeros(4) + v = dpt.full(4, -1) + qprof = dpctl.SyclQueue(property="enable_profiling") + ar = dpt.asarray([m, [w, v]], sycl_queue=qprof) + assert ar.shape == (2, 2, 4) + assert ar.sycl_queue == qprof From 1755e37cfcded32d6377c2d4228e471d0783b4ee Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 28 Mar 2023 16:25:02 -0500 Subject: [PATCH 3/9] Fixed issue with using SUAI objects in sequences ``` In [1]: import dpctl.tensor._tensor_impl as ti, dpctl.tensor as dpt, dpnp In [2]: dpt.asarray([dpnp.ones(3)]) Out[2]: usm_ndarray([[1., 1., 1.]], dtype=float32) In [3]: dpnp.asarray([dpnp.ones(3)]) Out[3]: array([[1., 1., 1.]], dtype=float32) ``` --- dpctl/tensor/_ctors.py | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 284e6a401f..91e77b3826 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -67,11 +67,12 @@ def _array_info_dispatch(obj): return _empty_tuple, complex, _host_set if isinstance(obj, (list, tuple, range)): return _array_info_sequence(obj) - if any( - isinstance(obj, s) - for s in [np.integer, np.floating, np.complexfloating, np.bool_] - ): - return _empty_tuple, obj.dtype, _host_set + if _is_object_with_buffer_protocol(obj): + np_obj = np.array(obj) + return np_obj.shape, np_obj.dtype, _host_set + if hasattr(obj, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(obj) + return usm_ar.shape, usm_ar.dtype, frozenset([obj.sycl_queue]) 
raise ValueError(type(obj)) @@ -220,6 +221,18 @@ def _map_to_device_dtype(dt, q): raise RuntimeError(f"Unrecognized data type '{dt}' encountered.") +def _usm_ndarray_from_suai(obj): + sua_iface = getattr(obj, "__sycl_usm_array_interface__") + membuf = dpm.as_usm_memory(obj) + ary = dpt.usm_ndarray( + sua_iface["shape"], + dtype=sua_iface["typestr"], + buffer=membuf, + strides=sua_iface.get("strides", None), + ) + return ary + + def _asarray_from_numpy_ndarray( ary, dtype=None, usm_type=None, sycl_queue=None, order="K" ): @@ -312,6 +325,10 @@ def _usm_types_walker(o, usm_types_list): if isinstance(o, dpt.usm_ndarray): usm_types_list.append(o.usm_type) return + if hasattr(o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(o) + usm_types_list.append(usm_ar.usm_type) + return if isinstance(o, (list, tuple)): for el in o: _usm_types_walker(el, usm_types_list) @@ -327,6 +344,14 @@ def _device_copy_walker(seq_o, res, events): ) events.append(ht_ev) return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + exec_q = res.sycl_queue + ht_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=usm_ar, dst=res, sycl_queue=exec_q + ) + events.append(ht_ev) + return if isinstance(seq_o, (list, tuple)): for i, el in enumerate(seq_o): _device_copy_walker(el, res[i], events) @@ -499,14 +524,7 @@ def asarray( order=order, ) if hasattr(obj, "__sycl_usm_array_interface__"): - sua_iface = getattr(obj, "__sycl_usm_array_interface__") - membuf = dpm.as_usm_memory(obj) - ary = dpt.usm_ndarray( - sua_iface["shape"], - dtype=sua_iface["typestr"], - buffer=membuf, - strides=sua_iface.get("strides", None), - ) + ary = _usm_ndarray_from_suai(obj) return _asarray_from_usm_ndarray( ary, dtype=dtype, From 86cf7d8ca5f9e7d339834327d3ebd217e8d8820e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 11:58:31 -0500 Subject: [PATCH 4/9] Fixed a typo in array dispatcher caught by a test --- dpctl/tensor/_ctors.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 91e77b3826..15d64e3f62 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -72,7 +72,7 @@ def _array_info_dispatch(obj): return np_obj.shape, np_obj.dtype, _host_set if hasattr(obj, "__sycl_usm_array_interface__"): usm_ar = _usm_ndarray_from_suai(obj) - return usm_ar.shape, usm_ar.dtype, frozenset([obj.sycl_queue]) + return usm_ar.shape, usm_ar.dtype, frozenset([usm_ar.sycl_queue]) raise ValueError(type(obj)) From 89eed948245ea8c5ebe50ea210b98af43d24f1bf Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 11:50:45 -0500 Subject: [PATCH 5/9] Updated docstrings for MemoryUSM* classes --- dpctl/memory/_memory.pyx | 42 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 044a5b55a1..5fe4fe6b58 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -679,17 +679,13 @@ cdef class _Memory: cdef class MemoryUSMShared(_Memory): """ MemoryUSMShared(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM shared memory. - Non-positive alignments are not used (malloc_shared is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-shared memory. - MemoryUSMShared(usm_obj) constructor creates instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and to expose - a contiguous block of USM shared allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'shared'. + Non-positive ``alignment`` values are ignored and + the allocator ``malloc_shared`` is used for allocation instead. + If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. 
""" def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): @@ -720,17 +716,13 @@ cdef class MemoryUSMShared(_Memory): cdef class MemoryUSMHost(_Memory): """ MemoryUSMHost(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM host memory. - Non-positive alignments are not used (malloc_host is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-host memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and to expose - a contiguous block of USM host allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'host'. + Non-positive ``alignment`` values are ignored and + the allocator ``malloc_host`` is used for allocation instead. + If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. """ def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): @@ -762,17 +754,13 @@ cdef class MemoryUSMHost(_Memory): cdef class MemoryUSMDevice(_Memory): """ MemoryUSMDevice(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM device memory. - Non-positive alignments are not used (malloc_device is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-device memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and exposing - a contiguous block of USM device allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'device'. + Non-positive ``alignment`` values are ignored and + the allocator ``malloc_device`` is used for allocation instead. 
+ If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. """ def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): From 915079067d35f2f5342f2816186cea2675f0b3a3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 11:50:56 -0500 Subject: [PATCH 6/9] Change to allow as_usm_memory to consume SUAI with zero dimensions --- .../_sycl_usm_array_interface_utils.pxi | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/dpctl/memory/_sycl_usm_array_interface_utils.pxi b/dpctl/memory/_sycl_usm_array_interface_utils.pxi index 0812b2015f..f0ad9f4297 100644 --- a/dpctl/memory/_sycl_usm_array_interface_utils.pxi +++ b/dpctl/memory/_sycl_usm_array_interface_utils.pxi @@ -88,27 +88,38 @@ cdef object _pointers_from_shape_and_stride( Returns: tuple(min_disp, nbytes) """ + cdef Py_ssize_t nelems = 1 + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef int i + cdef Py_ssize_t sh_i = 0 + cdef Py_ssize_t str_i = 0 if (nd > 0): if (ary_strides is None): nelems = 1 for si in ary_shape: sh_i = int(si) - if (sh_i <= 0): + if (sh_i < 0): raise ValueError("Array shape elements need to be positive") nelems = nelems * sh_i - return (ary_offset, nelems * itemsize) + return (ary_offset, max(nelems, 1) * itemsize) else: min_disp = ary_offset max_disp = ary_offset for i in range(nd): str_i = int(ary_strides[i]) sh_i = int(ary_shape[i]) - if (sh_i <= 0): + if (sh_i < 0): raise ValueError("Array shape elements need to be positive") - if (str_i > 0): - max_disp += str_i * (sh_i - 1) + if (sh_i > 0): + if (str_i > 0): + max_disp += str_i * (sh_i - 1) + else: + min_disp += str_i * (sh_i - 1) else: - min_disp += str_i * (sh_i - 1); + nelems = 0 + if nelems == 0: + return (ary_offset, itemsize) return (min_disp, (max_disp - min_disp + 1) * itemsize) elif (nd == 0): return (ary_offset, itemsize) From 3036a6798160da48cf6bde3f947f1f44f68ed5e3 Mon Sep 
17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 11:58:58 -0500 Subject: [PATCH 7/9] Added a test based on example found by @npolina4 --- dpctl/tests/test_tensor_asarray.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index 83c10606af..bf0a0e14cb 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -271,3 +271,25 @@ def test_asarray_seq_of_array_different_queue(): ar = dpt.asarray([m, [w, v]], sycl_queue=qprof) assert ar.shape == (2, 2, 4) assert ar.sycl_queue == qprof + + +def test_asarray_seq_of_suai(): + get_queue_or_skip() + + class Dummy: + def __init__(self, obj, iface): + self.obj = obj + self.__sycl_usm_array_interface__ = iface + + o = dpt.empty(0, usm_type="shared") + d = Dummy(o, o.__sycl_usm_array_interface__) + x = dpt.asarray(d) + assert x.shape == (0,) + assert x.usm_type == o.usm_type + assert x._pointer == o._pointer + assert x.sycl_queue == o.sycl_queue + + x = dpt.asarray([d, d]) + assert x.shape == (2, 0) + assert x.usm_type == o.usm_type + assert x.sycl_queue == o.sycl_queue From 2a9032120e41920f942ba9aea45e96152a822896 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 15:30:49 -0500 Subject: [PATCH 8/9] Fixed issue with sequence of arrays from different device found by @npolina4 --- dpctl/tensor/_ctors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 15d64e3f62..616bbb3498 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -363,6 +363,10 @@ def _copy_through_host_walker(seq_o, usm_res): if isinstance(seq_o, dpt.usm_ndarray): usm_res[...] = dpt.asnumpy(seq_o).copy() return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + usm_res[...] 
= dpt.asnumpy(usm_ar).copy() + return if isinstance(seq_o, (list, tuple)): for i, el in enumerate(seq_o): _copy_through_host_walker(el, usm_res[i]) From d294fb62a5ce898b723c47b7690ec4458b010a15 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 29 Mar 2023 15:37:21 -0500 Subject: [PATCH 9/9] Added tests based on example found by @npolina4 --- dpctl/tests/test_tensor_asarray.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index bf0a0e14cb..7a3b54ae83 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -293,3 +293,31 @@ def __init__(self, obj, iface): assert x.shape == (2, 0) assert x.usm_type == o.usm_type assert x.sycl_queue == o.sycl_queue + + +def test_asarray_seq_of_suai_different_queue(): + q = get_queue_or_skip() + + class Dummy: + def __init__(self, obj, iface): + self.obj = obj + self.__sycl_usm_array_interface__ = iface + + @property + def shape(self): + return self.__sycl_usm_array_interface__["shape"] + + q2 = dpctl.SyclQueue() + assert q != q2 + o = dpt.empty((2, 2), usm_type="shared", sycl_queue=q2) + d = Dummy(o, o.__sycl_usm_array_interface__) + + x = dpt.asarray(d, sycl_queue=q) + assert x.sycl_queue == q + assert x.shape == d.shape + x = dpt.asarray([d], sycl_queue=q) + assert x.sycl_queue == q + assert x.shape == (1,) + d.shape + x = dpt.asarray([d, d], sycl_queue=q) + assert x.sycl_queue == q + assert x.shape == (2,) + d.shape