diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp
index 11834ccdc7..921f231aa1 100644
--- a/dpctl/apis/include/dpctl4pybind11.hpp
+++ b/dpctl/apis/include/dpctl4pybind11.hpp
@@ -115,16 +115,24 @@ class dpctl_capi
     DPCTLSyclQueueRef (*UsmNDArray_GetQueueRef_)(PyUSMArrayObject *);
     py::ssize_t (*UsmNDArray_GetOffset_)(PyUSMArrayObject *);
     void (*UsmNDArray_SetWritableFlag_)(PyUSMArrayObject *, int);
-    PyObject *(*UsmNDArray_MakeFromMemory_)(int,
-                                            const py::ssize_t *,
-                                            int,
-                                            Py_MemoryObject *,
-                                            py::ssize_t,
-                                            char);
-    PyObject *(*UsmNDArray_MakeFromPtr_)(size_t,
+    PyObject *(*UsmNDArray_MakeSimpleFromMemory_)(int,
+                                                  const py::ssize_t *,
+                                                  int,
+                                                  Py_MemoryObject *,
+                                                  py::ssize_t,
+                                                  char);
+    PyObject *(*UsmNDArray_MakeSimpleFromPtr_)(size_t,
+                                               int,
+                                               DPCTLSyclUSMRef,
+                                               DPCTLSyclQueueRef,
+                                               PyObject *);
+    PyObject *(*UsmNDArray_MakeFromPtr_)(int,
+                                         const py::ssize_t *,
                                          int,
+                                         const py::ssize_t *,
                                          DPCTLSyclUSMRef,
                                          DPCTLSyclQueueRef,
+                                         py::ssize_t,
                                          PyObject *);
 
     int USM_ARRAY_C_CONTIGUOUS_;
@@ -233,12 +241,13 @@ class dpctl_capi
           UsmNDArray_GetTypenum_(nullptr), UsmNDArray_GetElementSize_(nullptr),
           UsmNDArray_GetFlags_(nullptr), UsmNDArray_GetQueueRef_(nullptr),
           UsmNDArray_GetOffset_(nullptr), UsmNDArray_SetWritableFlag_(nullptr),
-          UsmNDArray_MakeFromMemory_(nullptr), UsmNDArray_MakeFromPtr_(nullptr),
-          USM_ARRAY_C_CONTIGUOUS_(0), USM_ARRAY_F_CONTIGUOUS_(0),
-          USM_ARRAY_WRITABLE_(0), UAR_BOOL_(-1), UAR_SHORT_(-1),
-          UAR_USHORT_(-1), UAR_INT_(-1), UAR_UINT_(-1), UAR_LONG_(-1),
-          UAR_ULONG_(-1), UAR_LONGLONG_(-1), UAR_ULONGLONG_(-1), UAR_FLOAT_(-1),
-          UAR_DOUBLE_(-1), UAR_CFLOAT_(-1), UAR_CDOUBLE_(-1),
+          UsmNDArray_MakeSimpleFromMemory_(nullptr),
+          UsmNDArray_MakeSimpleFromPtr_(nullptr),
+          UsmNDArray_MakeFromPtr_(nullptr), USM_ARRAY_C_CONTIGUOUS_(0),
+          USM_ARRAY_F_CONTIGUOUS_(0), USM_ARRAY_WRITABLE_(0), UAR_BOOL_(-1),
+          UAR_SHORT_(-1), UAR_USHORT_(-1), UAR_INT_(-1), UAR_UINT_(-1),
+          UAR_LONG_(-1), UAR_ULONG_(-1), UAR_LONGLONG_(-1), UAR_ULONGLONG_(-1),
+          UAR_FLOAT_(-1), UAR_DOUBLE_(-1), UAR_CFLOAT_(-1), UAR_CDOUBLE_(-1),
           UAR_TYPE_SENTINEL_(-1), UAR_HALF_(-1), UAR_INT8_(-1), UAR_UINT8_(-1),
           UAR_INT16_(-1), UAR_UINT16_(-1), UAR_INT32_(-1), UAR_UINT32_(-1),
           UAR_INT64_(-1), UAR_UINT64_(-1), default_sycl_queue_{},
@@ -310,7 +319,9 @@ class dpctl_capi
         this->UsmNDArray_GetQueueRef_ = UsmNDArray_GetQueueRef;
         this->UsmNDArray_GetOffset_ = UsmNDArray_GetOffset;
         this->UsmNDArray_SetWritableFlag_ = UsmNDArray_SetWritableFlag;
-        this->UsmNDArray_MakeFromMemory_ = UsmNDArray_MakeFromMemory;
+        this->UsmNDArray_MakeSimpleFromMemory_ =
+            UsmNDArray_MakeSimpleFromMemory;
+        this->UsmNDArray_MakeSimpleFromPtr_ = UsmNDArray_MakeSimpleFromPtr;
         this->UsmNDArray_MakeFromPtr_ = UsmNDArray_MakeFromPtr;
 
         // constants
diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi
index 24bb8e3834..f0862f3e8e 100644
--- a/dpctl/tensor/_stride_utils.pxi
+++ b/dpctl/tensor/_stride_utils.pxi
@@ -141,9 +141,9 @@ cdef int _from_input_shape_strides(
                 str_i = strides[i]
                 strides_arr[i] = str_i
                 if str_i > 0:
-                    max_shift += strides_arr[i] * (shape_arr[i] - 1)
+                    max_shift += str_i * (shape_arr[i] - 1)
                 else:
-                    min_shift += strides_arr[i] * (shape_arr[i] - 1)
+                    min_shift += str_i * (shape_arr[i] - 1)
             min_disp[0] = min_shift
             max_disp[0] = max_shift
             if max_shift == min_shift + (elem_count - 1):
@@ -199,7 +199,7 @@ cdef int _from_input_shape_strides(
     # return ERROR_INTERNAL
 
 
-cdef object _make_int_tuple(int nd, Py_ssize_t *ary):
+cdef object _make_int_tuple(int nd, const Py_ssize_t *ary):
     """
     Makes Python tuple from C array
     """
@@ -216,7 +216,7 @@ cdef object _make_int_tuple(int nd, Py_ssize_t *ary):
         return None
 
 
-cdef object _make_reversed_int_tuple(int nd, Py_ssize_t *ary):
+cdef object _make_reversed_int_tuple(int nd, const Py_ssize_t *ary):
     """
     Makes Python reversed tuple from C array
     """
diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx
index 73269fe36c..a708418746 100644
--- a/dpctl/tensor/_usmarray.pyx
+++ b/dpctl/tensor/_usmarray.pyx
@@ -351,6 +351,9 @@ cdef class usm_ndarray:
         return mem.queue
 
     cdef c_dpctl.DPCTLSyclQueueRef get_queue_ref(self) except *:
+        """
+        Returns a copy of DPCTLSyclQueueRef associated with array
+        """
         cdef c_dpctl.SyclQueue q = self.get_sycl_queue()
         cdef c_dpctl.DPCTLSyclQueueRef QRef = q.get_queue_ref()
         cdef c_dpctl.DPCTLSyclQueueRef QRefCopy = NULL
@@ -1316,15 +1319,24 @@ cdef api void UsmNDArray_SetWritableFlag(usm_ndarray arr, int flag):
     arr_fl |= (USM_ARRAY_WRITABLE if flag else 0)
     arr.flags_ = arr_fl
 
-cdef api object UsmNDArray_MakeFromMemory(
+cdef api object UsmNDArray_MakeSimpleFromMemory(
     int nd, const Py_ssize_t *shape, int typenum,
     c_dpmem._Memory mobj, Py_ssize_t offset, char order
 ):
-    """Create usm_ndarray.
-
-    Equivalent to usm_ndarray(
-        _make_tuple(nd, shape), dtype=_make_dtype(typenum),
-        buffer=mobj, offset=offset)
+    """Create contiguous usm_ndarray.
+
+    Args:
+        nd: number of dimensions (non-negative)
+        shape: array of nd non-negative array's sizes along each dimension
+        typenum: array elemental type number
+        mobj: _Memory object representing the USM allocation that
+              serves as the array's buffer
+        offset: distance between element with zero multi-index and the
+                start of allocation
+        order: Memory layout of the array. Use 'C' for C-contiguous or
+               row-major layout; 'F' for F-contiguous or column-major layout
+    Returns:
+        Created usm_ndarray instance
     """
     cdef object shape_tuple = _make_int_tuple(nd, shape)
     cdef usm_ndarray arr = usm_ndarray(
@@ -1337,17 +1349,25 @@ cdef api object UsmNDArray_MakeFromMemory(
     return arr
 
 
-cdef api object UsmNDArray_MakeFromPtr(
+cdef api object UsmNDArray_MakeSimpleFromPtr(
     size_t nelems,
     int typenum,
     c_dpctl.DPCTLSyclUSMRef ptr,
     c_dpctl.DPCTLSyclQueueRef QRef,
     object owner
 ):
-    """Create usm_ndarray from pointer.
-
-    Argument owner=None implies transert of USM allocation ownership
-    to create array object.
+    """Create 1D contiguous usm_ndarray from pointer.
+
+    Args:
+        nelems: number of elements in array
+        typenum: array elemental type number
+        ptr: pointer to the start of allocation
+        QRef: DPCTLSyclQueueRef associated with the allocation
+        owner: Python object managing lifetime of USM allocation.
+               Value None implies transfer of USM allocation ownership
+               to the created array object.
+    Returns:
+        Created usm_ndarray instance
     """
     cdef size_t itemsize = type_bytesize(typenum)
     cdef size_t nbytes = itemsize * nelems
@@ -1360,3 +1380,121 @@ cdef api object UsmNDArray_MakeFromPtr(
         buffer=mobj
     )
     return arr
+
+cdef api object UsmNDArray_MakeFromPtr(
+    int nd,
+    const Py_ssize_t *shape,
+    int typenum,
+    const Py_ssize_t *strides,
+    c_dpctl.DPCTLSyclUSMRef ptr,
+    c_dpctl.DPCTLSyclQueueRef QRef,
+    Py_ssize_t offset,
+    object owner
+):
+    """
+    General usm_ndarray constructor from externally made USM-allocation.
+
+    Args:
+        nd: number of dimensions (non-negative)
+        shape: array of nd non-negative array's sizes along each dimension
+        typenum: array elemental type number
+        strides: array of nd strides along each dimension in elements
+        ptr: pointer to the start of allocation
+        QRef: DPCTLSyclQueueRef associated with the allocation
+        offset: distance between element with zero multi-index and the
+                start of allocation
+        owner: Python object managing lifetime of USM allocation.
+               Value None implies transfer of USM allocation ownership
+               to the created array object.
+    Returns:
+        Created usm_ndarray instance
+    Raises:
+        ValueError: if nd or a dimension is negative, if ptr, QRef, or
+            (for nd > 0) shape or strides is NULL, or if offset, shape
+            and strides address memory before the start of allocation
+    """
+    cdef size_t itemsize = type_bytesize(typenum)
+    cdef size_t nbytes = 0
+    cdef size_t nelems = 1
+    cdef Py_ssize_t min_disp = 0
+    cdef Py_ssize_t max_disp = 0
+    cdef Py_ssize_t step_ = 0
+    cdef Py_ssize_t dim_ = 0
+    cdef int it = 0
+    cdef c_dpmem._Memory mobj
+    cdef usm_ndarray arr
+    cdef object obj_shape
+    cdef object obj_strides
+
+    if (nd < 0):
+        raise ValueError("Dimensionality must be non-negative")
+    if (ptr is NULL or QRef is NULL):
+        raise ValueError(
+            "Non-null USM allocation pointer and QRef are expected"
+        )
+    if (nd == 0):
+        # case of 0d scalars: the only element sits `offset` elements
+        # past the start of the allocation
+        if offset < 0:
+            raise ValueError("Offset must be non-negative")
+        nbytes = itemsize * (offset + 1)
+        mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+            ptr, nbytes, QRef, memory_owner=owner
+        )
+        arr = usm_ndarray(
+            tuple(),
+            dtype=_make_typestr(typenum),
+            buffer=mobj,
+            offset=offset
+        )
+        return arr
+    if (shape is NULL or strides is NULL):
+        raise ValueError("Both shape and stride vectors are required")
+    # accumulate the smallest and largest element displacements
+    # reachable from the zero multi-index element
+    for it in range(nd):
+        dim_ = shape[it]
+        if dim_ < 0:
+            raise ValueError(
+                f"Dimension along axis {it} must be non-negative"
+            )
+        nelems *= dim_
+        if dim_ > 0:
+            step_ = strides[it]
+            if step_ > 0:
+                max_disp += step_ * (dim_ - 1)
+            else:
+                min_disp += step_ * (dim_ - 1)
+
+    obj_shape = _make_int_tuple(nd, shape)
+    obj_strides = _make_int_tuple(nd, strides)
+    if nelems == 0:
+        # empty array references no elements; expose one item at ptr
+        mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+            ptr, itemsize, QRef, memory_owner=owner
+        )
+        arr = usm_ndarray(
+            obj_shape,
+            dtype=_make_typestr(typenum),
+            strides=obj_strides,
+            buffer=mobj,
+            offset=0
+        )
+        return arr
+    if offset + min_disp < 0:
+        raise ValueError(
+            "Given shape, strides and offset reference out-of-bound memory"
+        )
+    # the memory object must span up to the farthest reachable element
+    nbytes = itemsize * (offset + max_disp + 1)
+    mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+        ptr, nbytes, QRef, memory_owner=owner
+    )
+    arr = usm_ndarray(
+        obj_shape,
+        dtype=_make_typestr(typenum),
+        strides=obj_strides,
+        buffer=mobj,
+        offset=offset
+    )
+    return arr
diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py
index 4534959a52..946ff53b27 100644
--- a/dpctl/tests/test_usm_ndarray_ctor.py
+++ b/dpctl/tests/test_usm_ndarray_ctor.py
@@ -530,7 +530,7 @@ def test_pyx_capi_make_from_memory():
     any_usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q)
     make_from_memory_fn = _pyx_capi_fnptr_to_callable(
         any_usm_ndarray,
-        "UsmNDArray_MakeFromMemory",
+        "UsmNDArray_MakeSimpleFromMemory",
         b"PyObject *(int, Py_ssize_t const *, int, "
         b"struct Py_MemoryObject *, Py_ssize_t, char)",
         fn_restype=ctypes.py_object,
@@ -601,7 +601,7 @@ def test_pyx_capi_make_from_ptr():
     usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q)
     make_from_ptr = _pyx_capi_fnptr_to_callable(
         usm_ndarray,
-        "UsmNDArray_MakeFromPtr",
+        "UsmNDArray_MakeSimpleFromPtr",
         b"PyObject *(size_t, int, DPCTLSyclUSMRef, "
         b"DPCTLSyclQueueRef, PyObject *)",
         fn_restype=ctypes.py_object,
@@ -632,6 +632,108 @@ def test_pyx_capi_make_from_ptr():
     assert isinstance(arr.__repr__(), str)
 
 
+def test_pyx_capi_make_general():
+    q = get_queue_or_skip()
+    usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q)
+    make_from_ptr = _pyx_capi_fnptr_to_callable(
+        usm_ndarray,
+        "UsmNDArray_MakeFromPtr",
+        b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, "
+        b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)",
+        fn_restype=ctypes.py_object,
+        fn_argtypes=(
+            ctypes.c_int,
+            ctypes.POINTER(ctypes.c_ssize_t),
+            ctypes.c_int,
+            ctypes.POINTER(ctypes.c_ssize_t),
+            ctypes.c_void_p,
+            ctypes.c_void_p,
+            ctypes.c_ssize_t,
+            ctypes.py_object,
+        ),
+    )
+    # Create array to view into diagonal of a matrix
+    n = 5
+    mat = dpt.reshape(
+        dpt.arange(n * n, dtype="i4", sycl_queue=q),
+        (
+            n,
+            n,
+        ),
+    )
+    c_shape = (ctypes.c_ssize_t * 1)(
+        n,
+    )
+    c_strides = (ctypes.c_ssize_t * 1)(
+        n + 1,
+    )
+    diag = make_from_ptr(
+        ctypes.c_int(1),
+        c_shape,
+        ctypes.c_int(mat.dtype.num),
+        c_strides,
+        mat._pointer,
+        mat.sycl_queue.addressof_ref(),
+        ctypes.c_ssize_t(0),
+        mat,
+    )
+    assert isinstance(diag, dpt.usm_ndarray)
+    assert diag.shape == (n,)
+    assert diag.strides == (n + 1,)
+    assert diag.dtype == mat.dtype
+    assert diag.sycl_queue == q
+    assert diag._pointer == mat._pointer
+    del mat
+    assert isinstance(diag.__repr__(), str)
+    # create 0d scalar
+    mat = dpt.reshape(
+        dpt.arange(n * n, dtype="i4", sycl_queue=q),
+        (
+            n,
+            n,
+        ),
+    )
+    sc = make_from_ptr(
+        ctypes.c_int(0),
+        None,  # NULL pointer
+        ctypes.c_int(mat.dtype.num),
+        None,  # NULL pointer
+        mat._pointer,
+        mat.sycl_queue.addressof_ref(),
+        ctypes.c_ssize_t(0),
+        mat,
+    )
+    assert isinstance(sc, dpt.usm_ndarray)
+    assert sc.shape == tuple()
+    assert sc.dtype == mat.dtype
+    assert sc.sycl_queue == q
+    assert sc._pointer == mat._pointer
+    c_shape = (ctypes.c_ssize_t * 2)(0, n)
+    c_strides = (ctypes.c_ssize_t * 2)(0, 1)
+    zd_arr = make_from_ptr(
+        ctypes.c_int(2),
+        c_shape,
+        ctypes.c_int(mat.dtype.num),
+        c_strides,
+        mat._pointer,
+        mat.sycl_queue.addressof_ref(),
+        ctypes.c_ssize_t(0),
+        mat,
+    )
+    assert isinstance(zd_arr, dpt.usm_ndarray)
+    assert zd_arr.shape == (
+        0,
+        n,
+    )
+    assert zd_arr.strides == (
+        0,
+        1,
+    )
+    assert zd_arr.dtype == mat.dtype
+    assert zd_arr.sycl_queue == q
+    assert zd_arr._pointer == mat._pointer
+
+
 def _pyx_capi_int(X, pyx_capi_name, caps_name=b"int", val_restype=ctypes.c_int):
     import sys
 