From a496d53f89f846d101ef3a7755cce9593a14f0d3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 26 Sep 2022 18:54:35 -0500 Subject: [PATCH 1/4] Introduced dpctl::tensor::is_c_contiguous and is_f_contiguous and used it --- dpctl/apis/include/dpctl4pybind11.hpp | 12 ++++ dpctl/tensor/libtensor/source/tensor_py.cpp | 78 ++++++++------------- 2 files changed, 43 insertions(+), 47 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index 8c4d7a31fd..745f5053d2 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -563,6 +563,18 @@ class usm_ndarray : public py::object return UsmNDArray_GetElementSize(raw_ar); } + + bool is_c_contiguous() const + { + int flags = this->get_flags(); + return static_cast(flags & USM_ARRAY_C_CONTIGUOUS); + } + + bool is_f_contiguous() const + { + int flags = this->get_flags(); + return static_cast(flags & USM_ARRAY_F_CONTIGUOUS); + } }; } // end namespace tensor diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 0566681d0f..0477f6767e 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -322,15 +322,16 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, throw py::value_error("Arrays index overlapping segments of memory"); } - int src_flags = src.get_flags(); - int dst_flags = dst.get_flags(); + bool is_src_c_contig = src.is_c_contiguous(); + bool is_src_f_contig = src.is_f_contiguous(); + + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); // check for applicability of special cases: // (same type && (both C-contiguous || both F-contiguous) - bool both_c_contig = ((src_flags & USM_ARRAY_C_CONTIGUOUS) && - (dst_flags & USM_ARRAY_C_CONTIGUOUS)); - bool both_f_contig = ((src_flags & USM_ARRAY_F_CONTIGUOUS) && - (dst_flags & USM_ARRAY_F_CONTIGUOUS)); + bool both_c_contig = (is_src_c_contig && is_dst_c_contig); + bool both_f_contig = (is_src_f_contig && is_dst_f_contig); if (both_c_contig || both_f_contig) { if (src_type_id == dst_type_id) { @@ -360,12 +361,6 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, int nd = src_nd; const py::ssize_t *shape = src_shape; - bool is_src_c_contig = ((src_flags & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_src_f_contig = ((src_flags & USM_ARRAY_F_CONTIGUOUS) != 0); - - bool is_dst_c_contig = ((dst_flags & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_dst_f_contig = ((dst_flags & USM_ARRAY_F_CONTIGUOUS) != 0); - constexpr py::ssize_t src_itemsize = 1; // in elements constexpr py::ssize_t dst_itemsize = 1; // in elements @@ -576,14 +571,13 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, const py::ssize_t *src_strides = src.get_strides_raw(); if (src_strides == nullptr) { - int src_flags = src.get_flags(); - if (src_flags & USM_ARRAY_C_CONTIGUOUS) { + if (src.is_c_contiguous()) { const auto &src_contig_strides = c_contiguous_strides(src_nd, src_shape); std::copy(src_contig_strides.begin(), src_contig_strides.end(), packed_host_shapes_strides_shp->begin() + src_nd); } - else if (src_flags & USM_ARRAY_F_CONTIGUOUS) { + else if (src.is_f_contiguous()) { const auto &src_contig_strides = f_contiguous_strides(src_nd, src_shape); std::copy(src_contig_strides.begin(), src_contig_strides.end(), @@ -602,15 +596,14 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, const py::ssize_t *dst_strides = dst.get_strides_raw(); if (dst_strides == nullptr) { - int dst_flags = dst.get_flags(); - if (dst_flags & USM_ARRAY_C_CONTIGUOUS) { + if (dst.is_c_contiguous()) { const auto &dst_contig_strides = c_contiguous_strides(dst_nd, dst_shape); std::copy(dst_contig_strides.begin(), dst_contig_strides.end(), packed_host_shapes_strides_shp->begin() + 2 * src_nd + dst_nd); } - else if (dst_flags & USM_ARRAY_F_CONTIGUOUS) { + else if (dst.is_f_contiguous()) { const auto &dst_contig_strides = f_contiguous_strides(dst_nd, dst_shape); std::copy(dst_contig_strides.begin(), dst_contig_strides.end(), @@ -744,14 +737,13 @@ void copy_numpy_ndarray_into_usm_ndarray( char *dst_data = dst.get_data(); int src_flags = npy_src.flags(); - int dst_flags = dst.get_flags(); // check for applicability of special cases: // (same type && (both C-contiguous || both F-contiguous) - bool both_c_contig = ((src_flags & py::array::c_style) && - (dst_flags & USM_ARRAY_C_CONTIGUOUS)); - bool both_f_contig = ((src_flags & py::array::f_style) && - (dst_flags & USM_ARRAY_F_CONTIGUOUS)); + bool both_c_contig = + ((src_flags & py::array::c_style) && dst.is_c_contiguous()); + bool both_f_contig = + ((src_flags & py::array::f_style) && dst.is_f_contiguous()); if (both_c_contig || both_f_contig) { if (src_type_id == dst_type_id) { int src_elem_size = npy_src.itemsize(); @@ -791,8 +783,8 @@ void copy_numpy_ndarray_into_usm_ndarray( bool is_src_c_contig = ((src_flags & py::array::c_style) != 0); bool is_src_f_contig = ((src_flags & py::array::f_style) != 0); - bool is_dst_c_contig = ((dst_flags & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_dst_f_contig = ((dst_flags & USM_ARRAY_F_CONTIGUOUS) != 0); + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); // all args except itemsizes and is_?_contig bools can be modified by // reference @@ -906,16 +898,15 @@ usm_ndarray_linear_sequence_step(py::object start, "usm_ndarray_linspace: Expecting 1D array to populate"); } - int flags = dst.get_flags(); - if (!(flags & USM_ARRAY_C_CONTIGUOUS)) { + if (!dst.is_c_contiguous()) { throw py::value_error( "usm_ndarray_linspace: Non-contiguous arrays are not supported"); } sycl::queue dst_q = dst.get_queue(); - if (dst_q != exec_q && dst_q.get_context() != exec_q.get_context()) { + if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) { throw py::value_error( - "Execution queue context is not the same as allocation context"); + "Execution queue is not compatible with the allocation queue"); } int dst_typenum = dst.get_typenum(); @@ -955,14 +946,13 @@ usm_ndarray_linear_sequence_affine(py::object start, "usm_ndarray_linspace: Expecting 1D array to populate"); } - int flags = dst.get_flags(); - if (!(flags & USM_ARRAY_C_CONTIGUOUS)) { + if (!dst.is_c_contiguous()) { throw py::value_error( "usm_ndarray_linspace: Non-contiguous arrays are not supported"); } sycl::queue dst_q = dst.get_queue(); - if (dst_q != exec_q && dst_q.get_context() != exec_q.get_context()) { + if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) { throw py::value_error( "Execution queue context is not the same as allocation context"); } @@ -1010,12 +1000,10 @@ usm_ndarray_full(py::object py_value, return std::make_pair(sycl::event(), sycl::event()); } - int dst_flags = dst.get_flags(); - sycl::queue dst_q = dst.get_queue(); - if (dst_q != exec_q && dst_q.get_context() != exec_q.get_context()) { + if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) { throw py::value_error( - "Execution queue context is not the same as allocation context"); + "Execution queue is not compatible with the allocation queue"); } int dst_typenum = dst.get_typenum(); @@ -1024,9 +1012,7 @@ usm_ndarray_full(py::object py_value, char *dst_data = dst.get_data(); sycl::event full_event; - if (dst_nelems == 1 || (dst_flags & USM_ARRAY_C_CONTIGUOUS) || - (dst_flags & USM_ARRAY_F_CONTIGUOUS)) - { + if (dst_nelems == 1 || dst.is_c_contiguous() || dst.is_f_contiguous()) { auto fn = full_contig_dispatch_vector[dst_typeid]; sycl::event full_contig_event = @@ -1079,8 +1065,8 @@ eye(py::ssize_t k, return std::make_pair(sycl::event{}, sycl::event{}); } - bool is_dst_c_contig = ((dst.get_flags() & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_dst_f_contig = ((dst.get_flags() & USM_ARRAY_F_CONTIGUOUS) != 0); + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); if (!is_dst_c_contig && !is_dst_f_contig) { throw py::value_error("USM array is not contiguous"); } @@ -1203,9 +1189,8 @@ tri(sycl::queue &exec_q, using shT = std::vector; shT src_strides(src_nd); - int src_flags = src.get_flags(); - bool is_src_c_contig = ((src_flags & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_src_f_contig = ((src_flags & USM_ARRAY_F_CONTIGUOUS) != 0); + bool is_src_c_contig = src.is_c_contiguous(); + bool is_src_f_contig = src.is_f_contiguous(); const py::ssize_t *src_strides_raw = src.get_strides_raw(); if (src_strides_raw == nullptr) { @@ -1227,9 +1212,8 @@ tri(sycl::queue &exec_q, shT dst_strides(src_nd); - int dst_flags = dst.get_flags(); - bool is_dst_c_contig = ((dst_flags & USM_ARRAY_C_CONTIGUOUS) != 0); - bool is_dst_f_contig = ((dst_flags & USM_ARRAY_F_CONTIGUOUS) != 0); + bool is_dst_c_contig = dst.is_c_contiguous(); + bool is_dst_f_contig = dst.is_f_contiguous(); const py::ssize_t *dst_strides_raw = dst.get_strides_raw(); if (dst_strides_raw == nullptr) { From 046d37b88c89bf626f8b4a8a059cfa9ab0a43cda Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 1 Oct 2022 13:42:44 -0500 Subject: [PATCH 2/4] Modularized retrieval of usm_ndarray_ptr Use -O1 when compiling tensor_py for now to work around suspected issue with loading of C-API functions. --- dpctl/apis/include/dpctl4pybind11.hpp | 36 ++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index 745f5053d2..c97d6e6e65 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -439,8 +439,7 @@ class usm_ndarray : public py::object char *get_data() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetData(raw_ar); } @@ -452,16 +451,14 @@ class usm_ndarray : public py::object int get_ndim() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetNDim(raw_ar); } const py::ssize_t *get_shape_raw() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetShape(raw_ar); } @@ -474,16 +471,14 @@ class usm_ndarray : public py::object const py::ssize_t *get_strides_raw() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetStrides(raw_ar); } py::ssize_t get_size() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); int ndim = UsmNDArray_GetNDim(raw_ar); const py::ssize_t *shape = UsmNDArray_GetShape(raw_ar); @@ -499,8 +494,7 @@ class usm_ndarray : public py::object std::pair get_minmax_offsets() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); int nd = UsmNDArray_GetNDim(raw_ar); const py::ssize_t *shape = UsmNDArray_GetShape(raw_ar); @@ -533,8 +527,7 @@ class usm_ndarray : public py::object sycl::queue get_queue() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); DPCTLSyclQueueRef QRef = UsmNDArray_GetQueueRef(raw_ar); return *(reinterpret_cast(QRef)); @@ -542,24 +535,21 @@ class usm_ndarray : public py::object int get_typenum() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetTypenum(raw_ar); } int get_flags() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetFlags(raw_ar); } int get_elemsize() const { - PyObject *raw_o = this->ptr(); - PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); + PyUSMArrayObject *raw_ar = this->usm_array_ptr(); return UsmNDArray_GetElementSize(raw_ar); } @@ -575,6 +565,12 @@ class usm_ndarray : public py::object int flags = this->get_flags(); return static_cast(flags & USM_ARRAY_F_CONTIGUOUS); } + +private: + PyUSMArrayObject *usm_array_ptr() const + { + return reinterpret_cast(m_ptr); + } }; } // end namespace tensor From 43a6321a8c0cce0311965064ab9fc0080a8e9eb8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 28 Sep 2022 06:34:01 -0500 Subject: [PATCH 3/4] Got rid of `static dpctl::tensor::detail::usm_ndarray_types array_types`. The class is already singleton. Instead create a local variable at each use site. This local variable is going to be a constant reference to the singleton. --- dpctl/tensor/libtensor/source/tensor_py.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 0477f6767e..36a6ac90d3 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -42,8 +42,6 @@ namespace py = pybind11; -static dpctl::tensor::detail::usm_ndarray_types array_types; - namespace { @@ -301,6 +299,7 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, int src_typenum = src.get_typenum(); int dst_typenum = dst.get_typenum(); + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int src_type_id = array_types.typenum_to_lookup_id(src_typenum); int dst_type_id = array_types.typenum_to_lookup_id(dst_typenum); @@ -545,6 +544,7 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, const py::ssize_t *src_shape = src.get_shape_raw(); const py::ssize_t *dst_shape = dst.get_shape_raw(); + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int type_id = array_types.typenum_to_lookup_id(src_typenum); auto fn = copy_for_reshape_generic_dispatch_vector[type_id]; @@ -729,6 +729,7 @@ void copy_numpy_ndarray_into_usm_ndarray( py::detail::array_descriptor_proxy(npy_src.dtype().ptr())->type_num; int dst_typenum = dst.get_typenum(); + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int src_type_id = array_types.typenum_to_lookup_id(src_typenum); int dst_type_id = array_types.typenum_to_lookup_id(dst_typenum); @@ -909,6 +910,7 @@ usm_ndarray_linear_sequence_step(py::object start, "Execution queue is not compatible with the allocation queue"); } + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int dst_typenum = dst.get_typenum(); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -957,6 +959,7 @@ usm_ndarray_linear_sequence_affine(py::object start, "Execution queue context is not the same as allocation context"); } + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int dst_typenum = dst.get_typenum(); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -1006,6 +1009,7 @@ usm_ndarray_full(py::object py_value, "Execution queue is not compatible with the allocation queue"); } + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int dst_typenum = dst.get_typenum(); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -1054,6 +1058,7 @@ eye(py::ssize_t k, "allocation queue"); } + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); int dst_typenum = dst.get_typenum(); int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); @@ -1168,6 +1173,8 @@ tri(sycl::queue &exec_q, throw py::value_error("Arrays index overlapping segments of memory"); } + auto array_types = dpctl::tensor::detail::usm_ndarray_types::get(); + int src_typenum = src.get_typenum(); int dst_typenum = dst.get_typenum(); int src_typeid = array_types.typenum_to_lookup_id(src_typenum); @@ -1441,9 +1448,6 @@ PYBIND11_MODULE(_tensor_impl, m) init_copy_for_reshape_dispatch_vector(); import_dpctl(); - // populate types constants for type dispatching functions - array_types = dpctl::tensor::detail::usm_ndarray_types::get(); - m.def( "_contract_iter", &contract_iter, "Simplifies iteration of array of given shape & stride. Returns " From 7d2ab88ff6b6daecc742e6667be03f6060cd5bf8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 8 Oct 2022 15:39:59 -0500 Subject: [PATCH 4/4] Changed dpctl/.gitignore to only ignore generated _*.cpp files, not all *.cpp --- dpctl/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/.gitignore b/dpctl/.gitignore index 3e23a8af25..3376d3081c 100644 --- a/dpctl/.gitignore +++ b/dpctl/.gitignore @@ -1,5 +1,5 @@ *.so -*.cpp +_*.cpp *.cxx *.c *.h