From 6887fb39411f1b431d465b4562a575b40c80ccba Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Tue, 9 Mar 2021 16:12:34 +0300 Subject: [PATCH 1/8] MULTIPLY enable broadcasting --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 38 +++++---- dpnp/backend/src/dpnp_iterator.hpp | 85 +++++++++++++++++++- dpnp/backend/src/dpnp_utils.hpp | 86 +++++++++++++++++++++ dpnp/dpnp_iface_mathematical.py | 4 - 4 files changed, 193 insertions(+), 20 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index cebf63271c5e..7d7176978d23 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -28,6 +28,7 @@ #include #include "dpnp_fptr.hpp" +#include "dpnp_iterator.hpp" #include "dpnp_utils.hpp" #include "queue_sycl.hpp" @@ -353,10 +354,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const size_t* where) \ { \ /* avoid warning unused variable*/ \ - (void)input1_shape; \ - (void)input1_shape_ndim; \ - (void)input2_shape; \ - (void)input2_shape_ndim; \ (void)where; \ \ if (!input1_size || !input2_size) \ @@ -364,18 +361,31 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; \ } \ \ - const size_t result_size = (input2_size > input1_size) ? input2_size : input1_size; \ - \ - const _DataType_input1* input1_data = reinterpret_cast(input1_in); \ - const _DataType_input2* input2_data = reinterpret_cast(input2_in); \ + _DataType_input1* input1_data = reinterpret_cast<_DataType_input1*>(const_cast(input1_in)); \ + _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast(input2_in)); \ _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \ \ + std::vector result_shape = get_common_shape(input1_shape, input1_shape_ndim, \ + input2_shape, input2_shape_ndim); \ + \ + DPNPC_id<_DataType_input1> input1(input1_data, input1_shape, input1_shape_ndim); \ + input1.broadcast(result_shape); \ + \ + DPNPC_id<_DataType_input2> input2(input2_data, input2_shape, input2_shape_ndim); \ + input2.broadcast(result_shape); \ + \ + const size_t result_size = input1.get_output_size(); \ + \ cl::sycl::range<1> gws(result_size); \ + const DPNPC_id<_DataType_input1>* input1_it = &input1; \ + const DPNPC_id<_DataType_input2>* input2_it = &input2; \ auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { \ - size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ - const _DataType_output input1_elem = (input1_size == 1) ? input1_data[0] : input1_data[i]; \ - const _DataType_output input2_elem = (input2_size == 1) ? input2_data[0] : input2_data[i]; \ - result[i] = __operation1__; \ + const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + { \ + const _DataType_output input1_elem = (*input1_it)[i]; \ + const _DataType_output input2_elem = (*input2_it)[i]; \ + result[i] = __operation1__; \ + } \ }; \ auto kernel_func = [&](cl::sycl::handler& cgh) { \ cgh.parallel_for::value) && \ std::is_same<_DataType_input2, _DataType_input1>::value) \ { \ - _DataType_input1* input1 = const_cast<_DataType_input1*>(input1_data); \ - _DataType_input2* input2 = const_cast<_DataType_input2*>(input2_data); \ - event = __operation2__(DPNP_QUEUE, result_size, input1, input2, result); \ + event = __operation2__(DPNP_QUEUE, result_size, input1_data, input2_data, result); \ } \ else \ { \ diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index 54015c2985eb..e578fb4116c9 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -238,6 +238,58 @@ class DPNPC_id final return output_size; } + /** + * @ingroup BACKEND_UTILS + * @brief Set output shape for the data object to use in computation. + * + * Set shape of output array to use in computation of input array index by index of output array. + * + * @note this function is designed for non-SYCL environment execution + * + * @param [in] __shape Output shape. + */ + inline void broadcast(const std::vector& __shape) + { + if (axis_use || broadcast_use) + { + return; + } + + if (broadcastable(input_shape, input_shape_size, __shape)) + { + broadcast_use = true; + + output_shape_size = __shape.size(); + const size_type output_shape_size_in_bytes = output_shape_size * sizeof(size_type); + + output_shape = reinterpret_cast(dpnp_memory_alloc_c(output_shape_size_in_bytes)); + int in_idx = input_shape_size - 1; + int out_idx = output_shape_size - 1; + for (; out_idx >= 0; --in_idx, --out_idx) + { + output_shape[out_idx] = __shape[out_idx]; + + // ex: input_shape = {7, 1, 5}, output_shape = {8, 7, 6, 5} => broadcast_axes = {0, 2} + if (in_idx < 0 || input_shape[in_idx] != output_shape[out_idx]) + { + broadcast_axes.insert(broadcast_axes.begin(), out_idx); + } + } + + output_size = std::accumulate( + output_shape, output_shape + output_shape_size, size_type(1), std::multiplies()); + + output_shape_strides = reinterpret_cast(dpnp_memory_alloc_c(output_shape_size_in_bytes)); + get_shape_offsets_inkernel(output_shape, output_shape_size, output_shape_strides); + + iteration_size = 1; + + // make thread private storage for each shape by multiplying memory + sycl_output_xyz = + reinterpret_cast(dpnp_memory_alloc_c(output_size * output_shape_size_in_bytes)); + } + } + /** * @ingroup BACKEND_UTILS * @brief Set axis for the data object to use in computation. @@ -268,7 +320,7 @@ class DPNPC_id final /** * @ingroup BACKEND_UTILS - * @brief Set axes for the data object to use in computation. + * @brief Broadcast input data to specified shape. * * Set axes of the shape of input array to use in iteration. * Axes might be negative to indicate axes as reverse iterator @@ -285,6 +337,11 @@ class DPNPC_id final */ inline void set_axes(const std::vector& __axes) { + if (axis_use || broadcast_use) + { + return; + } + if (!__axes.empty() && input_shape_size) { axes = get_validated_axes(__axes, input_shape_size); @@ -364,6 +421,11 @@ class DPNPC_id final /// this function is designed for SYCL environment execution inline reference operator[](size_type __n) const { + if (broadcast_use) + { + return *begin(__n); + } + const iterator it = begin(); return it[__n]; } @@ -426,6 +488,24 @@ class DPNPC_id final } } } + else if (broadcast_use) + { + assert(output_global_id < output_size); + + // use thread private storage + size_type* sycl_output_xyz_thread = sycl_output_xyz + (output_global_id * output_shape_size); + + get_xyz_by_id_inkernel(output_global_id, output_shape_strides, output_shape_size, sycl_output_xyz_thread); + + for (size_t oit = 0, iit = 0; oit < output_shape_size; ++oit) + { + if (std::find(broadcast_axes.begin(), broadcast_axes.end(), oit) == broadcast_axes.end()) + { + input_global_id += (sycl_output_xyz_thread[oit] * input_shape_strides[iit]); + ++iit; + } + } + } return input_global_id; } @@ -456,6 +536,9 @@ class DPNPC_id final std::vector axes; /**< input shape reduction axes */ bool axis_use = false; + std::vector broadcast_axes; /**< input shape broadcast axes */ + bool broadcast_use = false; + size_type output_size = size_type{}; /**< output array size. Expected is same as GWS */ size_type* output_shape = nullptr; /**< output array shape */ size_type output_shape_size = size_type{}; /**< output array shape size */ diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index d5410bc7425a..940174e49050 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -127,6 +127,92 @@ size_t get_id_by_xyz_inkernel(const _DataType* xyz, size_t xyz_size, const _Data return global_id; } +/** + * @ingroup BACKEND_UTILS + * @brief Check input shape is broadcastable to output one. + * + * @param [in] input_shape Input shape. + * @param [in] output_shape Output shape. + * + * @return Input shape is broadcastable to output one or not. + */ +static inline bool + broadcastable(const std::vector& input_shape, const std::vector& output_shape) +{ + if (input_shape.size() > output_shape.size()) + { + return false; + } + + std::vector::const_reverse_iterator irit = input_shape.rbegin(); + std::vector::const_reverse_iterator orit = output_shape.rbegin(); + for (; irit != input_shape.rend(); ++irit, ++orit) + { + if (*irit != 1 && *irit != *orit) + { + return false; + } + } + + return true; +} + +static inline bool + broadcastable(const size_t* input_shape, const size_t input_shape_size, const std::vector& output_shape) +{ + const std::vector input_shape_vec(input_shape, input_shape + input_shape_size); + return broadcastable(input_shape_vec, output_shape); +} + +/** + * @ingroup BACKEND_UTILS + * @brief Get common shape based on input shapes. + * + * Example: + * Input1 shape A[8, 1, 6, 1] + * Input2 shape B[7, 1, 5] + * Output shape will be C[8, 7, 6, 5] + * + * @param [in] input1_shape Input1 shape. + * @param [in] input1_shape_size Input1 shape size. + * @param [in] input2_shape Input2 shape. + * @param [in] input2_shape_size Input2 shape size. + * + * @exception std::domain_error Input shapes are not broadcastable. + * @return Common shape. + */ +static inline std::vector + get_common_shape(const size_t* input1_shape, const size_t input1_shape_size, + const size_t* input2_shape, const size_t input2_shape_size) +{ + const size_t result_shape_size = (input2_shape_size > input1_shape_size) ? input2_shape_size : input1_shape_size; + std::vector result_shape; + result_shape.reserve(result_shape_size); + + int in1_idx = input1_shape_size - 1; + int in2_idx = input2_shape_size - 1; + for (; in1_idx >= 0 || in2_idx >= 0; --in1_idx, --in2_idx) + { + size_t input1_val = (in1_idx >= 0) ? input1_shape[in1_idx] : 1; + size_t input2_val = (in2_idx >= 0) ? input2_shape[in2_idx] : 1; + + if (input1_val == input2_val || input1_val == 1) + { + result_shape.insert(result_shape.begin(), input2_val); + } + else if (input2_val == 1) + { + result_shape.insert(result_shape.begin(), input1_val); + } + else + { + throw std::domain_error("DPNP Error: get_common_shape() failed with input shapes check"); + } + } + + return result_shape; +} + /** * @ingroup BACKEND_UTILS * @brief Normalizes an axes into a non-negative integer axes. diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 05da909ff25b..27392ba933c4 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1071,10 +1071,6 @@ def multiply(x1, x2, dtype=None, out=None, where=True, **kwargs): pass elif x2_is_dparray and x2.ndim == 0: pass - elif x1_is_dparray and x2_is_dparray and x1.size != x2.size: - pass - elif x1_is_dparray and x2_is_dparray and x1.shape != x2.shape: - pass elif dtype is not None: pass elif out is not None: From b66a4e8d7939811caf0d667a61dbabffd56df5ba Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Mon, 22 Mar 2021 09:26:12 +0300 Subject: [PATCH 2/8] Add gtests for broadcast iterator --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 6 +- dpnp/backend/src/dpnp_iterator.hpp | 29 +-- dpnp/backend/src/dpnp_utils.hpp | 10 +- dpnp/backend/tests/CMakeLists.txt | 1 + .../backend/tests/test_broadcast_iterator.cpp | 202 ++++++++++++++++++ 5 files changed, 226 insertions(+), 22 deletions(-) create mode 100644 dpnp/backend/tests/test_broadcast_iterator.cpp diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 7d7176978d23..0913fb126adf 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -365,14 +365,14 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast(input2_in)); \ _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \ \ - std::vector result_shape = get_common_shape(input1_shape, input1_shape_ndim, \ + std::vector result_shape = get_result_shape(input1_shape, input1_shape_ndim, \ input2_shape, input2_shape_ndim); \ \ DPNPC_id<_DataType_input1> input1(input1_data, input1_shape, input1_shape_ndim); \ - input1.broadcast(result_shape); \ + input1.broadcast_to_shape(result_shape); \ \ DPNPC_id<_DataType_input2> input2(input2_data, input2_shape, input2_shape_ndim); \ - input2.broadcast(result_shape); \ + input2.broadcast_to_shape(result_shape); \ \ const size_t result_size = input1.get_output_size(); \ \ diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index e578fb4116c9..c5160f2a0721 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -248,31 +248,35 @@ class DPNPC_id final * * @param [in] __shape Output shape. */ - inline void broadcast(const std::vector& __shape) + inline void broadcast_to_shape(const std::vector& __shape) { - if (axis_use || broadcast_use) + if (axis_use) { return; } if (broadcastable(input_shape, input_shape_size, __shape)) { + dpnp_memory_free_c(output_shape); + dpnp_memory_free_c(output_shape_strides); + dpnp_memory_free_c(sycl_output_xyz); + broadcast_axes.clear(); + broadcast_use = true; output_shape_size = __shape.size(); const size_type output_shape_size_in_bytes = output_shape_size * sizeof(size_type); output_shape = reinterpret_cast(dpnp_memory_alloc_c(output_shape_size_in_bytes)); - int in_idx = input_shape_size - 1; - int out_idx = output_shape_size - 1; - for (; out_idx >= 0; --in_idx, --out_idx) + + for (int irit = input_shape_size - 1, orit = output_shape_size - 1; orit >= 0; --irit, --orit) { - output_shape[out_idx] = __shape[out_idx]; + output_shape[orit] = __shape[orit]; // ex: input_shape = {7, 1, 5}, output_shape = {8, 7, 6, 5} => broadcast_axes = {0, 2} - if (in_idx < 0 || input_shape[in_idx] != output_shape[out_idx]) + if (irit < 0 || input_shape[irit] != output_shape[orit]) { - broadcast_axes.insert(broadcast_axes.begin(), out_idx); + broadcast_axes.insert(broadcast_axes.begin(), orit); } } @@ -337,7 +341,7 @@ class DPNPC_id final */ inline void set_axes(const std::vector& __axes) { - if (axis_use || broadcast_use) + if (broadcast_use) { return; } @@ -497,12 +501,11 @@ class DPNPC_id final get_xyz_by_id_inkernel(output_global_id, output_shape_strides, output_shape_size, sycl_output_xyz_thread); - for (size_t oit = 0, iit = 0; oit < output_shape_size; ++oit) + for (int irit = input_shape_size - 1, orit = output_shape_size - 1; irit >= 0; --irit, --orit) { - if (std::find(broadcast_axes.begin(), broadcast_axes.end(), oit) == broadcast_axes.end()) + if (std::find(broadcast_axes.begin(), broadcast_axes.end(), orit) == broadcast_axes.end()) { - input_global_id += (sycl_output_xyz_thread[oit] * input_shape_strides[iit]); - ++iit; + input_global_id += (sycl_output_xyz_thread[orit] * input_shape_strides[irit]); } } } diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index 940174e49050..a20a91c34b5f 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -182,19 +182,17 @@ static inline bool * @return Common shape. */ static inline std::vector - get_common_shape(const size_t* input1_shape, const size_t input1_shape_size, + get_result_shape(const size_t* input1_shape, const size_t input1_shape_size, const size_t* input2_shape, const size_t input2_shape_size) { const size_t result_shape_size = (input2_shape_size > input1_shape_size) ? input2_shape_size : input1_shape_size; std::vector result_shape; result_shape.reserve(result_shape_size); - int in1_idx = input1_shape_size - 1; - int in2_idx = input2_shape_size - 1; - for (; in1_idx >= 0 || in2_idx >= 0; --in1_idx, --in2_idx) + for (int irit1 = input1_shape_size - 1, irit2 = input2_shape_size - 1; irit1 >= 0 || irit2 >= 0; --irit1, --irit2) { - size_t input1_val = (in1_idx >= 0) ? input1_shape[in1_idx] : 1; - size_t input2_val = (in2_idx >= 0) ? input2_shape[in2_idx] : 1; + size_t input1_val = (irit1 >= 0) ? input1_shape[irit1] : 1; + size_t input2_val = (irit2 >= 0) ? input2_shape[irit2] : 1; if (input1_val == input2_val || input1_val == 1) { diff --git a/dpnp/backend/tests/CMakeLists.txt b/dpnp/backend/tests/CMakeLists.txt index 460516e52ff6..e0a4936d02d3 100644 --- a/dpnp/backend/tests/CMakeLists.txt +++ b/dpnp/backend/tests/CMakeLists.txt @@ -46,6 +46,7 @@ link_directories(${GTEST_LIB_DIR}) # TODO split add_executable(dpnpc_tests + test_broadcast_iterator.cpp test_main.cpp test_random.cpp test_utils.cpp diff --git a/dpnp/backend/tests/test_broadcast_iterator.cpp b/dpnp/backend/tests/test_broadcast_iterator.cpp new file mode 100644 index 000000000000..d62ce1a9302e --- /dev/null +++ b/dpnp/backend/tests/test_broadcast_iterator.cpp @@ -0,0 +1,202 @@ +//***************************************************************************** +// Copyright (c) 2016-2020, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include + +#include "dpnp_iterator.hpp" + +#define DPNP_LOCAL_QUEUE 1 // TODO need to fix build procedure and remove this workaround. Issue #551 +#include "queue_sycl.hpp" + +using namespace std; +using dpnpc_it_t = DPNPC_id::iterator; +using dpnpc_value_t = dpnpc_it_t::value_type; +using dpnpc_index_t = dpnpc_it_t::size_type; + +template +vector<_DataType> get_input_data(const vector& shape) +{ + const dpnpc_index_t size = accumulate(shape.begin(), shape.end(), dpnpc_index_t(1), multiplies()); + + vector<_DataType> input_data(size); + iota(input_data.begin(), input_data.end(), 1); // let's start from 1 to avoid cleaned memory comparison + + return input_data; +} + +TEST(TestBroadcastIterator, take_value_broadcast_loop_2D) +{ + vector expected_data{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}; + // expected input data 1, 2, 3 + vector input_data = get_input_data({3, 1}); + DPNPC_id result_obj(input_data.data(), {3, 1}); + result_obj.broadcast_to_shape({3, 4}); + + for (dpnpc_index_t output_id = 0; output_id < result_obj.get_output_size(); ++output_id) + { + EXPECT_EQ(result_obj[output_id], expected_data[output_id]); + } +} + +TEST(TestBroadcastIterator, take_value_broadcast_loop_3D) +{ + vector expected_data{1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}; + // expected input data 1, 2, 3, 4, 5, 6 + vector input_data = get_input_data({2, 3}); + DPNPC_id result_obj(input_data.data(), {2, 3}); + result_obj.broadcast_to_shape({2, 2, 3}); + + for (dpnpc_index_t output_id = 0; output_id < result_obj.get_output_size(); ++output_id) + { + EXPECT_EQ(result_obj[output_id], expected_data[output_id]); + } +} + +TEST(TestBroadcastIterator, output_size_broadcast) +{ + vector input_data = get_input_data({3, 4}); + DPNPC_id result_obj(input_data.data(), {3, 4}); + result_obj.broadcast_to_shape({2, 3, 4}); + + const dpnpc_index_t output_size = result_obj.get_output_size(); + EXPECT_EQ(output_size, 24); +} + +struct IteratorParameters +{ + vector input_shape; + vector output_shape; + vector result; + + /// Operator needs to print this container in human readable form in error reporting + friend std::ostream& operator<<(std::ostream& out, const IteratorParameters& data) + { + out << "IteratorParameters(input_shape=" << data.input_shape << ", output_shape=" << data.output_shape + << ", result=" << data.result << ")"; + + return out; + } +}; + +class IteratorBroadcasting : public ::testing::TestWithParam +{ +}; + +TEST_P(IteratorBroadcasting, loop_broadcast) +{ + const IteratorParameters& param = GetParam(); + const dpnpc_index_t result_size = param.result.size(); + + vector input_data = get_input_data(param.input_shape); + DPNPC_id input(input_data.data(), param.input_shape); + input.broadcast_to_shape(param.output_shape); + + ASSERT_EQ(input.get_output_size(), result_size); + + vector test_result(result_size, 42); + for (dpnpc_index_t output_id = 0; output_id < result_size; ++output_id) + { + test_result[output_id] = input[output_id]; + std::cout << "test_result[" << output_id << "] = " << test_result[output_id] << std::endl; + std::cout << "param.result.at(" << output_id << ") = " << param.result.at(output_id) << std::endl; + EXPECT_EQ(test_result[output_id], param.result.at(output_id)); + } +} + +TEST_P(IteratorBroadcasting, sycl_broadcast) +{ + using data_type = double; + + const IteratorParameters& param = GetParam(); + const dpnpc_index_t result_size = param.result.size(); + vector result(result_size, 42); + data_type* result_ptr = result.data(); + + vector input_data = get_input_data(param.input_shape); + DPNPC_id input(input_data.data(), param.input_shape); + input.broadcast_to_shape(param.output_shape); + + ASSERT_EQ(input.get_output_size(), result_size); + + cl::sycl::range<1> gws(result_size); + const DPNPC_id* input_it = &input; + auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { + const size_t idx = global_id[0]; + result_ptr[idx] = (*input_it)[idx]; + }; + + auto kernel_func = [&](cl::sycl::handler& cgh) { + cgh.parallel_for(gws, kernel_parallel_for_func); + }; + + cl::sycl::event event = DPNP_QUEUE.submit(kernel_func); + event.wait(); + + for (dpnpc_index_t i = 0; i < result_size; ++i) + { + EXPECT_EQ(result.at(i), param.result.at(i)); + } +} + +/** + * Expected values produced by following script: + * + * import numpy as np + * + * input_size = 12 + * input_shape = [3, 4] + * input = np.arange(1, input_size + 1, dtype=np.int64).reshape(input_shape) + * print(f"input shape={input.shape}") + * print(f"input:\n{input}\n") + * + * output_shape = [2, 3, 4] + * output = np.ones(output_shape, dtype=np.int64) + * print(f"output shape={output.shape}") + * + * result = input * output + * print(f"result={np.array2string(result.reshape(result.size), separator=', ')}\n", sep=", ") + */ +INSTANTIATE_TEST_SUITE_P( + TestBroadcastIterator, + IteratorBroadcasting, + testing::Values( + IteratorParameters{{1}, {1}, {1}}, + IteratorParameters{{1}, {4}, {1, 1, 1, 1}}, + IteratorParameters{{1}, {3, 4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, + IteratorParameters{{1}, {2, 3, 4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, + IteratorParameters{{4}, {4}, {1, 2, 3, 4}}, + IteratorParameters{{4}, {3, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}}, + IteratorParameters{{4}, {2, 3, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}}, + IteratorParameters{{3, 4}, {3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + IteratorParameters{{3, 4}, {2, 3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + IteratorParameters{{2, 3, 4}, {2, 3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}, + IteratorParameters{{2, 3, 1}, {2, 3, 4}, {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6}}, + IteratorParameters{{2, 1, 4}, {2, 3, 4}, {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, + 5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8}})); From 052a307c59551ace8b8edf5fdb9cdff35149681e Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 25 Mar 2021 13:04:58 +0300 Subject: [PATCH 3/8] Expand memory free before broadcasting --- dpnp/backend/src/dpnp_iterator.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index c5160f2a0721..9cc8bc3ae76f 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -257,10 +257,15 @@ class DPNPC_id final if (broadcastable(input_shape, input_shape_size, __shape)) { + broadcast_axes.clear(); + output_size = size_type{}; + output_shape_size = size_type{}; dpnp_memory_free_c(output_shape); dpnp_memory_free_c(output_shape_strides); dpnp_memory_free_c(sycl_output_xyz); - broadcast_axes.clear(); + output_shape = nullptr; + output_shape_strides = nullptr; + sycl_output_xyz = nullptr; broadcast_use = true; From 6f6f9ed2f94e03f2155d460111951ace3ca1ddee Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 25 Mar 2021 13:12:17 +0300 Subject: [PATCH 4/8] Disable broadcasting in element-wise operations --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 38 ++++++++------------- dpnp/dpnp_iface_mathematical.py | 4 +++ 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 0913fb126adf..cebf63271c5e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -28,7 +28,6 @@ #include #include "dpnp_fptr.hpp" -#include "dpnp_iterator.hpp" #include "dpnp_utils.hpp" #include "queue_sycl.hpp" @@ -354,6 +353,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const size_t* where) \ { \ /* avoid warning unused variable*/ \ + (void)input1_shape; \ + (void)input1_shape_ndim; \ + (void)input2_shape; \ + (void)input2_shape_ndim; \ (void)where; \ \ if (!input1_size || !input2_size) \ @@ -361,31 +364,18 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; \ } \ \ - _DataType_input1* input1_data = reinterpret_cast<_DataType_input1*>(const_cast(input1_in)); \ - _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast(input2_in)); \ - _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \ - \ - std::vector result_shape = get_result_shape(input1_shape, input1_shape_ndim, \ - input2_shape, input2_shape_ndim); \ - \ - DPNPC_id<_DataType_input1> input1(input1_data, input1_shape, input1_shape_ndim); \ - input1.broadcast_to_shape(result_shape); \ - \ - DPNPC_id<_DataType_input2> input2(input2_data, input2_shape, input2_shape_ndim); \ - input2.broadcast_to_shape(result_shape); \ + const size_t result_size = (input2_size > input1_size) ? input2_size : input1_size; \ \ - const size_t result_size = input1.get_output_size(); \ + const _DataType_input1* input1_data = reinterpret_cast(input1_in); \ + const _DataType_input2* input2_data = reinterpret_cast(input2_in); \ + _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \ \ cl::sycl::range<1> gws(result_size); \ - const DPNPC_id<_DataType_input1>* input1_it = &input1; \ - const DPNPC_id<_DataType_input2>* input2_it = &input2; \ auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ - { \ - const _DataType_output input1_elem = (*input1_it)[i]; \ - const _DataType_output input2_elem = (*input2_it)[i]; \ - result[i] = __operation1__; \ - } \ + size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const _DataType_output input1_elem = (input1_size == 1) ? input1_data[0] : input1_data[i]; \ + const _DataType_output input2_elem = (input2_size == 1) ? input2_data[0] : input2_data[i]; \ + result[i] = __operation1__; \ }; \ auto kernel_func = [&](cl::sycl::handler& cgh) { \ cgh.parallel_for::value) && \ std::is_same<_DataType_input2, _DataType_input1>::value) \ { \ - event = __operation2__(DPNP_QUEUE, result_size, input1_data, input2_data, result); \ + _DataType_input1* input1 = const_cast<_DataType_input1*>(input1_data); \ + _DataType_input2* input2 = const_cast<_DataType_input2*>(input2_data); \ + event = __operation2__(DPNP_QUEUE, result_size, input1, input2, result); \ } \ else \ { \ diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 27392ba933c4..05da909ff25b 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1071,6 +1071,10 @@ def multiply(x1, x2, dtype=None, out=None, where=True, **kwargs): pass elif x2_is_dparray and x2.ndim == 0: pass + elif x1_is_dparray and x2_is_dparray and x1.size != x2.size: + pass + elif x1_is_dparray and x2_is_dparray and x1.shape != x2.shape: + pass elif dtype is not None: pass elif out is not None: From bc0c1f7e6b1fd67afea91e3b148cdd26af0edf7a Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 25 Mar 2021 14:24:18 +0300 Subject: [PATCH 5/8] Change docs for 2 DPNPC_id methods --- dpnp/backend/src/dpnp_iterator.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index 9cc8bc3ae76f..af362ce5abf2 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -240,9 +240,9 @@ class DPNPC_id final /** * @ingroup BACKEND_UTILS - * @brief Set output shape for the data object to use in computation. + * @brief Broadcast input data to specified shape. * - * Set shape of output array to use in computation of input array index by index of output array. + * Set output shape to use in computation of input index by output index. * * @note this function is designed for non-SYCL environment execution * @@ -329,7 +329,7 @@ class DPNPC_id final /** * @ingroup BACKEND_UTILS - * @brief Broadcast input data to specified shape. + * @brief Set axes for the data object to use in computation. * * Set axes of the shape of input array to use in iteration. * Axes might be negative to indicate axes as reverse iterator From 7f2e8e249a1e886b88eb3c070f5f862ca6805576 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 25 Mar 2021 17:25:10 +0300 Subject: [PATCH 6/8] Change memory free call before broadcasting --- dpnp/backend/src/dpnp_iterator.hpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index 59e51b4e74b8..e9a8901303bb 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -258,14 +258,7 @@ class DPNPC_id final if (broadcastable(input_shape, input_shape_size, __shape)) { broadcast_axes.clear(); - output_size = size_type{}; - output_shape_size = size_type{}; - dpnp_memory_free_c(output_shape); - dpnp_memory_free_c(output_shape_strides); - dpnp_memory_free_c(sycl_output_xyz); - output_shape = nullptr; - output_shape_strides = nullptr; - sycl_output_xyz = nullptr; + free_output_memory(); broadcast_use = true; From 0731675949c1ee7552619a5f30d595c96c84e84f Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 1 Apr 2021 10:05:20 +0300 Subject: [PATCH 7/8] Enable gtests on broadcasting on GPU --- dpnp/backend/src/dpnp_iterator.hpp | 27 ++++-- dpnp/backend/tests/dpnp_test_utils.hpp | 30 ++++++ .../backend/tests/test_broadcast_iterator.cpp | 97 +++++-------------- dpnp/backend/tests/test_utils_iterator.cpp | 15 +-- 4 files changed, 76 insertions(+), 93 deletions(-) create mode 100644 dpnp/backend/tests/dpnp_test_utils.hpp diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index e9a8901303bb..ff635bcfe69c 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -257,27 +257,32 @@ class DPNPC_id final if (broadcastable(input_shape, input_shape_size, __shape)) { - broadcast_axes.clear(); + free_broadcast_axes_memory(); free_output_memory(); + std::vector valid_axes; broadcast_use = true; output_shape_size = __shape.size(); const size_type output_shape_size_in_bytes = output_shape_size * sizeof(size_type); - output_shape = reinterpret_cast(dpnp_memory_alloc_c(output_shape_size_in_bytes)); for (int irit = input_shape_size - 1, orit = output_shape_size - 1; orit >= 0; --irit, --orit) { output_shape[orit] = __shape[orit]; - // ex: input_shape = {7, 1, 5}, output_shape = {8, 7, 6, 5} => broadcast_axes = {0, 2} + // ex: input_shape = {7, 1, 5}, output_shape = {8, 7, 6, 5} => valid_axes = {0, 2} if (irit < 0 || input_shape[irit] != output_shape[orit]) { - broadcast_axes.insert(broadcast_axes.begin(), orit); + valid_axes.insert(valid_axes.begin(), orit); } } + broadcast_axes_size = valid_axes.size(); + const size_type broadcast_axes_size_in_bytes = broadcast_axes_size * sizeof(size_type); + broadcast_axes = reinterpret_cast(dpnp_memory_alloc_c(broadcast_axes_size_in_bytes)); + std::copy(valid_axes.begin(), valid_axes.end(), broadcast_axes); + output_size = std::accumulate( output_shape, output_shape + output_shape_size, size_type(1), std::multiplies()); @@ -505,7 +510,8 @@ class DPNPC_id final for (int irit = input_shape_size - 1, orit = output_shape_size - 1; irit >= 0; --irit, --orit) { - if (std::find(broadcast_axes.begin(), broadcast_axes.end(), orit) == broadcast_axes.end()) + size_type* broadcast_axes_end = broadcast_axes + broadcast_axes_size; + if (std::find(broadcast_axes, broadcast_axes_end, orit) == broadcast_axes_end) { input_global_id += (sycl_output_xyz_thread[orit] * input_shape_strides[irit]); } @@ -528,6 +534,13 @@ class DPNPC_id final axes_shape_strides = nullptr; } + void free_broadcast_axes_memory() + { + broadcast_axes_size = size_type{}; + dpnp_memory_free_c(broadcast_axes); + broadcast_axes = nullptr; + } + void free_input_memory() { input_size = size_type{}; @@ -561,6 +574,7 @@ class DPNPC_id final void free_memory() { free_axes_memory(); + free_broadcast_axes_memory(); free_input_memory(); free_iteration_memory(); free_output_memory(); @@ -575,7 +589,8 @@ class DPNPC_id final std::vector axes; /**< input shape reduction axes */ bool axis_use = false; - std::vector broadcast_axes; /**< input shape broadcast axes */ + size_type* broadcast_axes = nullptr; /**< input shape broadcast axes */ + size_type broadcast_axes_size = size_type{}; /**< input shape broadcast axes size */ bool broadcast_use = false; size_type output_size = size_type{}; /**< output array size. Expected is same as GWS */ diff --git a/dpnp/backend/tests/dpnp_test_utils.hpp b/dpnp/backend/tests/dpnp_test_utils.hpp new file mode 100644 index 000000000000..84e363d2c94f --- /dev/null +++ b/dpnp/backend/tests/dpnp_test_utils.hpp @@ -0,0 +1,30 @@ +#include +#include + +#include "dpnp_iterator.hpp" + +using namespace std; +using dpnpc_it_t = DPNPC_id::iterator; +using dpnpc_value_t = dpnpc_it_t::value_type; +using dpnpc_index_t = dpnpc_it_t::size_type; + +template +vector<_DataType> get_input_data(const vector& shape) +{ + const dpnpc_index_t size = accumulate(shape.begin(), shape.end(), dpnpc_index_t(1), multiplies()); + + vector<_DataType> input_data(size); + iota(input_data.begin(), input_data.end(), 1); // let's start from 1 to avoid cleaned memory comparison + + return input_data; +} + +template +_DataType* get_shared_data(const vector<_DataType>& input_data) +{ + const size_t data_size_in_bytes = input_data.size() * sizeof(_DataType); + _DataType* shared_data = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(data_size_in_bytes)); + copy(input_data.begin(), input_data.end(), shared_data); + + return shared_data; +} diff --git a/dpnp/backend/tests/test_broadcast_iterator.cpp b/dpnp/backend/tests/test_broadcast_iterator.cpp index d62ce1a9302e..290838e70c64 100644 --- a/dpnp/backend/tests/test_broadcast_iterator.cpp +++ b/dpnp/backend/tests/test_broadcast_iterator.cpp @@ -28,64 +28,11 @@ #include #include "dpnp_iterator.hpp" +#include "dpnp_test_utils.hpp" #define DPNP_LOCAL_QUEUE 1 // TODO need to fix build procedure and remove this workaround. Issue #551 #include "queue_sycl.hpp" -using namespace std; -using dpnpc_it_t = DPNPC_id::iterator; -using dpnpc_value_t = dpnpc_it_t::value_type; -using dpnpc_index_t = dpnpc_it_t::size_type; - -template -vector<_DataType> get_input_data(const vector& shape) -{ - const dpnpc_index_t size = accumulate(shape.begin(), shape.end(), dpnpc_index_t(1), multiplies()); - - vector<_DataType> input_data(size); - iota(input_data.begin(), input_data.end(), 1); // let's start from 1 to avoid cleaned memory comparison - - return input_data; -} - -TEST(TestBroadcastIterator, take_value_broadcast_loop_2D) -{ - vector expected_data{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}; - // expected input data 1, 2, 3 - vector input_data = get_input_data({3, 1}); - DPNPC_id result_obj(input_data.data(), {3, 1}); - result_obj.broadcast_to_shape({3, 4}); - - for (dpnpc_index_t output_id = 0; output_id < result_obj.get_output_size(); ++output_id) - { - EXPECT_EQ(result_obj[output_id], expected_data[output_id]); - } -} - -TEST(TestBroadcastIterator, take_value_broadcast_loop_3D) -{ - vector expected_data{1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}; - // expected input data 1, 2, 3, 4, 5, 6 - vector input_data = get_input_data({2, 3}); - DPNPC_id result_obj(input_data.data(), {2, 3}); - result_obj.broadcast_to_shape({2, 2, 3}); - - for (dpnpc_index_t output_id = 0; output_id < result_obj.get_output_size(); ++output_id) - { - EXPECT_EQ(result_obj[output_id], expected_data[output_id]); - } -} - -TEST(TestBroadcastIterator, output_size_broadcast) -{ - vector input_data = get_input_data({3, 4}); - DPNPC_id result_obj(input_data.data(), {3, 4}); - result_obj.broadcast_to_shape({2, 3, 4}); - - const dpnpc_index_t output_size = result_obj.get_output_size(); - EXPECT_EQ(output_size, 24); -} - struct IteratorParameters { vector input_shape; @@ -108,22 +55,19 @@ class IteratorBroadcasting : public ::testing::TestWithParam TEST_P(IteratorBroadcasting, loop_broadcast) { + using data_type = double; + const IteratorParameters& param = GetParam(); - const dpnpc_index_t result_size = param.result.size(); + std::vector input_data = get_input_data(param.input_shape); - vector input_data = get_input_data(param.input_shape); - DPNPC_id input(input_data.data(), param.input_shape); + DPNPC_id input(input_data.data(), param.input_shape); input.broadcast_to_shape(param.output_shape); - ASSERT_EQ(input.get_output_size(), result_size); + ASSERT_EQ(input.get_output_size(), param.result.size()); - vector test_result(result_size, 42); - for (dpnpc_index_t output_id = 0; output_id < result_size; ++output_id) + for (dpnpc_index_t output_id = 0; output_id < input.get_output_size(); ++output_id) { - test_result[output_id] = input[output_id]; - std::cout << "test_result[" << output_id << "] = " << test_result[output_id] << std::endl; - std::cout << "param.result.at(" << output_id << ") = " << param.result.at(output_id) << std::endl; - EXPECT_EQ(test_result[output_id], param.result.at(output_id)); + EXPECT_EQ(input[output_id], param.result.at(output_id)); } } @@ -133,20 +77,23 @@ TEST_P(IteratorBroadcasting, sycl_broadcast) const IteratorParameters& param = GetParam(); const dpnpc_index_t result_size = param.result.size(); - vector result(result_size, 42); - data_type* result_ptr = result.data(); + data_type* result = reinterpret_cast(dpnp_memory_alloc_c(result_size * sizeof(data_type))); - vector input_data = get_input_data(param.input_shape); - DPNPC_id input(input_data.data(), param.input_shape); - input.broadcast_to_shape(param.output_shape); + std::vector input_data = get_input_data(param.input_shape); + data_type* shared_data = get_shared_data(input_data); - ASSERT_EQ(input.get_output_size(), result_size); + DPNPC_id* input_it; + input_it = reinterpret_cast*>(dpnp_memory_alloc_c(sizeof(DPNPC_id))); + new (input_it) DPNPC_id(shared_data, param.input_shape); + + input_it->broadcast_to_shape(param.output_shape); + + ASSERT_EQ(input_it->get_output_size(), result_size); cl::sycl::range<1> gws(result_size); - const DPNPC_id* input_it = &input; auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { const size_t idx = global_id[0]; - result_ptr[idx] = (*input_it)[idx]; + result[idx] = (*input_it)[idx]; }; auto kernel_func = [&](cl::sycl::handler& cgh) { @@ -158,8 +105,12 @@ TEST_P(IteratorBroadcasting, sycl_broadcast) for (dpnpc_index_t i = 0; i < result_size; ++i) { - EXPECT_EQ(result.at(i), param.result.at(i)); + EXPECT_EQ(result[i], param.result.at(i)); } + + input_it->~DPNPC_id(); + dpnp_memory_free_c(shared_data); + dpnp_memory_free_c(result); } /** diff --git a/dpnp/backend/tests/test_utils_iterator.cpp b/dpnp/backend/tests/test_utils_iterator.cpp index ffc2e0658659..569555e7b773 100644 --- a/dpnp/backend/tests/test_utils_iterator.cpp +++ b/dpnp/backend/tests/test_utils_iterator.cpp @@ -28,25 +28,12 @@ #include #include "dpnp_iterator.hpp" +#include "dpnp_test_utils.hpp" #define DPNP_LOCAL_QUEUE 1 // TODO need to fix build procedure and remove this workaround. Issue #551 #include "queue_sycl.hpp" using namespace std; -using dpnpc_it_t = DPNPC_id::iterator; -using dpnpc_value_t = dpnpc_it_t::value_type; -using dpnpc_index_t = dpnpc_it_t::size_type; - -template -vector<_DataType> get_input_data(const vector& shape) -{ - const dpnpc_index_t size = accumulate(shape.begin(), shape.end(), dpnpc_index_t(1), multiplies()); - - vector<_DataType> input_data(size); - iota(input_data.begin(), input_data.end(), 1); // let's start from 1 to avoid cleaned memory comparison - - return input_data; -} TEST(TestUtilsIterator, begin_prefix_postfix) { From da181e29d9d24cbd0c5696610b41366b0f422395 Mon Sep 17 00:00:00 2001 From: Denis Smirnov Date: Thu, 1 Apr 2021 13:30:51 +0300 Subject: [PATCH 8/8] Enable broadcasting in element-wise operations --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 43 ++++++++++++++------- dpnp/dpnp_iface_mathematical.py | 4 -- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index cebf63271c5e..2e681e1c58e6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -28,6 +28,7 @@ #include #include "dpnp_fptr.hpp" +#include "dpnp_iterator.hpp" #include "dpnp_utils.hpp" #include "queue_sycl.hpp" @@ -353,10 +354,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const size_t* where) \ { \ /* avoid warning unused variable*/ \ - (void)input1_shape; \ - (void)input1_shape_ndim; \ - (void)input2_shape; \ - (void)input2_shape_ndim; \ (void)where; \ \ if (!input1_size || !input2_size) \ @@ -364,17 +361,34 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; \ } \ \ - const size_t result_size = (input2_size > input1_size) ? input2_size : input1_size; \ - \ - const _DataType_input1* input1_data = reinterpret_cast(input1_in); \ - const _DataType_input2* input2_data = reinterpret_cast(input2_in); \ + _DataType_input1* input1_data = reinterpret_cast<_DataType_input1*>(const_cast(input1_in)); \ + _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast(input2_in)); \ _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \ \ + std::vector result_shape = get_result_shape(input1_shape, input1_shape_ndim, \ + input2_shape, input2_shape_ndim); \ + \ + DPNPC_id<_DataType_input1>* input1_it; \ + const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ + input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(input1_it_size_in_bytes)); \ + new (input1_it) DPNPC_id<_DataType_input1>(input1_data, input1_shape, input1_shape_ndim); \ + \ + input1_it->broadcast_to_shape(result_shape); \ + \ + DPNPC_id<_DataType_input2>* input2_it; \ + const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ + input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(input2_it_size_in_bytes)); \ + new (input2_it) DPNPC_id<_DataType_input2>(input2_data, input2_shape, input2_shape_ndim); \ + \ + input2_it->broadcast_to_shape(result_shape); \ + \ + const size_t result_size = input1_it->get_output_size(); \ + \ cl::sycl::range<1> gws(result_size); \ auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { \ - size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ - const _DataType_output input1_elem = (input1_size == 1) ? input1_data[0] : input1_data[i]; \ - const _DataType_output input2_elem = (input2_size == 1) ? input2_data[0] : input2_data[i]; \ + const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const _DataType_output input1_elem = (*input1_it)[i]; \ + const _DataType_output input2_elem = (*input2_it)[i]; \ result[i] = __operation1__; \ }; \ auto kernel_func = [&](cl::sycl::handler& cgh) { \ @@ -390,9 +404,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) std::is_same<_DataType_input1, float>::value) && \ std::is_same<_DataType_input2, _DataType_input1>::value) \ { \ - _DataType_input1* input1 = const_cast<_DataType_input1*>(input1_data); \ - _DataType_input2* input2 = const_cast<_DataType_input2*>(input2_data); \ - event = __operation2__(DPNP_QUEUE, result_size, input1, input2, result); \ + event = __operation2__(DPNP_QUEUE, result_size, input1_data, input2_data, result); \ } \ else \ { \ @@ -405,6 +417,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ \ event.wait(); \ + \ + input1_it->~DPNPC_id(); \ + input2_it->~DPNPC_id(); \ } #include diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 05da909ff25b..27392ba933c4 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1071,10 +1071,6 @@ def multiply(x1, x2, dtype=None, out=None, where=True, **kwargs): pass elif x2_is_dparray and x2.ndim == 0: pass - elif x1_is_dparray and x2_is_dparray and x1.size != x2.size: - pass - elif x1_is_dparray and x2_is_dparray and x1.shape != x2.shape: - pass elif dtype is not None: pass elif out is not None: