From 883fd26f5e5bc354ff96c33080b7645b083a50c6 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 4 Jul 2023 01:12:02 -0700 Subject: [PATCH 1/4] Implements negative, positive, pow, and square --- dpctl/tensor/__init__.py | 8 + dpctl/tensor/_elementwise_funcs.py | 74 ++++- .../elementwise_functions/negative.hpp | 236 +++++++++++++++ .../elementwise_functions/positive.hpp | 251 ++++++++++++++++ .../kernels/elementwise_functions/pow.hpp | 269 ++++++++++++++++++ .../kernels/elementwise_functions/square.hpp | 206 ++++++++++++++ .../source/elementwise_functions.cpp | 242 +++++++++++++++- 7 files changed, 1274 insertions(+), 12 deletions(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index ec488cb3d4..af3ae9b1c2 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -116,11 +116,15 @@ logical_or, logical_xor, multiply, + negative, not_equal, + positive, + pow, proj, real, sin, sqrt, + square, subtract, ) from ._reduction import sum @@ -220,12 +224,16 @@ "logical_or", "logical_xor", "log1p", + "negative", + "positive", "proj", "real", "sin", "sqrt", + "square", "divide", "multiply", + "pow", "subtract", "equal", "not_equal", diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 2c07ab8e6a..49e7309998 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -715,7 +715,27 @@ ) # U25: ==== NEGATIVE (x) -# FIXME: implement U25 +_negative_docstring_ = """ +negative(x, out=None, order='K') + +Computes the numerical negative elementwise. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is `None`. + Default: "K". +Return: + usm_ndarray: + An array containing the element-wise negative values. +""" + +negative = UnaryElementwiseFunc( + "negative", ti._negative_result_type, ti._negative, _negative_docstring_ +) # B20: ==== NOT_EQUAL (x1, x2) _not_equal_docstring_ = """ @@ -747,10 +767,48 @@ ) # U26: ==== POSITIVE (x) -# FIXME: implement U26 +_positive_docstring_ = """ +positive(x, out=None, order='K') + +Computes the numerical positive element-wise. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is `None`. + Default: "K". +Return: + usm_ndarray: + An array containing the element-wise positive values. +""" + +positive = UnaryElementwiseFunc( + "positive", ti._positive_result_type, ti._positive, _positive_docstring_ +) # B21: ==== POW (x1, x2) -# FIXME: implement B21 +_pow_docstring_ = """ +pow(x1, x2, out=None, order='K') + +Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array +`x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a numeric data type. 
+ x2 (usm_ndarray): + Second input array, also expected to have a numeric data type. +Returns: + usm_narray: + an array containing the element-wise result. The data type of + the returned array is determined by the Type Promotion Rules. +""" +pow = BinaryElementwiseFunc( + "pow", ti._pow_result_type, ti._pow, _pow_docstring_ +) # U??: ==== PROJ (x) _proj_docstring = """ @@ -838,7 +896,15 @@ # FIXME: implement U31 # U32: ==== SQUARE (x) -# FIXME: implement U32 +_square_docstring_ = """ +square(x, out=None, order='K') + +Computes `x_i**2` for each element `x_i` for input array `x`. +""" + +square = UnaryElementwiseFunc( + "square", ti._square_result_type, ti._square, _square_docstring_ +) # U33: ==== SQRT (x) _sqrt_docstring_ = """ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp new file mode 100644 index 0000000000..f69a35669e --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp @@ -0,0 +1,236 @@ +//=== negative.hpp - Unary function POSITIVE ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of POSITIVE(x) +/// function that returns x. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace negative +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; +using dpctl::tensor::type_utils::vec_cast; + +template struct NegativeFunctor +{ + + using is_constant = typename std::false_type; + // constexpr resT constant_value = resT{}; + using supports_vec = typename std::false_type; + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &x) + { + return -x; + } +}; + +template +using NegativeContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template struct NegativeOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class negative_contig_kernel; + +typedef sycl::event (*negative_contig_impl_fn_ptr_t)( + sycl::queue, + size_t, + const char *, + char *, + const std::vector &); + +template +sycl::event negative_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + size_t lws = 64; + constexpr unsigned int vec_sz = 4; + constexpr unsigned int n_vecs = 2; + const size_t n_groups = + ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy = typename NegativeOutputType::value_type; + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for>( + sycl::nd_range<1>(gws_range, lws_range), + NegativeContigFunctor(arg_tp, res_tp, + nelems)); + }); + return negative_ev; +} + +template struct NegativeContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = negative_contig_impl; + return fn; + } + } +}; + +template struct NegativeTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::negative(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename NegativeOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +using NegativeStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template class negative_strided_kernel; + +typedef sycl::event (*negative_strided_impl_fn_ptr_t)( + sycl::queue, + size_t, + int, + const py::ssize_t *, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +template +sycl::event +negative_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using resTy = typename NegativeOutputType::value_type; + using IndexerT = + typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + + IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides}; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for>( + {nelems}, NegativeStridedFunctor( + arg_tp, res_tp, indexer)); + }); + return negative_ev; +} + +template struct NegativeStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = negative_strided_impl; + return fn; + } + } +}; + +} // namespace negative +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp new file mode 100644 index 0000000000..725bdf83df --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp @@ -0,0 +1,251 @@ +//=== positive.hpp - Unary function POSITIVE ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of POSITIVE(x) +/// function that returns x. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace positive +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; +using dpctl::tensor::type_utils::vec_cast; + +template struct PositiveFunctor +{ + + using is_constant = typename std::false_type; + // constexpr resT constant_value = resT{}; + using supports_vec = typename std::negation< + std::disjunction, is_complex>>; + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &x) + { + return x; + } + + template + sycl::vec operator()(const sycl::vec &in) + { + auto const &res_vec = in; + using deducedT = typename std::remove_cv_t< + std::remove_reference_t>::element_type; + if constexpr (std::is_same_v) { + return res_vec; + } + else { + return vec_cast(res_vec); + } + } +}; + +template +using PositiveContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template struct PositiveOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class positive_contig_kernel; + +typedef sycl::event (*positive_contig_impl_fn_ptr_t)( + sycl::queue, + size_t, + const char *, + char *, + const std::vector &); + +template +sycl::event positive_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + sycl::event positive_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + size_t lws = 64; + constexpr unsigned int vec_sz = 4; + constexpr unsigned int n_vecs = 2; + const size_t n_groups = + ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy = typename PositiveOutputType::value_type; + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for>( + sycl::nd_range<1>(gws_range, lws_range), + PositiveContigFunctor(arg_tp, res_tp, + nelems)); + }); + return positive_ev; +} + +template struct PositiveContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = positive_contig_impl; + return fn; + } + } +}; + +template struct PositiveTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::positive(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename PositiveOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +using PositiveStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template class positive_strided_kernel; + +typedef sycl::event (*positive_strided_impl_fn_ptr_t)( + sycl::queue, + size_t, + int, + const py::ssize_t *, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +template +sycl::event +positive_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event positive_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using resTy = typename PositiveOutputType::value_type; + using IndexerT = + typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + + IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides}; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for>( + {nelems}, PositiveStridedFunctor( + arg_tp, res_tp, indexer)); + }); + return positive_ev; +} + +template struct PositiveStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = positive_strided_impl; + return fn; + } + } +}; + +} // namespace positive +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp new file mode 100644 index 0000000000..9c5727c6d4 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -0,0 +1,269 @@ +//=== POW.hpp - Binary function POW ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of POW(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace pow +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct PowFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = + std::negation, + tu_ns::is_complex, + std::is_integral, + std::is_integral>>; + + resT operator()(const argT1 &in1, const argT2 &in2) + { + return std::pow(in1, in2); + } + + template + sycl::vec operator()(const sycl::vec &in1, + const sycl::vec &in2) + { + auto res = sycl::pow(in1, in2); + if constexpr (std::is_same_v) { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + res); + } + } +}; + +template +using PowContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using PowStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +// TODO: when type promotion logic is better defined, +// consider implementing overloads of std::pow that take +// integers for the exponents. Seem to give better accuracy in +// some cases (complex data especially) +template struct PowOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class pow_contig_kernel; + +template +sycl::event pow_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, PowOutputType, PowContigFunctor, pow_contig_kernel>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template struct PowContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_contig_impl; + return fn; + } + } +}; + +template struct PowTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::pow(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename PowOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class pow_strided_strided_kernel; + +template +sycl::event pow_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, PowOutputType, PowStridedFunctor, + pow_strided_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template struct PowStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_strided_impl; + return fn; + } + } +}; + +} // namespace pow +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp new file mode 100644 index 0000000000..29d096ae88 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp @@ -0,0 +1,206 @@ +//=== square.hpp - Unary function SQUARE ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of SQUARE(x) +/// +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace square +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; +using dpctl::tensor::type_utils::vec_cast; + +template struct SquareFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::negation< + std::disjunction, is_complex>>; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + return in * in; + } + + template + sycl::vec operator()(const sycl::vec &in) + { + auto const &res_vec = in * in; + using deducedT = typename std::remove_cv_t< + std::remove_reference_t>::element_type; + if constexpr (std::is_same_v) { + return res_vec; + } + else { + return vec_cast(res_vec); + } + } +}; + +template +using SquareContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using SquareStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct SquareOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class square_contig_kernel; + +template +sycl::event square_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, SquareOutputType, SquareContigFunctor, square_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct SquareContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = square_contig_impl; + return fn; + } + } +}; + +template struct SquareTypeMapFactory +{ + /*! 
@brief get typeid for output type of x * x */ + std::enable_if_t::value, int> get() + { + using rT = typename SquareOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class square_strided_kernel; + +template +sycl::event +square_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, SquareOutputType, SquareStridedFunctor, square_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct SquareStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = square_strided_impl; + return fn; + } + } +}; + +} // namespace square +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 5898f0ca7d..dbd06d9250 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -55,11 +55,15 @@ #include "kernels/elementwise_functions/logical_or.hpp" #include "kernels/elementwise_functions/logical_xor.hpp" #include "kernels/elementwise_functions/multiply.hpp" +#include "kernels/elementwise_functions/negative.hpp" #include "kernels/elementwise_functions/not_equal.hpp" +#include "kernels/elementwise_functions/positive.hpp" +#include "kernels/elementwise_functions/pow.hpp" #include "kernels/elementwise_functions/proj.hpp" #include "kernels/elementwise_functions/real.hpp" #include "kernels/elementwise_functions/sin.hpp" #include "kernels/elementwise_functions/sqrt.hpp" +#include "kernels/elementwise_functions/square.hpp" #include "kernels/elementwise_functions/subtract.hpp" #include "kernels/elementwise_functions/true_divide.hpp" @@ -1276,7 +1280,37 @@ void populate_multiply_dispatch_tables(void) // U25: ==== NEGATIVE (x) namespace impl { -// FIXME: add code for U25 + +namespace negative_fn_ns = dpctl::tensor::kernels::negative; + +static unary_contig_impl_fn_ptr_t + negative_contig_dispatch_vector[td_ns::num_types]; +static int negative_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + negative_strided_dispatch_vector[td_ns::num_types]; + +void populate_negative_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = negative_fn_ns; + + using fn_ns::NegativeContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(negative_contig_dispatch_vector); + + using fn_ns::NegativeStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(negative_strided_dispatch_vector); + + using fn_ns::NegativeTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(negative_output_typeid_vector); +} + } // namespace impl // B20: ==== NOT_EQUAL (x1, x2) @@ -1320,13 +1354,77 @@ void populate_not_equal_dispatch_tables(void) // U26: ==== POSITIVE (x) namespace impl { -// FIXME: add code for U26 + +namespace positive_fn_ns = dpctl::tensor::kernels::positive; + +static unary_contig_impl_fn_ptr_t + positive_contig_dispatch_vector[td_ns::num_types]; +static int positive_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + 
positive_strided_dispatch_vector[td_ns::num_types]; + +void populate_positive_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = positive_fn_ns; + + using fn_ns::PositiveContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); + + using fn_ns::PositiveStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); + + using fn_ns::PositiveTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(positive_output_typeid_vector); +} + } // namespace impl // B21: ==== POW (x1, x2) namespace impl { -// FIXME: add code for B21 + +namespace pow_fn_ns = dpctl::tensor::kernels::pow; + +static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_pow_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = pow_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::PowTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(pow_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::PowStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(pow_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::PowContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(pow_contig_dispatch_table); +}; + } // namespace impl // U??: ==== PROJ (x) @@ -1459,7 +1557,37 @@ namespace impl // U32: ==== SQUARE (x) namespace impl { -// FIXME: add code for U32 + +namespace square_fn_ns = dpctl::tensor::kernels::square; + +static unary_contig_impl_fn_ptr_t + square_contig_dispatch_vector[td_ns::num_types]; +static int square_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + square_strided_dispatch_vector[td_ns::num_types]; + +void populate_square_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = square_fn_ns; + + using fn_ns::SquareContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(square_contig_dispatch_vector); + + using fn_ns::SquareStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(square_strided_dispatch_vector); + + using fn_ns::SquareTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(square_output_typeid_vector); +} + } // namespace impl // U33: ==== SQRT (x) @@ -2493,7 +2621,28 @@ void init_elementwise_functions(py::module_ m) } // U25: ==== NEGATIVE (x) - // FIXME: + { + impl::populate_negative_dispatch_vectors(); + using impl::negative_contig_dispatch_vector; + using impl::negative_output_typeid_vector; + using impl::negative_strided_dispatch_vector; + + auto negative_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + negative_output_typeid_vector, + negative_contig_dispatch_vector, + negative_strided_dispatch_vector); + }; + m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto negative_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + negative_output_typeid_vector); + }; + 
m.def("_negative_result_type", negative_result_type_pyapi); + } // B20: ==== NOT_EQUAL (x1, x2) { @@ -2537,10 +2686,67 @@ void init_elementwise_functions(py::module_ m) } // U26: ==== POSITIVE (x) - // FIXME: + { + impl::populate_positive_dispatch_vectors(); + using impl::positive_contig_dispatch_vector; + using impl::positive_output_typeid_vector; + using impl::positive_strided_dispatch_vector; + + auto positive_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + positive_output_typeid_vector, + positive_contig_dispatch_vector, + positive_strided_dispatch_vector); + }; + m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto positive_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + positive_output_typeid_vector); + }; + m.def("_positive_result_type", positive_result_type_pyapi); + } // B21: ==== POW (x1, x2) - // FIXME: + { + + impl::populate_pow_dispatch_tables(); + using impl::pow_contig_dispatch_table; + using impl::pow_output_id_table; + using impl::pow_strided_dispatch_table; + + auto pow_pyapi = [&](dpctl::tensor::usm_ndarray src1, + dpctl::tensor::usm_ndarray src2, + dpctl::tensor::usm_ndarray dst, sycl::queue exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + pow_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + pow_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto pow_result_type_pyapi = [&](py::dtype dtype1, py::dtype dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + pow_output_id_table); + }; + m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_pow_result_type", pow_result_type_pyapi, ""); + } // U??: ==== PROJ (x) { @@ -2620,7 +2826,27 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U32: ==== SQUARE (x) - // FIXME: + { + impl::populate_square_dispatch_vectors(); + using impl::square_contig_dispatch_vector; + using impl::square_output_typeid_vector; + using impl::square_strided_dispatch_vector; + + auto square_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, square_output_typeid_vector, + square_contig_dispatch_vector, square_strided_dispatch_vector); + }; + m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto square_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + square_output_typeid_vector); + }; + m.def("_square_result_type", square_result_type_pyapi); + } // U33: ==== SQRT (x) { From 55caa045499696c414e9ad579a06d32e2dddea6d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 4 Jul 2023 02:16:44 
-0700 Subject: [PATCH 2/4] Tests for negative, positive, pow, and square --- dpctl/tests/elementwise/test_negative.py | 79 ++++++++++++ dpctl/tests/elementwise/test_positive.py | 79 ++++++++++++ dpctl/tests/elementwise/test_pow.py | 154 +++++++++++++++++++++++ dpctl/tests/elementwise/test_square.py | 99 +++++++++++++++ 4 files changed, 411 insertions(+) create mode 100644 dpctl/tests/elementwise/test_negative.py create mode 100644 dpctl/tests/elementwise/test_positive.py create mode 100644 dpctl/tests/elementwise/test_pow.py create mode 100644 dpctl/tests/elementwise/test_square.py diff --git a/dpctl/tests/elementwise/test_negative.py b/dpctl/tests/elementwise/test_negative.py new file mode 100644 index 0000000000..3af6d7fcf5 --- /dev/null +++ b/dpctl/tests/elementwise/test_negative.py @@ -0,0 +1,79 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_negative_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + X = dpt.asarray(0, dtype=arg_dt, sycl_queue=q) + assert dpt.negative(X).dtype == arg_dt + + r = dpt.empty_like(X, dtype=arg_dt) + dpt.negative(X, out=r) + assert np.allclose(dpt.asnumpy(r), dpt.asnumpy(dpt.negative(X))) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_negative_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("i4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 + X[..., 1::2] = 0 + + Y = dpt.negative(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.negative(dpt.asnumpy(X)) + assert np.allclose(dpt.asnumpy(Y), expected_Y) + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_negative_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 1 + X[..., 1::2] = 0 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.negative(U, order=ord) + expected_Y = np.negative(np.ones(Y.shape, dtype=Y.dtype)) + expected_Y[..., 1::2] = 0 + expected_Y = np.transpose(expected_Y, perms) + assert np.allclose(dpt.asnumpy(Y), expected_Y) diff --git a/dpctl/tests/elementwise/test_positive.py b/dpctl/tests/elementwise/test_positive.py new file mode 100644 index 0000000000..657c26d8cf --- /dev/null +++ b/dpctl/tests/elementwise/test_positive.py @@ -0,0 +1,79 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_positive_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + X = dpt.asarray(0, dtype=arg_dt, sycl_queue=q) + assert dpt.positive(X).dtype == arg_dt + + r = dpt.empty_like(X, dtype=arg_dt) + dpt.positive(X, out=r) + assert np.allclose(dpt.asnumpy(r), dpt.asnumpy(dpt.positive(X))) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_positive_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("i4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 + X[..., 1::2] = 0 + + Y = dpt.positive(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = dpt.asnumpy(X) + assert np.allclose(dpt.asnumpy(Y), expected_Y) + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_positive_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 1 + X[..., 1::2] = 0 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.positive(U, order=ord) + expected_Y = np.ones(Y.shape, dtype=Y.dtype) + expected_Y[..., 1::2] = 0 + expected_Y = np.transpose(expected_Y, perms) + assert np.allclose(dpt.asnumpy(Y), expected_Y) diff --git a/dpctl/tests/elementwise/test_pow.py b/dpctl/tests/elementwise/test_pow.py new file mode 100644 index 0000000000..1f13e2b533 --- /dev/null +++ b/dpctl/tests/elementwise/test_pow.py @@ -0,0 +1,154 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ctypes + +import numpy as np +import pytest + +import dpctl +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _compare_dtypes, _usm_types + + +@pytest.mark.parametrize("op1_dtype", _all_dtypes[1:]) +@pytest.mark.parametrize("op2_dtype", _all_dtypes[1:]) +def test_power_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + r = dpt.pow(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.power( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + r = dpt.pow(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.power( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + +@pytest.mark.parametrize("op1_usm_type", _usm_types) +@pytest.mark.parametrize("op2_usm_type", _usm_types) +def test_power_usm_type_matrix(op1_usm_type, op2_usm_type): + get_queue_or_skip() + + sz = 128 + ar1 = dpt.ones(sz, dtype="i4", usm_type=op1_usm_type) + ar2 = dpt.ones_like(ar1, dtype="i4", usm_type=op2_usm_type) + + r = dpt.pow(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected_usm_type = dpctl.utils.get_coerced_usm_type( + (op1_usm_type, op2_usm_type) + ) + assert r.usm_type == expected_usm_type + + +def test_pow_order(): + get_queue_or_skip() + + ar1 = dpt.ones((20, 20), dtype="i4", order="C") + ar2 = dpt.ones((20, 20), dtype="i4", order="C") + r1 = dpt.pow(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.pow(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.pow(ar1, ar2, order="A") + assert r3.flags.c_contiguous + r4 = dpt.pow(ar1, ar2, order="K") + assert r4.flags.c_contiguous + + ar1 = dpt.ones((20, 20), dtype="i4", order="F") + ar2 = dpt.ones((20, 20), dtype="i4", order="F") + r1 = dpt.pow(ar1, ar2, order="C") + assert r1.flags.c_contiguous + r2 = dpt.pow(ar1, ar2, order="F") + assert r2.flags.f_contiguous + r3 = dpt.pow(ar1, ar2, order="A") + assert r3.flags.f_contiguous + r4 = dpt.pow(ar1, ar2, order="K") + assert r4.flags.f_contiguous + + ar1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2] + ar2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2] + r4 = dpt.pow(ar1, ar2, order="K") + assert r4.strides == (20, -1) + + ar1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT + ar2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT + r4 = dpt.pow(ar1, ar2, order="K") + assert r4.strides == (-1, 20) + + +def test_pow_broadcasting(): + get_queue_or_skip() + + v = dpt.arange(1, 6, dtype="i4") + m = dpt.full((100, 5), 2, dtype="i4") + + r = dpt.pow(m, v) + + expected = np.power( + np.full((100, 5), 2, dtype="i4"), np.arange(1, 6, dtype="i4") + ) + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + r2 = dpt.pow(v, m) + expected2 = np.power( + np.arange(1, 6, dtype="i4"), np.full((100, 5), 2, dtype="i4") + ) + assert (dpt.asnumpy(r2) == expected2.astype(r2.dtype)).all() + + 
+@pytest.mark.parametrize("arr_dt", _all_dtypes) +def test_pow_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + complex(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.pow(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.pow(sc, X) + assert isinstance(R, dpt.usm_ndarray) diff --git a/dpctl/tests/elementwise/test_square.py b/dpctl/tests/elementwise/test_square.py new file mode 100644 index 0000000000..95ec163e2f --- /dev/null +++ b/dpctl/tests/elementwise/test_square.py @@ -0,0 +1,99 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_square_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + X = dpt.arange(5, dtype=arg_dt, sycl_queue=q) + assert dpt.square(X).dtype == arg_dt + + r = dpt.empty_like(X, dtype=arg_dt) + dpt.square(X, out=r) + assert np.allclose(dpt.asnumpy(r), dpt.asnumpy(dpt.square(X))) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_square_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("i4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 + X[..., 1::2] = 0 + + Y = dpt.square(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = dpt.asnumpy(X) + assert np.allclose(dpt.asnumpy(Y), expected_Y) + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_square_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 2 + X[..., 1::2] = 0 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.square(U, order=ord) + expected_Y = np.full(Y.shape, 4, dtype=Y.dtype) + expected_Y[..., 1::2] = 0 + expected_Y = np.transpose(expected_Y, perms) + assert np.allclose(dpt.asnumpy(Y), expected_Y) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) +def test_square_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + vals = [np.nan, np.inf, -np.inf, 0.0, -0.0] + X = dpt.asarray(vals, dtype=dtype, sycl_queue=q) + X_np = dpt.asnumpy(X) + + tol = 8 * dpt.finfo(dtype).resolution + with np.errstate(all="ignore"): + assert np.allclose( + dpt.asnumpy(dpt.square(X)), + np.square(X_np), + atol=tol, + rtol=tol, + equal_nan=True, + ) From 
602eef11c868508d494edeb51a16cfcefc819971 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 4 Jul 2023 03:28:57 -0700 Subject: [PATCH 3/4] pow no longer uses std::pow for integers - Not portable to devices without 64-bit precision --- .../kernels/elementwise_functions/pow.hpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp index 9c5727c6d4..d4249c7574 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -60,9 +61,31 @@ template struct PowFunctor std::is_integral, std::is_integral>>; - resT operator()(const argT1 &in1, const argT2 &in2) + resT operator()(argT1 in1, argT2 in2) { - return std::pow(in1, in2); + if constexpr (std::is_integral_v || std::is_integral_v) { + if constexpr (std::is_signed_v) { + if (in2 < 0) { + // invalid; return 0 + return resT(0); + } + } + resT res = 1; + if (in1 == 1 || in2 == 0) { + return res; + } + while (in2 > 0) { + if (in2 & 1) { + res *= in1; + } + in2 >>= 1; + in1 *= in1; + } + return res; + } + else { + return std::pow(in1, in2); + } } template From 47482334c2912d5914e0e0cb8d4e7d6a47e157f2 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 4 Jul 2023 04:30:22 -0700 Subject: [PATCH 4/4] Fixed docstrings for negative, positive, pow, and square --- dpctl/tensor/_elementwise_funcs.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 49e7309998..335e0350ac 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -718,7 +718,7 @@ _negative_docstring_ = """ negative(x, out=None, order='K') -Computes the numerical negative elementwise. +Computes the numerical negative for each element `x_i` of input array `x`. Args: x (usm_ndarray): Input array, expected to have numeric data type. @@ -730,7 +730,7 @@ Default: "K". Return: usm_ndarray: - An array containing the element-wise negative values. + An array containing the negative of `x`. """ negative = UnaryElementwiseFunc( @@ -770,7 +770,7 @@ _positive_docstring_ = """ positive(x, out=None, order='K') -Computes the numerical positive element-wise. +Computes the numerical positive for each element `x_i` of input array `x`. Args: x (usm_ndarray): Input array, expected to have numeric data type. @@ -782,7 +782,7 @@ Default: "K". Return: usm_ndarray: - An array containing the element-wise positive values. + An array containing the values of `x`. """ positive = UnaryElementwiseFunc( @@ -802,7 +802,7 @@ x2 (usm_ndarray): Second input array, also expected to have a numeric data type. Returns: - usm_narray: + usm_ndarray: an array containing the element-wise result. The data type of the returned array is determined by the Type Promotion Rules. """ @@ -899,7 +899,21 @@ _square_docstring_ = """ square(x, out=None, order='K') -Computes `x_i**2` for each element `x_i` for input array `x`. +Computes `x_i**2` (or `x_i*x_i`) for each element `x_i` of input array `x`. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. 
+ order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the square `x`. + The data type of the returned array is determined by + the Type Promotion Rules. """ square = UnaryElementwiseFunc(