diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index c9c1c46f71..2e720cba92 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -94,16 +94,22 @@ from ._elementwise_funcs import ( abs, add, + conj, cos, divide, equal, + exp, expm1, + imag, isfinite, isinf, isnan, log, log1p, multiply, + proj, + real, + sin, sqrt, subtract, ) @@ -186,13 +192,19 @@ "inf", "abs", "add", + "conj", "cos", + "exp", "expm1", + "imag", "isinf", "isnan", "isfinite", "log", "log1p", + "proj", + "real", + "sin", "sqrt", "divide", "multiply", diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 9f8b95c87c..cb24929b76 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -20,7 +20,24 @@ # U01: ==== ABS (x) _abs_docstring_ = """ -Calculate the absolute value element-wise. +abs(x, out=None, order='K') + +Calculates the absolute value for each element `x_i` of input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise absolute values. + For complex input, the absolute value is its magnitude. The data type + of the returned array is determined by the Type Promotion Rules. """ abs = UnaryElementwiseFunc("abs", ti._abs_result_type, ti._abs, _abs_docstring_) @@ -44,9 +61,15 @@ First input array, expected to have numeric data type. x2 (usm_ndarray): Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. 
+ Default: "K". Returns: usm_narray: - an array containing the element-wise sums. The data type of the + An array containing the element-wise sums. The data type of the returned array is determined by the Type Promotion Rules. """ add = BinaryElementwiseFunc( @@ -90,13 +113,49 @@ # FIXME: implement U09 # U10: ==== CONJ (x) -# FIXME: implement U10 +_conj_docstring = """ +conj(x, out=None, order='K') + +Computes conjugate of each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise conjugate values. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +conj = UnaryElementwiseFunc( + "conj", ti._conj_result_type, ti._conj, _conj_docstring +) # U11: ==== COS (x) _cos_docstring = """ cos(x, out=None, order='K') Computes cosine for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise cosine. The data type + of the returned array is determined by the Type Promotion Rules. """ cos = UnaryElementwiseFunc("cos", ti._cos_result_type, ti._cos, _cos_docstring) @@ -116,9 +175,15 @@ First input array, expected to have numeric data type. x2 (usm_ndarray): Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. 
+ Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". Returns: usm_narray: - an array containing the result of element-wise division. The data type + An array containing the result of element-wise division. The data type of the returned array is determined by the Type Promotion Rules. """ @@ -138,9 +203,15 @@ First input array, expected to have numeric data type. x2 (usm_ndarray): Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". Returns: usm_narray: - an array containing the result of element-wise equality comparison. + An array containing the result of element-wise equality comparison. The data type of the returned array is determined by the Type Promotion Rules. """ @@ -150,7 +221,28 @@ ) # U13: ==== EXP (x) -# FIXME: implement U13 +_exp_docstring = """ +exp(x, out=None, order='K') + +Computes exponential for each element `x_i` of input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise exponential of x. + The data type of the returned array is determined by + the Type Promotion Rules. 
+""" + +exp = UnaryElementwiseFunc("exp", ti._exp_result_type, ti._exp, _exp_docstring) # U14: ==== EXPM1 (x) _expm1_docstring = """ @@ -187,13 +279,51 @@ # FIXME: implement B12 # U16: ==== IMAG (x) -# FIXME: implement U16 +_imag_docstring = """ +imag(x, out=None, order='K') + +Computes imaginary part of each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise imaginary component of input. + The data type of the returned array is determined + by the Type Promotion Rules. +""" + +imag = UnaryElementwiseFunc( + "imag", ti._imag_result_type, ti._imag, _imag_docstring +) # U17: ==== ISFINITE (x) _isfinite_docstring_ = """ isfinite(x, out=None, order='K') -Computes if every element of input array is a finite number. +Checks if each element of input array is a finite number. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array which is True where `x` is not positive infinity, + negative infinity, or NaN, False otherwise. + The data type of the returned array is boolean. """ isfinite = UnaryElementwiseFunc( @@ -204,7 +334,21 @@ _isinf_docstring_ = """ isinf(x, out=None, order='K') -Computes if every element of input array is an infinity. +Checks if each element of input array is an infinity. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. 
+ out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array which is True where `x` is positive or negative infinity, + False otherwise. The data type of the returned array is boolean. """ isinf = UnaryElementwiseFunc( @@ -215,7 +359,21 @@ _isnan_docstring_ = """ isnan(x, out=None, order='K') -Computes if every element of input array is a NaN. +Checks if each element of an input array is a NaN. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array which is True where x is NaN, False otherwise. + The data type of the returned array is boolean. """ isnan = UnaryElementwiseFunc( @@ -303,9 +461,15 @@ First input array, expected to have numeric data type. x2 (usm_ndarray): Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". Returns: usm_narray: - an array containing the element-wise products. The data type of + An array containing the element-wise products. The data type of the returned array is determined by the Type Promotion Rules. 
""" multiply = BinaryElementwiseFunc( @@ -324,8 +488,55 @@ # B21: ==== POW (x1, x2) # FIXME: implement B21 +# U??: ==== PROJ (x) +_proj_docstring = """ +proj(x, out=None, order='K') + +Computes projection of each element `x_i` for input array `x`. 
+ +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise projection. The data + type of the returned array is determined by the Type Promotion Rules. +""" + +proj = UnaryElementwiseFunc( + "proj", ti._proj_result_type, ti._proj, _proj_docstring +) + # U27: ==== REAL (x) -# FIXME: implement U27 +_real_docstring = """ +real(x, out=None, order='K') + +Computes real part of each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise real component of input. The data + type of the returned array is determined by the Type Promotion Rules. +""" + +real = UnaryElementwiseFunc( + "real", ti._real_result_type, ti._real, _real_docstring +) # B22: ==== REMAINDER (x1, x2) # FIXME: implement B22 @@ -337,7 +548,27 @@ # FIXME: implement U29 # U30: ==== SIN (x) -# FIXME: implement U30 +_sin_docstring = """ +sin(x, out=None, order='K') + +Computes sine for each element `x_i` of input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". 
+Returns: + usm_ndarray: + An array containing the element-wise sine. The data type of the + returned array is determined by the Type Promotion Rules. +""" + +sin = UnaryElementwiseFunc("sin", ti._sin_result_type, ti._sin, _sin_docstring) # U31: ==== SINH (x) # FIXME: implement U31 @@ -349,7 +580,22 @@ _sqrt_docstring_ = """ sqrt(x, out=None, order='K') -Computes sqrt for each element `x_i` for input array `x`. +Computes positive square-root for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_ndarray: + An array containing the element-wise positive square-root. + The data type of the returned array is determined by + the Type Promotion Rules. """ sqrt = UnaryElementwiseFunc( @@ -368,9 +614,15 @@ First input array, expected to have numeric data type. x2 (usm_ndarray): Second input array, also expected to have numeric data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter `out` is `None`. + Default: "K". Returns: usm_narray: - an array containing the element-wise differences. The data type + An array containing the element-wise differences. The data type of the returned array is determined by the Type Promotion Rules. 
""" subtract = BinaryElementwiseFunc( diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp new file mode 100644 index 0000000000..24c5a128d0 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp @@ -0,0 +1,194 @@ +//=== conj.hpp - Unary function CONJ ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of CONJ(x) function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace conj +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct ConjFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + if constexpr (is_complex::value) { + return std::conj(in); + } + else { + if constexpr (!std::is_same_v) + static_assert(std::is_same_v); + return in; + } + } +}; + +template +using ConjContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using ConjStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct ConjOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class 
conj_contig_kernel; + +template +sycl::event conj_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, ConjOutputType, ConjContigFunctor, conj_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct ConjContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = conj_contig_impl; + return fn; + } + } +}; + +template struct ConjTypeMapFactory +{ + /*! @brief get typeid for output type of std::conj(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename ConjOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class conj_strided_kernel; + +template +sycl::event +conj_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, ConjOutputType, ConjStridedFunctor, conj_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct ConjStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = conj_strided_impl; + return fn; + } + } +}; + +} // namespace conj +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp new file mode 100644 index 0000000000..710ab33a49 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp @@ -0,0 +1,174 @@ +//=== exp.hpp - Unary function EXP ------ 
*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of EXP(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace exp +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct ExpFunctor +{ + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + return std::exp(in); + } +}; + +template +using ExpContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template 
+using ExpStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct ExpOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class exp_contig_kernel; + +template +sycl::event exp_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, ExpOutputType, ExpContigFunctor, exp_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct ExpContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp_contig_impl; + return fn; + } + } +}; + +template struct ExpTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::exp(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename ExpOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class exp_strided_kernel; + +template +sycl::event exp_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, ExpOutputType, ExpStridedFunctor, exp_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct ExpStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp_strided_impl; + return fn; + } + } +}; + +} // namespace exp +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp new file mode 100644 index 0000000000..6c85de5561 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp @@ -0,0 +1,193 @@ +//=== imag.hpp - Unary function IMAG ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of IMAG(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace imag +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct ImagFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + if constexpr (is_complex::value) { + return std::imag(in); + } + else { + static_assert(std::is_same_v); + return resT{0}; + } + } +}; + +template +using ImagContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using ImagStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct ImagOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + 
td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, float>, + td_ns::TypeMapResultEntry, double>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class imag_contig_kernel; + +template +sycl::event imag_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, ImagOutputType, ImagContigFunctor, imag_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct ImagContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = imag_contig_impl; + return fn; + } + } +}; + +template struct ImagTypeMapFactory +{ + /*! @brief get typeid for output type of std::imag(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename ImagOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class imag_strided_kernel; + +template +sycl::event +imag_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, ImagOutputType, ImagStridedFunctor, imag_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct ImagStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = imag_strided_impl; + return fn; + } + } +}; + +} // namespace imag +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp new file mode 100644 index 0000000000..c1b89cebec --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -0,0 +1,183 @@ +//=== proj.hpp - Unary function PROJ ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of PROJ(x) function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace proj +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct ProjFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support subgroup store/load operation + using supports_sg_loadstore = typename std::false_type; + + resT operator()(const argT &in) + { + using realT = typename argT::value_type; + const realT x = std::real(in); + const realT y = std::imag(in); + + if (std::isinf(x) || std::isinf(y)) { + const realT res_im = std::copysign(realT(0), y); + return resT{std::numeric_limits::infinity(), res_im}; + } + return in; + } +}; + +template +using ProjContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using ProjStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct ProjOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class proj_contig_kernel; + +template +sycl::event proj_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, ProjOutputType, ProjContigFunctor, 
proj_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct ProjContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = proj_contig_impl; + return fn; + } + } +}; + +template struct ProjTypeMapFactory +{ + /*! @brief get typeid for output type of std::proj(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename ProjOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class proj_strided_kernel; + +template +sycl::event +proj_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, ProjOutputType, ProjStridedFunctor, proj_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct ProjStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = proj_strided_impl; + return fn; + } + } +}; + +} // namespace proj +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp new file mode 100644 index 0000000000..43e256913f --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp @@ -0,0 +1,193 @@ +//=== real.hpp - Unary function REAL ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of REAL(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace real +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct RealFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + if constexpr (is_complex::value) { + return std::real(in); + } + else { + static_assert(std::is_same_v); + return in; + } + } +}; + +template +using RealContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using RealStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct RealOutputType +{ + using 
value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, float>, + td_ns::TypeMapResultEntry, double>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class real_contig_kernel; + +template +sycl::event real_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, RealOutputType, RealContigFunctor, real_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct RealContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = real_contig_impl; + return fn; + } + } +}; + +template struct RealTypeMapFactory +{ + /*! 
@brief get typeid for output type of std::real(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename RealOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class real_strided_kernel; + +template +sycl::event +real_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, RealOutputType, RealStridedFunctor, real_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct RealStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = real_strided_impl; + return fn; + } + } +}; + +} // namespace real +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp new file mode 100644 index 0000000000..b89dfc2003 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -0,0 +1,174 @@ +//=== sin.hpp - Unary function SIN ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of SIN(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace sin +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct SinFunctor +{ + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + return std::sin(in); + } +}; + +template +using SinContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using SinStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct SinOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class sin_contig_kernel; + +template +sycl::event sin_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + 
const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, SinOutputType, SinContigFunctor, sin_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct SinContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = sin_contig_impl; + return fn; + } + } +}; + +template struct SinTypeMapFactory +{ + /*! @brief get typeid for output type of std::sin(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename SinOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class sin_strided_kernel; + +template +sycl::event sin_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, SinOutputType, SinStridedFunctor, sin_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct SinStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = sin_strided_impl; + return fn; + } + } +}; + +} // namespace sin +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 7104c65bf6..88597512bc 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -34,15 +34,21 @@ #include "kernels/elementwise_functions/abs.hpp" #include "kernels/elementwise_functions/add.hpp" +#include "kernels/elementwise_functions/conj.hpp" #include "kernels/elementwise_functions/cos.hpp" 
#include "kernels/elementwise_functions/equal.hpp" +#include "kernels/elementwise_functions/exp.hpp" #include "kernels/elementwise_functions/expm1.hpp" +#include "kernels/elementwise_functions/imag.hpp" #include "kernels/elementwise_functions/isfinite.hpp" #include "kernels/elementwise_functions/isinf.hpp" #include "kernels/elementwise_functions/isnan.hpp" #include "kernels/elementwise_functions/log.hpp" #include "kernels/elementwise_functions/log1p.hpp" #include "kernels/elementwise_functions/multiply.hpp" +#include "kernels/elementwise_functions/proj.hpp" +#include "kernels/elementwise_functions/real.hpp" +#include "kernels/elementwise_functions/sin.hpp" #include "kernels/elementwise_functions/sqrt.hpp" #include "kernels/elementwise_functions/subtract.hpp" #include "kernels/elementwise_functions/true_divide.hpp" @@ -301,7 +307,35 @@ namespace impl // U10: ==== CONJ (x) namespace impl { -// FIXME: add code for U10 + +namespace conj_fn_ns = dpctl::tensor::kernels::conj; + +static unary_contig_impl_fn_ptr_t conj_contig_dispatch_vector[td_ns::num_types]; +static int conj_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + conj_strided_dispatch_vector[td_ns::num_types]; + +void populate_conj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = conj_fn_ns; + + using fn_ns::ConjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(conj_contig_dispatch_vector); + + using fn_ns::ConjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(conj_strided_dispatch_vector); + + using fn_ns::ConjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(conj_output_typeid_vector); +} } // namespace impl // U11: ==== COS (x) @@ -455,7 +489,36 @@ void populate_equal_dispatch_tables(void) // U13: ==== EXP (x) namespace impl { -// FIXME: add code for U13 + +namespace exp_fn_ns = dpctl::tensor::kernels::exp; + +static unary_contig_impl_fn_ptr_t 
exp_contig_dispatch_vector[td_ns::num_types]; +static int exp_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp_fn_ns; + + using fn_ns::ExpContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp_contig_dispatch_vector); + + using fn_ns::ExpStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp_strided_dispatch_vector); + + using fn_ns::ExpTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp_output_typeid_vector); +} + } // namespace impl // U14: ==== EXPM1 (x) @@ -521,7 +584,35 @@ namespace impl // U16: ==== IMAG (x) namespace impl { -// FIXME: add code for U16 + +namespace imag_fn_ns = dpctl::tensor::kernels::imag; + +static unary_contig_impl_fn_ptr_t imag_contig_dispatch_vector[td_ns::num_types]; +static int imag_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + imag_strided_dispatch_vector[td_ns::num_types]; + +void populate_imag_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = imag_fn_ns; + + using fn_ns::ImagContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(imag_contig_dispatch_vector); + + using fn_ns::ImagStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(imag_strided_dispatch_vector); + + using fn_ns::ImagTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(imag_output_typeid_vector); +} } // namespace impl // U17: ==== ISFINITE (x) @@ -848,10 +939,72 @@ namespace impl // FIXME: add code for B21 } // namespace impl +// U??: ==== PROJ (x) +namespace impl +{ + +namespace proj_fn_ns = dpctl::tensor::kernels::proj; + +static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; +static int proj_output_typeid_vector[td_ns::num_types]; +static 
unary_strided_impl_fn_ptr_t + proj_strided_dispatch_vector[td_ns::num_types]; + +void populate_proj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = proj_fn_ns; + + using fn_ns::ProjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); + + using fn_ns::ProjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); + + using fn_ns::ProjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(proj_output_typeid_vector); +} +} // namespace impl + // U27: ==== REAL (x) namespace impl { -// FIXME: add code for U27 + +namespace real_fn_ns = dpctl::tensor::kernels::real; + +static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; +static int real_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + real_strided_dispatch_vector[td_ns::num_types]; + +void populate_real_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = real_fn_ns; + + using fn_ns::RealContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(real_contig_dispatch_vector); + + using fn_ns::RealStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(real_strided_dispatch_vector); + + using fn_ns::RealTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(real_output_typeid_vector); +} } // namespace impl // B22: ==== REMAINDER (x1, x2) @@ -875,7 +1028,36 @@ namespace impl // U30: ==== SIN (x) namespace impl { -// FIXME: add code for U30 + +namespace sin_fn_ns = dpctl::tensor::kernels::sin; + +static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; +static int sin_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sin_strided_dispatch_vector[td_ns::num_types]; + +void populate_sin_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sin_fn_ns; + + using 
fn_ns::SinContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); + + using fn_ns::SinStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); + + using fn_ns::SinTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sin_output_typeid_vector); +} + } // namespace impl // U31: ==== SINH (x) @@ -1125,7 +1307,26 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U10: ==== CONJ (x) - // FIXME: + { + impl::populate_conj_dispatch_vectors(); + using impl::conj_contig_dispatch_vector; + using impl::conj_output_typeid_vector; + using impl::conj_strided_dispatch_vector; + + auto conj_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, conj_output_typeid_vector, + conj_contig_dispatch_vector, conj_strided_dispatch_vector); + }; + m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto conj_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); + }; + m.def("_conj_result_type", conj_result_type_pyapi); + } // U11: ==== COS (x) { @@ -1234,7 +1435,26 @@ void init_elementwise_functions(py::module_ m) } // U13: ==== EXP (x) - // FIXME: + { + impl::populate_exp_dispatch_vectors(); + using impl::exp_contig_dispatch_vector; + using impl::exp_output_typeid_vector; + using impl::exp_strided_dispatch_vector; + + auto exp_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp_output_typeid_vector, + exp_contig_dispatch_vector, exp_strided_dispatch_vector); + }; + m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp_result_type_pyapi = [&](py::dtype 
dtype) { + return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); + }; + m.def("_exp_result_type", exp_result_type_pyapi); + } // U14: ==== EXPM1 (x) { @@ -1272,7 +1492,26 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U16: ==== IMAG (x) - // FIXME: + { + impl::populate_imag_dispatch_vectors(); + using impl::imag_contig_dispatch_vector; + using impl::imag_output_typeid_vector; + using impl::imag_strided_dispatch_vector; + + auto imag_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, imag_output_typeid_vector, + imag_contig_dispatch_vector, imag_strided_dispatch_vector); + }; + m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto imag_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); + }; + m.def("_imag_result_type", imag_result_type_pyapi); + } // U17: ==== ISFINITE (x) { @@ -1471,8 +1710,49 @@ void init_elementwise_functions(py::module_ m) // B21: ==== POW (x1, x2) // FIXME: + // U??: ==== PROJ (x) + { + impl::populate_proj_dispatch_vectors(); + using impl::proj_contig_dispatch_vector; + using impl::proj_output_typeid_vector; + using impl::proj_strided_dispatch_vector; + + auto proj_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, proj_output_typeid_vector, + proj_contig_dispatch_vector, proj_strided_dispatch_vector); + }; + m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto proj_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + }; + m.def("_proj_result_type", proj_result_type_pyapi); + } + // U27: ==== REAL (x) - // FIXME: + { + 
impl::populate_real_dispatch_vectors(); + using impl::real_contig_dispatch_vector; + using impl::real_output_typeid_vector; + using impl::real_strided_dispatch_vector; + + auto real_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, real_output_typeid_vector, + real_contig_dispatch_vector, real_strided_dispatch_vector); + }; + m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto real_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + }; + m.def("_real_result_type", real_result_type_pyapi); + } // B22: ==== REMAINDER (x1, x2) // FIXME: @@ -1484,8 +1764,26 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U30: ==== SIN (x) - // FIXME: + { + impl::populate_sin_dispatch_vectors(); + using impl::sin_contig_dispatch_vector; + using impl::sin_output_typeid_vector; + using impl::sin_strided_dispatch_vector; + auto sin_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sin_output_typeid_vector, + sin_contig_dispatch_vector, sin_strided_dispatch_vector); + }; + m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sin_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); + }; + m.def("_sin_result_type", sin_result_type_pyapi); + } // U31: ==== SINH (x) // FIXME: diff --git a/dpctl/tests/elementwise/test_complex.py b/dpctl/tests/elementwise/test_complex.py new file mode 100644 index 0000000000..e1d4ebb66b --- /dev/null +++ b/dpctl/tests/elementwise/test_complex.py @@ -0,0 +1,198 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_complex_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.real(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.real(X).dtype == expected_dtype + + expected_dtype = np.imag(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.imag(X).dtype == expected_dtype + + expected_dtype = np.conj(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.conj(X).dtype == expected_dtype + + +@pytest.mark.parametrize( + "np_call, dpt_call", + [(np.real, dpt.real), (np.imag, dpt.imag), (np.conj, dpt.conj)], +) +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_complex_output(np_call, dpt_call, dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 100 + + x1 = np.linspace(0, 10, num=n_seq, dtype=dtype) + x2 = np.linspace(0, 20, num=n_seq, dtype=dtype) + Xnp = x1 + 1j * x2 + X = dpt.asarray(Xnp, dtype=Xnp.dtype, sycl_queue=q) + 
+ Y = dpt_call(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np_call(Xnp), atol=tol, rtol=tol) + + Z = dpt.empty_like(X, dtype=np_call(Xnp).dtype) + dpt_call(X, out=Z) + + assert_allclose(dpt.asnumpy(Z), np_call(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize( + "np_call, dpt_call", + [(np.real, dpt.real), (np.imag, dpt.imag), (np.conj, dpt.conj)], +) +@pytest.mark.parametrize("usm_type", _usm_types) +def test_complex_usm_type(np_call, dpt_call, usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("c8") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = np.pi / 6 + 1j * np.pi / 3 + X[..., 1::2] = np.pi / 3 + 1j * np.pi / 6 + + Y = dpt_call(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np_call(np.complex64(np.pi / 6 + 1j * np.pi / 3)) + expected_Y[..., 1::2] = np_call(np.complex64(np.pi / 3 + 1j * np.pi / 6)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize( + "np_call, dpt_call", + [(np.real, dpt.real), (np.imag, dpt.imag), (np.conj, dpt.conj)], +) +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_complex_order(np_call, dpt_call, dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = np.pi / 6 + 1j * np.pi / 3 + X[..., 1::2] = np.pi / 3 + 1j * np.pi / 6 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt_call(U, order=ord) + expected_Y = np_call(dpt.asnumpy(U)) + assert np.allclose(dpt.asnumpy(Y), expected_Y) + + +@pytest.mark.parametrize("dtype", ["c8", "c16"]) 
+def test_projection_complex(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = [ + complex(1, 2), + complex(dpt.inf, -1), + complex(0, -dpt.inf), + complex(-dpt.inf, dpt.nan), + ] + Y = [ + complex(1, 2), + complex(np.inf, -0.0), + complex(np.inf, -0.0), + complex(np.inf, 0.0), + ] + + Xf = dpt.asarray(X, dtype=dtype, sycl_queue=q) + Yf = np.array(Y, dtype=dtype) + + tol = 8 * dpt.finfo(Xf.dtype).resolution + assert_allclose(dpt.asnumpy(dpt.proj(Xf)), Yf, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_projection(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + Xf = dpt.asarray(1, dtype=dtype, sycl_queue=q) + out_dtype = dpt.proj(Xf).dtype + Yf = np.array(complex(1, 0), dtype=out_dtype) + + tol = 8 * dpt.finfo(Yf.dtype).resolution + assert_allclose(dpt.asnumpy(dpt.proj(Xf)), Yf, atol=tol, rtol=tol) + + +@pytest.mark.parametrize( + "np_call, dpt_call", + [(np.real, dpt.real), (np.imag, dpt.imag), (np.conj, dpt.conj)], +) +@pytest.mark.parametrize("dtype", ["f4", "f8"]) +@pytest.mark.parametrize("stride", [-1, 1, 2, 4, 5]) +def test_complex_strided(np_call, dpt_call, dtype, stride): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + N = 100 + rng = np.random.default_rng(42) + x1 = rng.standard_normal(N, dtype) + x2 = 1j * rng.standard_normal(N, dtype) + x = x1 + x2 + y = np_call(x[::stride]) + z = dpt_call(dpt.asarray(x[::stride])) + + tol = 8 * dpt.finfo(y.dtype).resolution + assert_allclose(y, dpt.asnumpy(z), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +def test_complex_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = [np.nan, -np.nan, np.inf, -np.inf] + with np.errstate(all="ignore"): + Xnp = 1j * np.array(x, dtype=dtype) + X = dpt.asarray(Xnp, dtype=Xnp.dtype) + + tol = 8 * dpt.finfo(dtype).resolution + assert_allclose(dpt.asnumpy(dpt.real(X)), np.real(Xnp), atol=tol, 
rtol=tol) + assert_allclose(dpt.asnumpy(dpt.imag(X)), np.imag(Xnp), atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(dpt.conj(X)), np.conj(Xnp), atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_exp.py b/dpctl/tests/elementwise/test_exp.py new file mode 100644 index 0000000000..5ea8ded018 --- /dev/null +++ b/dpctl/tests/elementwise/test_exp.py @@ -0,0 +1,147 @@ +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.exp(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.exp(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(0, 11, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.exp(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + X = dpt.linspace(0, 11, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.exp(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_exp_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + 
input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 16.0 + X[..., 1::2] = 23.0 + + Y = dpt.exp(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np.exp(np.float32(16.0)) + expected_Y[..., 1::2] = np.exp(np.float32(23.0)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 8.0 + X[..., 1::2] = 11.0 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.exp(U, order=ord) + expected_Y = np.exp(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f", "d"]) +def test_exp_values(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + tol = 8 * dpt.finfo(dtype).resolution + x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + log2_ = 0.69314718055994530943 + Xnp = np.array(x, dtype=dtype) * log2_ + X = dpt.asarray(Xnp, dtype=dtype) + assert_allclose(dpt.asnumpy(dpt.exp(X)), np.exp(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["e", "f", "d"]) +def test_exp_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + tol = 8 * dpt.finfo(dtype).resolution + x = [np.nan, -np.nan, np.inf, -np.inf, -1.0, 1.0, 0.0, -0.0] + Xnp = np.array(x, dtype=dtype) + X = dpt.asarray(x, dtype=dtype) + assert_allclose(dpt.asnumpy(dpt.exp(X)), 
np.exp(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_exp_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + np.random.seed(42) + strides = np.array([-4, -3, -2, -1, 1, 2, 3, 4]) + sizes = np.arange(2, 100) + tol = 8 * dpt.finfo(dtype).resolution + + for ii in sizes: + Xnp = dtype(np.random.uniform(low=0.01, high=88.1, size=ii)) + X = dpt.asarray(Xnp) + Y_expected = np.exp(Xnp) + for jj in strides: + assert_allclose( + dpt.asnumpy(dpt.exp(X[::jj])), + Y_expected[::jj], + atol=tol, + rtol=tol, + ) diff --git a/dpctl/tests/elementwise/test_cos.py b/dpctl/tests/elementwise/test_sincos.py similarity index 54% rename from dpctl/tests/elementwise/test_cos.py rename to dpctl/tests/elementwise/test_sincos.py index 3bf441a8dc..d027ef026a 100644 --- a/dpctl/tests/elementwise/test_cos.py +++ b/dpctl/tests/elementwise/test_sincos.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from numpy.testing import assert_raises_regex +from numpy.testing import assert_allclose, assert_raises_regex import dpctl import dpctl.tensor as dpt @@ -27,26 +27,32 @@ from .utils import _all_dtypes, _map_to_device_dtype +@pytest.mark.parametrize( + "np_call, dpt_call", [(np.sin, dpt.sin), (np.cos, dpt.cos)] +) @pytest.mark.parametrize("dtype", _all_dtypes) -def test_cos_out_type(dtype): +def test_sincos_out_type(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) X = dpt.asarray(0, dtype=dtype, sycl_queue=q) - expected_dtype = np.cos(np.array(0, dtype=dtype)).dtype + expected_dtype = np_call(np.array(0, dtype=dtype)).dtype expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) - assert dpt.cos(X).dtype == expected_dtype + assert dpt_call(X).dtype == expected_dtype X = dpt.asarray(0, dtype=dtype, sycl_queue=q) - expected_dtype = np.cos(np.array(0, dtype=dtype)).dtype + expected_dtype = np_call(np.array(0, dtype=dtype)).dtype expected_dtype = 
_map_to_device_dtype(expected_dtype, q.sycl_device) Y = dpt.empty_like(X, dtype=expected_dtype) - dpt.cos(X, out=Y) - np.testing.assert_allclose(dpt.asnumpy(dpt.cos(X)), dpt.asnumpy(Y)) + dpt_call(X, out=Y) + assert_allclose(dpt.asnumpy(dpt_call(X)), dpt.asnumpy(Y)) +@pytest.mark.parametrize( + "np_call, dpt_call", [(np.sin, dpt.sin), (np.cos, dpt.cos)] +) @pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) -def test_cos_output(dtype): +def test_sincos_output(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -56,23 +62,26 @@ def test_cos_output(dtype): Xnp = np.linspace(-np.pi / 4, np.pi / 4, num=n_seq, dtype=dtype) X = dpt.asarray(np.repeat(Xnp, n_rep), dtype=dtype, sycl_queue=q) - Y = dpt.cos(X) + Y = dpt_call(X) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose( - dpt.asnumpy(Y), np.repeat(np.cos(Xnp), n_rep), atol=tol, rtol=tol + assert_allclose( + dpt.asnumpy(Y), np.repeat(np_call(Xnp), n_rep), atol=tol, rtol=tol ) Z = dpt.empty_like(X, dtype=dtype) - dpt.cos(X, out=Z) + dpt_call(X, out=Z) - np.testing.assert_allclose( - dpt.asnumpy(Z), np.repeat(np.cos(Xnp), n_rep), atol=tol, rtol=tol + assert_allclose( + dpt.asnumpy(Z), np.repeat(np_call(Xnp), n_rep), atol=tol, rtol=tol ) +@pytest.mark.parametrize( + "np_call, dpt_call", [(np.sin, dpt.sin), (np.cos, dpt.cos)] +) @pytest.mark.parametrize("usm_type", ["device", "shared", "host"]) -def test_cos_usm_type(usm_type): +def test_sincos_usm_type(np_call, dpt_call, usm_type): q = get_queue_or_skip() arg_dt = np.dtype("f4") @@ -81,21 +90,24 @@ def test_cos_usm_type(usm_type): X[..., 0::2] = np.pi / 6 X[..., 1::2] = np.pi / 3 - Y = dpt.cos(X) + Y = dpt_call(X) assert Y.usm_type == X.usm_type assert Y.sycl_queue == X.sycl_queue assert Y.flags.c_contiguous expected_Y = np.empty(input_shape, dtype=arg_dt) - expected_Y[..., 0::2] = np.cos(np.float32(np.pi / 6)) - expected_Y[..., 1::2] = np.cos(np.float32(np.pi / 3)) + expected_Y[..., 0::2] = 
np_call(np.float32(np.pi / 6)) + expected_Y[..., 1::2] = np_call(np.float32(np.pi / 3)) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) +@pytest.mark.parametrize( + "np_call, dpt_call", [(np.sin, dpt.sin), (np.cos, dpt.cos)] +) @pytest.mark.parametrize("dtype", _all_dtypes) -def test_cos_order(dtype): +def test_sincos_order(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -108,18 +120,17 @@ def test_cos_order(dtype): for ord in ["C", "F", "A", "K"]: for perms in itertools.permutations(range(4)): U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) - Y = dpt.cos(U, order=ord) - expected_Y = np.cos(dpt.asnumpy(U)) + Y = dpt_call(U, order=ord) + expected_Y = np_call(dpt.asnumpy(U)) tol = 8 * max( dpt.finfo(Y.dtype).resolution, np.finfo(expected_Y.dtype).resolution, ) - np.testing.assert_allclose( - dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol - ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) -def test_cos_errors(): +@pytest.mark.parametrize("callable", [dpt.sin, dpt.cos]) +def test_sincos_errors(callable): get_queue_or_skip() try: gpu_queue = dpctl.SyclQueue("gpu") @@ -135,7 +146,7 @@ def test_cos_errors(): assert_raises_regex( TypeError, "Input and output allocation queues are not compatible", - dpt.cos, + callable, x, y, ) @@ -145,7 +156,7 @@ def test_cos_errors(): assert_raises_regex( TypeError, "The shape of input and output arrays are inconsistent", - dpt.cos, + callable, x, y, ) @@ -153,23 +164,69 @@ def test_cos_errors(): x = dpt.zeros(2) y = x assert_raises_regex( - TypeError, "Input and output arrays have memory overlap", dpt.cos, x, y + TypeError, "Input and output arrays have memory overlap", callable, x, y ) x = dpt.zeros(2, dtype="float32") y = np.empty_like(x) assert_raises_regex( - TypeError, "output array must be of usm_ndarray type", dpt.cos, x, y + 
TypeError, "output array must be of usm_ndarray type", callable, x, y ) +@pytest.mark.parametrize("callable", [dpt.sin, dpt.cos]) @pytest.mark.parametrize("dtype", _all_dtypes) -def test_cos_error_dtype(dtype): +def test_sincos_error_dtype(callable, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) x = dpt.zeros(5, dtype=dtype) y = dpt.empty_like(x, dtype="int16") assert_raises_regex( - TypeError, "Output array of type.*is needed", dpt.cos, x, y + TypeError, "Output array of type.*is needed", callable, x, y ) + + +@pytest.mark.parametrize("dtype", ["e", "f", "d"]) +def test_sincos_special_cases(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + tol = 8 * dpt.finfo(dtype).resolution + x = [np.nan, np.nan, np.nan, np.nan] + y = [np.nan, -np.nan, np.inf, -np.inf] + xf = np.array(x, dtype=dtype) + yf = dpt.asarray(y, dtype=dtype) + assert_allclose(dpt.asnumpy(dpt.sin(yf)), xf, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(dpt.cos(yf)), xf, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_sincos_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + np.random.seed(42) + strides = np.array([-4, -3, -2, -1, 1, 2, 3, 4]) + sizes = np.arange(2, 100) + tol = 8 * dpt.finfo(dtype).resolution + + for ii in sizes: + Xnp = dtype(np.random.uniform(low=0.01, high=88.1, size=ii)) + Xnp[3:-1:4] = 120000.0 + X = dpt.asarray(Xnp) + sin_true = np.sin(Xnp) + cos_true = np.cos(Xnp) + for jj in strides: + assert_allclose( + dpt.asnumpy(dpt.sin(X[::jj])), + sin_true[::jj], + atol=tol, + rtol=tol, + ) + assert_allclose( + dpt.asnumpy(dpt.cos(X[::jj])), + cos_true[::jj], + atol=tol, + rtol=tol, + )