From 821a888899bbfbabb57ca96c843372d5837a13ed Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 26 May 2023 12:57:14 -0700 Subject: [PATCH 1/3] Implements expm1, log, and log1p - Also adds tests for real and complex variants where appropriate --- dpctl/tensor/__init__.py | 6 + dpctl/tensor/_elementwise_funcs.py | 61 ++++- .../kernels/elementwise_functions/expm1.hpp | 259 ++++++++++++++++++ .../kernels/elementwise_functions/log.hpp | 196 +++++++++++++ .../kernels/elementwise_functions/log1p.hpp | 233 ++++++++++++++++ .../source/elementwise_functions.cpp | 163 ++++++++++- dpctl/tests/elementwise/test_expm1.py | 144 ++++++++++ dpctl/tests/elementwise/test_log.py | 128 +++++++++ dpctl/tests/elementwise/test_log1p.py | 146 ++++++++++ 9 files changed, 1327 insertions(+), 9 deletions(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp create mode 100644 dpctl/tests/elementwise/test_expm1.py create mode 100644 dpctl/tests/elementwise/test_log.py create mode 100644 dpctl/tests/elementwise/test_log1p.py diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 5896bde65a..c9c1c46f71 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -97,9 +97,12 @@ cos, divide, equal, + expm1, isfinite, isinf, isnan, + log, + log1p, multiply, sqrt, subtract, @@ -184,9 +187,12 @@ "abs", "add", "cos", + "expm1", "isinf", "isnan", "isfinite", + "log", + "log1p", "sqrt", "divide", "multiply", diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 29d1052c9c..9f8b95c87c 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -153,7 +153,26 @@ # FIXME: implement U13 # U14: ==== EXPM1 (x) -# FIXME: implement U14 +_expm1_docstring = """ +expm1(x, out=None, order='K') +Computes an approximation of exp(x)-1 element-wise. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is `None`. + Default: "K". +Return: + usm_ndarray: + An array containing the element-wise exp(x)-1 values. +""" + +expm1 = UnaryElementwiseFunc( + "expm1", ti._expm1_result_type, ti._expm1, _expm1_docstring +) # U15: ==== FLOOR (x) # FIXME: implement U15 @@ -210,10 +229,46 @@ # FIXME: implement B14 # U20: ==== LOG (x) -# FIXME: implement U20 +_log_docstring = """ +log(x, out=None, order='K') +Computes the natural logarithm element-wise. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is `None`. + Default: "K". +Return: + usm_ndarray: + An array containing the element-wise natural logarithm values. +""" + +log = UnaryElementwiseFunc("log", ti._log_result_type, ti._log, _log_docstring) # U21: ==== LOG1P (x) -# FIXME: implement U21 +_log1p_docstring = """ +log1p(x, out=None, order='K') +Computes an approximation of log(1+x) element-wise. +Args: + x (usm_ndarray): + Input array, expected to have numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is `None`. + Default: "K". +Return: + usm_ndarray: + An array containing the element-wise log(1+x) values. +""" + +log1p = UnaryElementwiseFunc( + "log1p", ti._log1p_result_type, ti._log1p, _log1p_docstring +) # U22: ==== LOG2 (x) # FIXME: implement U22 diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp new file mode 100644 index 0000000000..81a06fc90b --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -0,0 +1,259 @@ +//=== expm1.hpp - Unary function EXPM1 ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of EXPM1(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace expm1 +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct Expm1Functor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + if constexpr (is_complex::value) { + using realT = typename argT::value_type; + // expm1(x + I*y) = expm1(x)*cos(y) - 2*sin(y / 2)^2 + + // I*exp(x)*sin(y) + auto x = std::real(in); + const realT expm1X_val = std::expm1(x); + const realT expX_val = std::exp(x); + + x = std::imag(in); + realT cosY_val; + const realT sinY_val = sycl::sincos(x, &cosY_val); + const realT sinhalfY_val = std::sin(x / realT{2}); + + const realT res_re = + expm1X_val * cosY_val - realT{2} * sinhalfY_val * sinhalfY_val; + const realT res_im = expX_val * sinY_val; + return resT{res_re, res_im}; + } + else { + static_assert(std::is_floating_point_v || + std::is_same_v); + return std::expm1(in); + } + } +}; + +template +using Expm1ContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using Expm1StridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct Expm1OutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, std::complex>, + td_ns:: + TypeMapResultEntry, std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +typedef sycl::event (*expm1_contig_impl_fn_ptr_t)( + sycl::queue, + size_t, + const char *, + char *, + const std::vector &); + +template +class expm1_contig_kernel; + +template +sycl::event expm1_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + sycl::event expm1_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + constexpr size_t lws = 64; + constexpr unsigned int vec_sz = 4; + constexpr unsigned int n_vecs = 2; + static_assert(lws % vec_sz == 0); + auto gws_range = sycl::range<1>( + ((nelems + n_vecs * lws * vec_sz - 1) / (lws * n_vecs * vec_sz)) * + lws); + auto lws_range = sycl::range<1>(lws); + + using resTy = typename Expm1OutputType::value_type; + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for< + class expm1_contig_kernel>( + sycl::nd_range<1>(gws_range, lws_range), + Expm1ContigFunctor(arg_tp, res_tp, + nelems)); + }); + return expm1_ev; +} + +template struct Expm1ContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = expm1_contig_impl; + return fn; + } + } +}; + +template struct Expm1TypeMapFactory +{ + /*! @brief get typeid for output type of std::expm1(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename Expm1OutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template class expm1_strided_kernel; + +typedef sycl::event (*expm1_strided_impl_fn_ptr_t)( + sycl::queue, + size_t, + int, + const py::ssize_t *, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +template +sycl::event +expm1_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using resTy = typename Expm1OutputType::value_type; + using IndexerT = + typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + + IndexerT arg_res_indexer(nd, arg_offset, res_offset, shape_and_strides); + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy *res_tp = reinterpret_cast(res_p); + + sycl::range<1> gRange{nelems}; + + cgh.parallel_for>( + gRange, Expm1StridedFunctor( + arg_tp, res_tp, arg_res_indexer)); + }); + return comp_ev; +} + +template struct Expm1StridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = expm1_strided_impl; + return fn; + } + } +}; + +} // namespace expm1 +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp new file mode 100644 index 0000000000..f6998d4e0f --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp @@ -0,0 +1,196 @@ +//=== log.hpp - Unary function LOG ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of LOG(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace log +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct LogFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + return std::log(in); + } +}; + +template +using LogContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using LogStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct LogOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, std::complex>, + td_ns:: + TypeMapResultEntry, std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +typedef sycl::event (*log_contig_impl_fn_ptr_t)( + sycl::queue, + size_t, + const char *, + char *, + const std::vector &); + +template +class log_contig_kernel; + +template +sycl::event log_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, LogOutputType, LogContigFunctor, log_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct LogContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = log_contig_impl; + return fn; + } + } +}; + +template struct LogTypeMapFactory +{ + /*! @brief get typeid for output type of std::log(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename LogOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template class log_strided_kernel; + +typedef sycl::event (*log_strided_impl_fn_ptr_t)( + sycl::queue, + size_t, + int, + const py::ssize_t *, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +template +sycl::event log_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, LogOutputType, LogStridedFunctor, log_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct LogStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = log_strided_impl; + return fn; + } + } +}; + +} // namespace log +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp new file mode 100644 index 0000000000..626e338db1 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -0,0 +1,233 @@ +//=== log1p.hpp - Unary function LOG1P ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of LOG1P(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace log1p +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +// TODO: evaluate precision against alternatives +template struct Log1pFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) + { + if constexpr (is_complex::value) { + // log1p(z) = ln((x + 1) + yI) + // = ln(|(x + 1) + yi|) + // + I * atan2(y, x + 1) + // = ln(sqrt((x + 1)^2 + y^2)) + // + I *atan2(y, x + 1) + // = log1p(x^2 + 2x + y^2) / 2 + // + I * atan2(y, x + 1) + using realT = typename argT::value_type; + const realT x = std::real(in); + const realT y = std::imag(in); + + // imaginary part of result + const realT imagRes = std::atan2(y, x + 1); + + if (std::abs(in) < realT(.5)) { + const realT realRes = x * (2 + x) + y * y; + return {std::log1p(realRes) / 2, imagRes}; + } + else { + // when not close to zero, + // prevent overflow + const realT realRes = std::hypot(x + 1, y); + return {std::log(realRes), imagRes}; + } + } + else { + static_assert(std::is_floating_point_v || + std::is_same_v); + return std::log1p(in); + } + } +}; + +template +using Log1pContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using Log1pStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct Log1pOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, std::complex>, + td_ns:: + TypeMapResultEntry, std::complex>, + td_ns::DefaultResultEntry>::result_type; +}; + +typedef sycl::event (*log1p_contig_impl_fn_ptr_t)( + sycl::queue, + size_t, + const char *, + char *, + const std::vector &); + +template +class log1p_contig_kernel; + +template +sycl::event log1p_contig_impl(sycl::queue exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, Log1pOutputType, Log1pContigFunctor, log1p_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct Log1pContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = log1p_contig_impl; + return fn; + } + } +}; + +template struct Log1pTypeMapFactory +{ + /*! @brief get typeid for output type of std::log1p(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename Log1pOutputType::value_type; + ; + return td_ns::GetTypeid{}.get(); + } +}; + +template class log1p_strided_kernel; + +typedef sycl::event (*log1p_strided_impl_fn_ptr_t)( + sycl::queue, + size_t, + int, + const py::ssize_t *, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &, + const std::vector &); + +template +sycl::event +log1p_strided_impl(sycl::queue exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, Log1pOutputType, Log1pStridedFunctor, log1p_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct Log1pStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = log1p_strided_impl; + return fn; + } + } +}; + +} // namespace log1p +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index c943f28c97..7104c65bf6 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -36,9 +36,12 @@ #include "kernels/elementwise_functions/add.hpp" #include "kernels/elementwise_functions/cos.hpp" #include "kernels/elementwise_functions/equal.hpp" +#include "kernels/elementwise_functions/expm1.hpp" #include "kernels/elementwise_functions/isfinite.hpp" #include "kernels/elementwise_functions/isinf.hpp" #include "kernels/elementwise_functions/isnan.hpp" +#include "kernels/elementwise_functions/log.hpp" +#include "kernels/elementwise_functions/log1p.hpp" #include "kernels/elementwise_functions/multiply.hpp" #include "kernels/elementwise_functions/sqrt.hpp" #include "kernels/elementwise_functions/subtract.hpp" @@ -458,7 +461,37 @@ namespace impl // U14: ==== EXPM1 (x) namespace impl { -// FIXME: add code for U14 + +namespace expm1_fn_ns = dpctl::tensor::kernels::expm1; + +static unary_contig_impl_fn_ptr_t + expm1_contig_dispatch_vector[td_ns::num_types]; +static int expm1_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + expm1_strided_dispatch_vector[td_ns::num_types]; + +void populate_expm1_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = expm1_fn_ns; + + using fn_ns::Expm1ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector); + + using fn_ns::Expm1StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector); + + using fn_ns::Expm1TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(expm1_output_typeid_vector); +} + } // namespace impl // U15: ==== FLOOR (x) @@ -611,13 +644,72 @@ namespace impl // U20: ==== LOG (x) namespace impl { -// FIXME: add code for U20 + +namespace log_fn_ns = dpctl::tensor::kernels::log; + +static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types]; +static int log_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log_strided_dispatch_vector[td_ns::num_types]; + +void populate_log_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log_fn_ns; + + using fn_ns::LogContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log_contig_dispatch_vector); + + using fn_ns::LogStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log_strided_dispatch_vector); + + using fn_ns::LogTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log_output_typeid_vector); +} + } // namespace impl // U21: ==== LOG1P (x) namespace impl { -// FIXME: add code for U21 + +namespace log1p_fn_ns = dpctl::tensor::kernels::log1p; + +static unary_contig_impl_fn_ptr_t + log1p_contig_dispatch_vector[td_ns::num_types]; +static int log1p_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log1p_strided_dispatch_vector[td_ns::num_types]; + +void populate_log1p_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log1p_fn_ns; + + using fn_ns::Log1pContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector); + + using fn_ns::Log1pStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector); + + using fn_ns::Log1pTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log1p_output_typeid_vector); +} + } // namespace impl // U22: ==== LOG2 (x) @@ -1145,7 +1237,27 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U14: ==== EXPM1 (x) - // FIXME: + { + impl::populate_expm1_dispatch_vectors(); + using impl::expm1_contig_dispatch_vector; + using impl::expm1_output_typeid_vector; + using impl::expm1_strided_dispatch_vector; + + auto expm1_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, expm1_output_typeid_vector, + expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); + }; + m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto expm1_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + expm1_output_typeid_vector); + }; + m.def("_expm1_result_type", expm1_result_type_pyapi); + } // U15: ==== FLOOR (x) // FIXME: @@ -1242,10 +1354,49 @@ void init_elementwise_functions(py::module_ m) // FIXME: // U20: ==== LOG (x) - // FIXME: + { + impl::populate_log_dispatch_vectors(); + using impl::log_contig_dispatch_vector; + using impl::log_output_typeid_vector; + using impl::log_strided_dispatch_vector; + + auto log_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log_output_typeid_vector, + log_contig_dispatch_vector, log_strided_dispatch_vector); + }; + m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); + }; + m.def("_log_result_type", log_result_type_pyapi); + } // U21: ==== LOG1P (x) - // FIXME: + { + impl::populate_log1p_dispatch_vectors(); + using impl::log1p_contig_dispatch_vector; + using impl::log1p_output_typeid_vector; + using impl::log1p_strided_dispatch_vector; + + auto log1p_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log1p_output_typeid_vector, + log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); + }; + m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log1p_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + log1p_output_typeid_vector); + }; + m.def("_log1p_result_type", log1p_result_type_pyapi); + } // U22: ==== LOG2 (x) // FIXME: diff --git a/dpctl/tests/elementwise/test_expm1.py b/dpctl/tests/elementwise/test_expm1.py new file mode 100644 index 0000000000..20dc421c77 --- /dev/null +++ b/dpctl/tests/elementwise/test_expm1.py @@ -0,0 +1,144 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_expm1_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.expm1(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.expm1(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_expm1_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(-2, 2, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.expm1(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.expm1(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_expm1_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2 * 1027 + + X = dpt.linspace(-2, 2, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.expm1(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.expm1(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_expm1_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 / 50 + X[..., 1::2] = 1 / 25 + + Y = dpt.expm1(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np.expm1(np.float32(1 / 50)) + expected_Y[..., 1::2] = np.expm1(np.float32(1 / 25)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_expm1_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 1 / 50 + X[..., 1::2] = 1 / 25 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.expm1(U, order=ord) + expected_Y = np.expm1(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +def test_expm1_special_cases(): + q = get_queue_or_skip() + + X = dpt.asarray( + [dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q + ) + Xnp = dpt.asnumpy(X) + + tol = dpt.finfo(X.dtype).resolution + assert_allclose( + dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol + ) + + # special cases for complex variant + vals = [ + complex(*val) + for val in itertools.permutations( + [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0], 2 + ) + ] + X = dpt.asarray(vals, dtype=dpt.complex64) + Xnp = dpt.asnumpy(X) + + tol = dpt.finfo(X.dtype).resolution + assert_allclose( + dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol + ) diff --git a/dpctl/tests/elementwise/test_log.py b/dpctl/tests/elementwise/test_log.py new file mode 100644 index 0000000000..ed56fb6468 --- /dev/null +++ b/dpctl/tests/elementwise/test_log.py @@ -0,0 +1,128 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_equal + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_log_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.log(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.log(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_log_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.log(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_log_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2 * 1027 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.log(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_log_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 4 * dpt.e + X[..., 1::2] = 10 * dpt.e + + Y = dpt.log(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np.log(np.float32(4 * dpt.e)) + expected_Y[..., 1::2] = np.log(np.float32(10 * dpt.e)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + np.testing.assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_log_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 4 * dpt.e + X[..., 1::2] = 10 * dpt.e + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.log(U, order=ord) + expected_Y = np.log(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + np.testing.assert_allclose( + dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol + ) + + +def test_log_special_cases(): + q = get_queue_or_skip() + + X = dpt.asarray( + [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q + ) + Xnp = dpt.asnumpy(X) + + assert_equal(dpt.asnumpy(dpt.log(X)), np.log(Xnp)) diff --git a/dpctl/tests/elementwise/test_log1p.py b/dpctl/tests/elementwise/test_log1p.py new file mode 100644 index 0000000000..2820b4f8b5 --- /dev/null +++ b/dpctl/tests/elementwise/test_log1p.py @@ -0,0 +1,146 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_log1p_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.log1p(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.log1p(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_log1p_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(0, 2, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.log1p(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.log1p(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_log1p_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2 * 1027 + + X = dpt.linspace(0, 2, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.log1p(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.log1p(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_log1p_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = dpt.e / 1000 + X[..., 1::2] = dpt.e / 100 + + Y = dpt.log1p(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np.log1p(np.float32(dpt.e / 1000)) + expected_Y[..., 1::2] = np.log1p(np.float32(dpt.e / 100)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_log1p_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = dpt.e / 1000 + X[..., 1::2] = dpt.e / 100 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.log1p(U, order=ord) + expected_Y = np.log1p(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +def test_log1p_special_cases(): + q = get_queue_or_skip() + + X = dpt.asarray( + [dpt.nan, -1.0, -2.0, 0.0, -0.0, dpt.inf, -dpt.inf], + dtype="f4", + sycl_queue=q, + ) + Xnp = dpt.asnumpy(X) + + tol = dpt.finfo(X.dtype).resolution + assert_allclose( + dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol + ) + + # special cases for complex + vals = [ + complex(*val) + for val in itertools.permutations( + [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0, -1.0, -2.0], 2 + ) + ] + X = dpt.asarray(vals, dtype=dpt.complex64) + Xnp = dpt.asnumpy(X) + + tol = dpt.finfo(X.dtype).resolution + assert_allclose( + dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol + ) From 909d8946ac7823343fe67847626e16862dda3a69 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 May 2023 17:58:35 -0700 Subject: [PATCH 2/3] Changes per review by @oleksandr-pavlyk --- .../kernels/elementwise_functions/expm1.hpp | 14 ++++++-------- .../kernels/elementwise_functions/log1p.hpp | 12 ++++++------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index 81a06fc90b..8188dd153e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -70,18 +70,16 @@ template struct Expm1Functor using realT = typename argT::value_type; // expm1(x + I*y) = expm1(x)*cos(y) - 2*sin(y / 2)^2 + // I*exp(x)*sin(y) - auto x = std::real(in); - const realT expm1X_val = std::expm1(x); - const realT expX_val = std::exp(x); + const realT x = std::real(in); + const realT y = std::imag(in); - x = std::imag(in); realT cosY_val; - const realT sinY_val = sycl::sincos(x, &cosY_val); - const realT sinhalfY_val = std::sin(x / realT{2}); + const realT sinY_val = sycl::sincos(y, &cosY_val); + const realT sinhalfY_val = std::sin(y / 2); const realT res_re = - expm1X_val * cosY_val - realT{2} * sinhalfY_val * sinhalfY_val; - const realT res_im = expX_val * sinY_val; + std::expm1(x) * cosY_val - 2 * sinhalfY_val * sinhalfY_val; + const realT res_im = std::exp(x) * sinY_val; return resT{res_re, res_im}; } else { diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp index 626e338db1..489cd6519a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -80,17 +80,17 @@ template struct Log1pFunctor const realT y = std::imag(in); // imaginary part of result - const realT imagRes = std::atan2(y, x + 1); + const realT res_im = std::atan2(y, x + 1); - if (std::abs(in) < realT(.5)) { - const realT realRes = x * (2 + x) + y * y; - return {std::log1p(realRes) / 2, imagRes}; + if (std::max(std::abs(x), std::abs(y)) < realT(.1)) { + const realT v = x * (2 + x) + y * y; + return {std::log1p(v) / 2, res_im}; } else { // when not close to zero, // prevent overflow - const realT realRes = std::hypot(x + 1, y); - return {std::log(realRes), imagRes}; + const realT m = std::hypot(x + 1, y); + return {std::log(m), res_im}; } } else { From 190e5d36d2ebc393daa719981f3c0740f58084ba Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 30 May 2023 13:48:00 -0700 Subject: [PATCH 3/3] Complex log1p return value is properly initialized --- .../include/kernels/elementwise_functions/log1p.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp index 489cd6519a..22f16238db 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -82,15 +82,15 @@ template struct Log1pFunctor // imaginary part of result const realT res_im = std::atan2(y, x + 1); - if (std::max(std::abs(x), std::abs(y)) < realT(.1)) { + if (std::max(std::abs(x), std::abs(y)) < realT{.1}) { const realT v = x * (2 + x) + y * y; - return {std::log1p(v) / 2, res_im}; + return resT{std::log1p(v) / 2, res_im}; } else { // when not close to zero, // prevent overflow const realT m = std::hypot(x + 1, y); - return {std::log(m), res_im}; + return resT{std::log(m), res_im}; } } else {