From 821a888899bbfbabb57ca96c843372d5837a13ed Mon Sep 17 00:00:00 2001
From: Nikita Grigorian <nikita.grigorian@intel.com>
Date: Fri, 26 May 2023 12:57:14 -0700
Subject: [PATCH 1/3] Implements expm1, log, and log1p - Also adds tests for
 real and complex variants where appropriate

---
 dpctl/tensor/__init__.py                      |   6 +
 dpctl/tensor/_elementwise_funcs.py            |  61 ++++-
 .../kernels/elementwise_functions/expm1.hpp   | 259 ++++++++++++++++++
 .../kernels/elementwise_functions/log.hpp     | 196 +++++++++++++
 .../kernels/elementwise_functions/log1p.hpp   | 233 ++++++++++++++++
 .../source/elementwise_functions.cpp          | 163 ++++++++++-
 dpctl/tests/elementwise/test_expm1.py         | 144 ++++++++++
 dpctl/tests/elementwise/test_log.py           | 128 +++++++++
 dpctl/tests/elementwise/test_log1p.py         | 146 ++++++++++
 9 files changed, 1327 insertions(+), 9 deletions(-)
 create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
 create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp
 create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
 create mode 100644 dpctl/tests/elementwise/test_expm1.py
 create mode 100644 dpctl/tests/elementwise/test_log.py
 create mode 100644 dpctl/tests/elementwise/test_log1p.py

diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
index 5896bde65a..c9c1c46f71 100644
--- a/dpctl/tensor/__init__.py
+++ b/dpctl/tensor/__init__.py
@@ -97,9 +97,12 @@
     cos,
     divide,
     equal,
+    expm1,
     isfinite,
     isinf,
     isnan,
+    log,
+    log1p,
     multiply,
     sqrt,
     subtract,
@@ -184,9 +187,12 @@
     "abs",
     "add",
     "cos",
+    "expm1",
     "isinf",
     "isnan",
     "isfinite",
+    "log",
+    "log1p",
     "sqrt",
     "divide",
     "multiply",
diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py
index 29d1052c9c..9f8b95c87c 100644
--- a/dpctl/tensor/_elementwise_funcs.py
+++ b/dpctl/tensor/_elementwise_funcs.py
@@ -153,7 +153,26 @@
 # FIXME: implement U13
 
 # U14: ==== EXPM1         (x)
-# FIXME: implement U14
+_expm1_docstring = """
+expm1(x, out=None, order='K')
+Computes an approximation of exp(x)-1 element-wise.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    usm_ndarray:
+        An array containing the element-wise exp(x)-1 values.
+"""
+
+expm1 = UnaryElementwiseFunc(
+    "expm1", ti._expm1_result_type, ti._expm1, _expm1_docstring
+)
 
 # U15: ==== FLOOR         (x)
 # FIXME: implement U15
@@ -210,10 +229,46 @@
 # FIXME: implement B14
 
 # U20: ==== LOG         (x)
-# FIXME: implement U20
+_log_docstring = """
+log(x, out=None, order='K')
+Computes the natural logarithm element-wise.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    usm_ndarray:
+        An array containing the element-wise natural logarithm values.
+"""
+
+log = UnaryElementwiseFunc("log", ti._log_result_type, ti._log, _log_docstring)
 
 # U21: ==== LOG1P       (x)
-# FIXME: implement U21
+_log1p_docstring = """
+log1p(x, out=None, order='K')
+Computes an approximation of log(1+x) element-wise.
+Args:
+    x (usm_ndarray):
+        Input array, expected to have numeric data type.
+    out (usm_ndarray):
+        Output array to populate. Array must have the correct
+        shape and the expected data type.
+    order ("C","F","A","K", optional): memory layout of the new
+        output array, if parameter `out` is `None`.
+        Default: "K".
+Return:
+    usm_ndarray:
+        An array containing the element-wise log(1+x) values.
+"""
+
+log1p = UnaryElementwiseFunc(
+    "log1p", ti._log1p_result_type, ti._log1p, _log1p_docstring
+)
 
 # U22: ==== LOG2        (x)
 # FIXME: implement U22
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
new file mode 100644
index 0000000000..81a06fc90b
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
@@ -0,0 +1,259 @@
+//=== expm1.hpp -   Unary function EXPM1                   ------
+//*-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of EXPM1(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace expm1
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT> struct Expm1Functor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support sugroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in)
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+            // expm1(x + I*y) = expm1(x)*cos(y) - 2*sin(y / 2)^2 +
+            // I*exp(x)*sin(y)
+            auto x = std::real(in);
+            const realT expm1X_val = std::expm1(x);
+            const realT expX_val = std::exp(x);
+
+            x = std::imag(in);
+            realT cosY_val;
+            const realT sinY_val = sycl::sincos(x, &cosY_val);
+            const realT sinhalfY_val = std::sin(x / realT{2});
+
+            const realT res_re =
+                expm1X_val * cosY_val - realT{2} * sinhalfY_val * sinhalfY_val;
+            const realT res_im = expX_val * sinY_val;
+            return resT{res_re, res_im};
+        }
+        else {
+            static_assert(std::is_floating_point_v<argT> ||
+                          std::is_same_v<argT, sycl::half>);
+            return std::expm1(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using Expm1ContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           Expm1Functor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using Expm1StridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, Expm1Functor<argTy, resTy>>;
+
+template <typename T> struct Expm1OutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+typedef sycl::event (*expm1_contig_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    const char *,
+    char *,
+    const std::vector<sycl::event> &);
+
+template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
+class expm1_contig_kernel;
+
+template <typename argTy>
+sycl::event expm1_contig_impl(sycl::queue exec_q,
+                              size_t nelems,
+                              const char *arg_p,
+                              char *res_p,
+                              const std::vector<sycl::event> &depends = {})
+{
+    sycl::event expm1_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        constexpr size_t lws = 64;
+        constexpr unsigned int vec_sz = 4;
+        constexpr unsigned int n_vecs = 2;
+        static_assert(lws % vec_sz == 0);
+        auto gws_range = sycl::range<1>(
+            ((nelems + n_vecs * lws * vec_sz - 1) / (lws * n_vecs * vec_sz)) *
+            lws);
+        auto lws_range = sycl::range<1>(lws);
+
+        using resTy = typename Expm1OutputType<argTy>::value_type;
+        const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<
+            class expm1_contig_kernel<argTy, resTy, vec_sz, n_vecs>>(
+            sycl::nd_range<1>(gws_range, lws_range),
+            Expm1ContigFunctor<argTy, resTy, vec_sz, n_vecs>(arg_tp, res_tp,
+                                                             nelems));
+    });
+    return expm1_ev;
+}
+
+template <typename fnT, typename T> struct Expm1ContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename Expm1OutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = expm1_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T> struct Expm1TypeMapFactory
+{
+    /*! @brief get typeid for output type of std::expm1(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename Expm1OutputType<T>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3> class expm1_strided_kernel;
+
+typedef sycl::event (*expm1_strided_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    int,
+    const py::ssize_t *,
+    const char *,
+    py::ssize_t,
+    char *,
+    py::ssize_t,
+    const std::vector<sycl::event> &,
+    const std::vector<sycl::event> &);
+
+template <typename argTy>
+sycl::event
+expm1_strided_impl(sycl::queue exec_q,
+                   size_t nelems,
+                   int nd,
+                   const py::ssize_t *shape_and_strides,
+                   const char *arg_p,
+                   py::ssize_t arg_offset,
+                   char *res_p,
+                   py::ssize_t res_offset,
+                   const std::vector<sycl::event> &depends,
+                   const std::vector<sycl::event> &additional_depends)
+{
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        cgh.depends_on(additional_depends);
+
+        using resTy = typename Expm1OutputType<argTy>::value_type;
+        using IndexerT =
+            typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
+
+        IndexerT arg_res_indexer(nd, arg_offset, res_offset, shape_and_strides);
+
+        const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        sycl::range<1> gRange{nelems};
+
+        cgh.parallel_for<expm1_strided_kernel<argTy, resTy, IndexerT>>(
+            gRange, Expm1StridedFunctor<argTy, resTy, IndexerT>(
+                        arg_tp, res_tp, arg_res_indexer));
+    });
+    return comp_ev;
+}
+
+template <typename fnT, typename T> struct Expm1StridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename Expm1OutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = expm1_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace expm1
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp
new file mode 100644
index 0000000000..f6998d4e0f
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp
@@ -0,0 +1,196 @@
+//=== log.hpp -   Unary function LOG                     ------  *-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of LOG(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace log
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT> struct LogFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support sugroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in)
+    {
+        return std::log(in);
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using LogContigFunctor = elementwise_common::
+    UnaryContigFunctor<argTy, resTy, LogFunctor<argTy, resTy>, vec_sz, n_vecs>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using LogStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, LogFunctor<argTy, resTy>>;
+
+template <typename T> struct LogOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+typedef sycl::event (*log_contig_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    const char *,
+    char *,
+    const std::vector<sycl::event> &);
+
+template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
+class log_contig_kernel;
+
+template <typename argTy>
+sycl::event log_contig_impl(sycl::queue exec_q,
+                            size_t nelems,
+                            const char *arg_p,
+                            char *res_p,
+                            const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::unary_contig_impl<
+        argTy, LogOutputType, LogContigFunctor, log_contig_kernel>(
+        exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T> struct LogContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename LogOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = log_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T> struct LogTypeMapFactory
+{
+    /*! @brief get typeid for output type of std::log(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename LogOutputType<T>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3> class log_strided_kernel;
+
+typedef sycl::event (*log_strided_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    int,
+    const py::ssize_t *,
+    const char *,
+    py::ssize_t,
+    char *,
+    py::ssize_t,
+    const std::vector<sycl::event> &,
+    const std::vector<sycl::event> &);
+
+template <typename argTy>
+sycl::event log_strided_impl(sycl::queue exec_q,
+                             size_t nelems,
+                             int nd,
+                             const py::ssize_t *shape_and_strides,
+                             const char *arg_p,
+                             py::ssize_t arg_offset,
+                             char *res_p,
+                             py::ssize_t res_offset,
+                             const std::vector<sycl::event> &depends,
+                             const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, LogOutputType, LogStridedFunctor, log_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T> struct LogStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename LogOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = log_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace log
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
new file mode 100644
index 0000000000..626e338db1
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
@@ -0,0 +1,233 @@
+//=== log1p.hpp -   Unary function LOG1P                   ------
+//*-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of LOG1P(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace log1p
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+// TODO: evaluate precision against alternatives
+template <typename argT, typename resT> struct Log1pFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support sugroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in)
+    {
+        if constexpr (is_complex<argT>::value) {
+            // log1p(z) = ln((x + 1) + yI)
+            //          = ln(|(x + 1) + yi|)
+            //             + I * atan2(y, x + 1)
+            //          = ln(sqrt((x + 1)^2 + y^2))
+            //             + I *atan2(y, x + 1)
+            //          = log1p(x^2 + 2x + y^2) / 2
+            //             + I * atan2(y, x + 1)
+            using realT = typename argT::value_type;
+            const realT x = std::real(in);
+            const realT y = std::imag(in);
+
+            // imaginary part of result
+            const realT imagRes = std::atan2(y, x + 1);
+
+            if (std::abs(in) < realT(.5)) {
+                const realT realRes = x * (2 + x) + y * y;
+                return {std::log1p(realRes) / 2, imagRes};
+            }
+            else {
+                // when not close to zero,
+                // prevent overflow
+                const realT realRes = std::hypot(x + 1, y);
+                return {std::log(realRes), imagRes};
+            }
+        }
+        else {
+            static_assert(std::is_floating_point_v<argT> ||
+                          std::is_same_v<argT, sycl::half>);
+            return std::log1p(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using Log1pContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           Log1pFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using Log1pStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, Log1pFunctor<argTy, resTy>>;
+
+template <typename T> struct Log1pOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+typedef sycl::event (*log1p_contig_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    const char *,
+    char *,
+    const std::vector<sycl::event> &);
+
+template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
+class log1p_contig_kernel;
+
+template <typename argTy>
+sycl::event log1p_contig_impl(sycl::queue exec_q,
+                              size_t nelems,
+                              const char *arg_p,
+                              char *res_p,
+                              const std::vector<sycl::event> &depends = {})
+{
+    return elementwise_common::unary_contig_impl<
+        argTy, Log1pOutputType, Log1pContigFunctor, log1p_contig_kernel>(
+        exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T> struct Log1pContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename Log1pOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = log1p_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T> struct Log1pTypeMapFactory
+{
+    /*! @brief get typeid for output type of std::log1p(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename Log1pOutputType<T>::value_type;
+        ;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3> class log1p_strided_kernel;
+
+typedef sycl::event (*log1p_strided_impl_fn_ptr_t)(
+    sycl::queue,
+    size_t,
+    int,
+    const py::ssize_t *,
+    const char *,
+    py::ssize_t,
+    char *,
+    py::ssize_t,
+    const std::vector<sycl::event> &,
+    const std::vector<sycl::event> &);
+
+template <typename argTy>
+sycl::event
+log1p_strided_impl(sycl::queue exec_q,
+                   size_t nelems,
+                   int nd,
+                   const py::ssize_t *shape_and_strides,
+                   const char *arg_p,
+                   py::ssize_t arg_offset,
+                   char *res_p,
+                   py::ssize_t res_offset,
+                   const std::vector<sycl::event> &depends,
+                   const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, Log1pOutputType, Log1pStridedFunctor, log1p_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T> struct Log1pStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename Log1pOutputType<T>::value_type,
+                                     void>) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = log1p_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace log1p
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
index c943f28c97..7104c65bf6 100644
--- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp
+++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
@@ -36,9 +36,12 @@
 #include "kernels/elementwise_functions/add.hpp"
 #include "kernels/elementwise_functions/cos.hpp"
 #include "kernels/elementwise_functions/equal.hpp"
+#include "kernels/elementwise_functions/expm1.hpp"
 #include "kernels/elementwise_functions/isfinite.hpp"
 #include "kernels/elementwise_functions/isinf.hpp"
 #include "kernels/elementwise_functions/isnan.hpp"
+#include "kernels/elementwise_functions/log.hpp"
+#include "kernels/elementwise_functions/log1p.hpp"
 #include "kernels/elementwise_functions/multiply.hpp"
 #include "kernels/elementwise_functions/sqrt.hpp"
 #include "kernels/elementwise_functions/subtract.hpp"
@@ -458,7 +461,37 @@ namespace impl
 // U14: ==== EXPM1         (x)
 namespace impl
 {
-// FIXME: add code for U14
+
+namespace expm1_fn_ns = dpctl::tensor::kernels::expm1;
+
+static unary_contig_impl_fn_ptr_t
+    expm1_contig_dispatch_vector[td_ns::num_types];
+static int expm1_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    expm1_strided_dispatch_vector[td_ns::num_types];
+
+void populate_expm1_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = expm1_fn_ns;
+
+    using fn_ns::Expm1ContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, Expm1ContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector);
+
+    using fn_ns::Expm1StridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, Expm1StridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector);
+
+    using fn_ns::Expm1TypeMapFactory;
+    DispatchVectorBuilder<int, Expm1TypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(expm1_output_typeid_vector);
+}
+
 } // namespace impl
 
 // U15: ==== FLOOR         (x)
@@ -611,13 +644,72 @@ namespace impl
 // U20: ==== LOG         (x)
 namespace impl
 {
-// FIXME: add code for U20
+
+namespace log_fn_ns = dpctl::tensor::kernels::log;
+
+static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types];
+static int log_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    log_strided_dispatch_vector[td_ns::num_types];
+
+void populate_log_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = log_fn_ns;
+
+    using fn_ns::LogContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, LogContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(log_contig_dispatch_vector);
+
+    using fn_ns::LogStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, LogStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(log_strided_dispatch_vector);
+
+    using fn_ns::LogTypeMapFactory;
+    DispatchVectorBuilder<int, LogTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(log_output_typeid_vector);
+}
+
 } // namespace impl
 
 // U21: ==== LOG1P       (x)
 namespace impl
 {
-// FIXME: add code for U21
+
+namespace log1p_fn_ns = dpctl::tensor::kernels::log1p;
+
+static unary_contig_impl_fn_ptr_t
+    log1p_contig_dispatch_vector[td_ns::num_types];
+static int log1p_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    log1p_strided_dispatch_vector[td_ns::num_types];
+
+void populate_log1p_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = log1p_fn_ns;
+
+    using fn_ns::Log1pContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, Log1pContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector);
+
+    using fn_ns::Log1pStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, Log1pStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector);
+
+    using fn_ns::Log1pTypeMapFactory;
+    DispatchVectorBuilder<int, Log1pTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(log1p_output_typeid_vector);
+}
+
 } // namespace impl
 
 // U22: ==== LOG2        (x)
@@ -1145,7 +1237,27 @@ void init_elementwise_functions(py::module_ m)
     // FIXME:
 
     // U14: ==== EXPM1         (x)
-    // FIXME:
+    {
+        impl::populate_expm1_dispatch_vectors();
+        using impl::expm1_contig_dispatch_vector;
+        using impl::expm1_output_typeid_vector;
+        using impl::expm1_strided_dispatch_vector;
+
+        auto expm1_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q,
+                               const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, expm1_output_typeid_vector,
+                expm1_contig_dispatch_vector, expm1_strided_dispatch_vector);
+        };
+        m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+        auto expm1_result_type_pyapi = [&](py::dtype dtype) {
+            return py_unary_ufunc_result_type(dtype,
+                                              expm1_output_typeid_vector);
+        };
+        m.def("_expm1_result_type", expm1_result_type_pyapi);
+    }
 
     // U15: ==== FLOOR         (x)
     // FIXME:
@@ -1242,10 +1354,49 @@ void init_elementwise_functions(py::module_ m)
     // FIXME:
 
     // U20: ==== LOG         (x)
-    // FIXME:
+    {
+        impl::populate_log_dispatch_vectors();
+        using impl::log_contig_dispatch_vector;
+        using impl::log_output_typeid_vector;
+        using impl::log_strided_dispatch_vector;
+
+        auto log_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q,
+                             const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, log_output_typeid_vector,
+                log_contig_dispatch_vector, log_strided_dispatch_vector);
+        };
+        m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+        auto log_result_type_pyapi = [&](py::dtype dtype) {
+            return py_unary_ufunc_result_type(dtype, log_output_typeid_vector);
+        };
+        m.def("_log_result_type", log_result_type_pyapi);
+    }
 
     // U21: ==== LOG1P       (x)
-    // FIXME:
+    {
+        impl::populate_log1p_dispatch_vectors();
+        using impl::log1p_contig_dispatch_vector;
+        using impl::log1p_output_typeid_vector;
+        using impl::log1p_strided_dispatch_vector;
+
+        auto log1p_pyapi = [&](arrayT src, arrayT dst, sycl::queue exec_q,
+                               const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, log1p_output_typeid_vector,
+                log1p_contig_dispatch_vector, log1p_strided_dispatch_vector);
+        };
+        m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+        auto log1p_result_type_pyapi = [&](py::dtype dtype) {
+            return py_unary_ufunc_result_type(dtype,
+                                              log1p_output_typeid_vector);
+        };
+        m.def("_log1p_result_type", log1p_result_type_pyapi);
+    }
 
     // U22: ==== LOG2        (x)
     // FIXME:
diff --git a/dpctl/tests/elementwise/test_expm1.py b/dpctl/tests/elementwise/test_expm1.py
new file mode 100644
index 0000000000..20dc421c77
--- /dev/null
+++ b/dpctl/tests/elementwise/test_expm1.py
@@ -0,0 +1,144 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _map_to_device_dtype, _usm_types
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_expm1_out_type(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    X = dpt.asarray(0, dtype=dtype, sycl_queue=q)
+    expected_dtype = np.expm1(np.array(0, dtype=dtype)).dtype
+    expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device)
+    assert dpt.expm1(X).dtype == expected_dtype
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_expm1_output_contig(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 1027
+
+    X = dpt.linspace(-2, 2, num=n_seq, dtype=dtype, sycl_queue=q)
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.expm1(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), np.expm1(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_expm1_output_strided(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 2 * 1027
+
+    X = dpt.linspace(-2, 2, num=n_seq, dtype=dtype, sycl_queue=q)[::-2]
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.expm1(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), np.expm1(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("usm_type", _usm_types)
+def test_expm1_usm_type(usm_type):
+    q = get_queue_or_skip()
+
+    arg_dt = np.dtype("f4")
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q)
+    X[..., 0::2] = 1 / 50
+    X[..., 1::2] = 1 / 25
+
+    Y = dpt.expm1(X)
+    assert Y.usm_type == X.usm_type
+    assert Y.sycl_queue == X.sycl_queue
+    assert Y.flags.c_contiguous
+
+    expected_Y = np.empty(input_shape, dtype=arg_dt)
+    expected_Y[..., 0::2] = np.expm1(np.float32(1 / 50))
+    expected_Y[..., 1::2] = np.expm1(np.float32(1 / 25))
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_expm1_order(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    arg_dt = np.dtype(dtype)
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q)
+    X[..., 0::2] = 1 / 50
+    X[..., 1::2] = 1 / 25
+
+    for ord in ["C", "F", "A", "K"]:
+        for perms in itertools.permutations(range(4)):
+            U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms)
+            Y = dpt.expm1(U, order=ord)
+            expected_Y = np.expm1(dpt.asnumpy(U))
+            tol = 8 * max(
+                dpt.finfo(Y.dtype).resolution,
+                np.finfo(expected_Y.dtype).resolution,
+            )
+            assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
+
+
+def test_expm1_special_cases():
+    q = get_queue_or_skip()
+
+    X = dpt.asarray(
+        [dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q
+    )
+    Xnp = dpt.asnumpy(X)
+
+    tol = dpt.finfo(X.dtype).resolution
+    assert_allclose(
+        dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol
+    )
+
+    # special cases for complex variant
+    vals = [
+        complex(*val)
+        for val in itertools.permutations(
+            [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0], 2
+        )
+    ]
+    X = dpt.asarray(vals, dtype=dpt.complex64)
+    Xnp = dpt.asnumpy(X)
+
+    tol = dpt.finfo(X.dtype).resolution
+    assert_allclose(
+        dpt.asnumpy(dpt.expm1(X)), np.expm1(Xnp), atol=tol, rtol=tol
+    )
diff --git a/dpctl/tests/elementwise/test_log.py b/dpctl/tests/elementwise/test_log.py
new file mode 100644
index 0000000000..ed56fb6468
--- /dev/null
+++ b/dpctl/tests/elementwise/test_log.py
@@ -0,0 +1,128 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_equal
+
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _map_to_device_dtype, _usm_types
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_log_out_type(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    X = dpt.asarray(0, dtype=dtype, sycl_queue=q)
+    expected_dtype = np.log(np.array(0, dtype=dtype)).dtype
+    expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device)
+    assert dpt.log(X).dtype == expected_dtype
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_log_output_contig(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 1027
+
+    X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q)
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.log(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_log_output_strided(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 2 * 1027
+
+    X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2]
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.log(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("usm_type", _usm_types)
+def test_log_usm_type(usm_type):
+    q = get_queue_or_skip()
+
+    arg_dt = np.dtype("f4")
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q)
+    X[..., 0::2] = 4 * dpt.e
+    X[..., 1::2] = 10 * dpt.e
+
+    Y = dpt.log(X)
+    assert Y.usm_type == X.usm_type
+    assert Y.sycl_queue == X.sycl_queue
+    assert Y.flags.c_contiguous
+
+    expected_Y = np.empty(input_shape, dtype=arg_dt)
+    expected_Y[..., 0::2] = np.log(np.float32(4 * dpt.e))
+    expected_Y[..., 1::2] = np.log(np.float32(10 * dpt.e))
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    np.testing.assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_log_order(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    arg_dt = np.dtype(dtype)
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q)
+    X[..., 0::2] = 4 * dpt.e
+    X[..., 1::2] = 10 * dpt.e
+
+    for ord in ["C", "F", "A", "K"]:
+        for perms in itertools.permutations(range(4)):
+            U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms)
+            Y = dpt.log(U, order=ord)
+            expected_Y = np.log(dpt.asnumpy(U))
+            tol = 8 * max(
+                dpt.finfo(Y.dtype).resolution,
+                np.finfo(expected_Y.dtype).resolution,
+            )
+            np.testing.assert_allclose(
+                dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol
+            )
+
+
+def test_log_special_cases():
+    q = get_queue_or_skip()
+
+    X = dpt.asarray(
+        [dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4", sycl_queue=q
+    )
+    Xnp = dpt.asnumpy(X)
+
+    assert_equal(dpt.asnumpy(dpt.log(X)), np.log(Xnp))
diff --git a/dpctl/tests/elementwise/test_log1p.py b/dpctl/tests/elementwise/test_log1p.py
new file mode 100644
index 0000000000..2820b4f8b5
--- /dev/null
+++ b/dpctl/tests/elementwise/test_log1p.py
@@ -0,0 +1,146 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_allclose
+
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _map_to_device_dtype, _usm_types
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_log1p_out_type(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    X = dpt.asarray(0, dtype=dtype, sycl_queue=q)
+    expected_dtype = np.log1p(np.array(0, dtype=dtype)).dtype
+    expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device)
+    assert dpt.log1p(X).dtype == expected_dtype
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_log1p_output_contig(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 1027
+
+    X = dpt.linspace(0, 2, num=n_seq, dtype=dtype, sycl_queue=q)
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.log1p(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), np.log1p(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
+def test_log1p_output_strided(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    n_seq = 2 * 1027
+
+    X = dpt.linspace(0, 2, num=n_seq, dtype=dtype, sycl_queue=q)[::-2]
+    Xnp = dpt.asnumpy(X)
+
+    Y = dpt.log1p(X)
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), np.log1p(Xnp), atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("usm_type", _usm_types)
+def test_log1p_usm_type(usm_type):
+    q = get_queue_or_skip()
+
+    arg_dt = np.dtype("f4")
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q)
+    X[..., 0::2] = dpt.e / 1000
+    X[..., 1::2] = dpt.e / 100
+
+    Y = dpt.log1p(X)
+    assert Y.usm_type == X.usm_type
+    assert Y.sycl_queue == X.sycl_queue
+    assert Y.flags.c_contiguous
+
+    expected_Y = np.empty(input_shape, dtype=arg_dt)
+    expected_Y[..., 0::2] = np.log1p(np.float32(dpt.e / 1000))
+    expected_Y[..., 1::2] = np.log1p(np.float32(dpt.e / 100))
+    tol = 8 * dpt.finfo(Y.dtype).resolution
+
+    assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
+
+
+@pytest.mark.parametrize("dtype", _all_dtypes)
+def test_log1p_order(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    arg_dt = np.dtype(dtype)
+    input_shape = (10, 10, 10, 10)
+    X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q)
+    X[..., 0::2] = dpt.e / 1000
+    X[..., 1::2] = dpt.e / 100
+
+    for ord in ["C", "F", "A", "K"]:
+        for perms in itertools.permutations(range(4)):
+            U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms)
+            Y = dpt.log1p(U, order=ord)
+            expected_Y = np.log1p(dpt.asnumpy(U))
+            tol = 8 * max(
+                dpt.finfo(Y.dtype).resolution,
+                np.finfo(expected_Y.dtype).resolution,
+            )
+            assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol)
+
+
+def test_log1p_special_cases():
+    q = get_queue_or_skip()
+
+    X = dpt.asarray(
+        [dpt.nan, -1.0, -2.0, 0.0, -0.0, dpt.inf, -dpt.inf],
+        dtype="f4",
+        sycl_queue=q,
+    )
+    Xnp = dpt.asnumpy(X)
+
+    tol = dpt.finfo(X.dtype).resolution
+    assert_allclose(
+        dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol
+    )
+
+    # special cases for complex
+    vals = [
+        complex(*val)
+        for val in itertools.permutations(
+            [dpt.nan, dpt.inf, -dpt.inf, 0.0, -0.0, 1.0, -1.0, -2.0], 2
+        )
+    ]
+    X = dpt.asarray(vals, dtype=dpt.complex64)
+    Xnp = dpt.asnumpy(X)
+
+    tol = dpt.finfo(X.dtype).resolution
+    assert_allclose(
+        dpt.asnumpy(dpt.log1p(X)), np.log1p(Xnp), atol=tol, rtol=tol
+    )

From 909d8946ac7823343fe67847626e16862dda3a69 Mon Sep 17 00:00:00 2001
From: Nikita Grigorian <nikita.grigorian@intel.com>
Date: Mon, 29 May 2023 17:58:35 -0700
Subject: [PATCH 2/3] Changes per review by @oleksandr-pavlyk

---
 .../kernels/elementwise_functions/expm1.hpp        | 14 ++++++--------
 .../kernels/elementwise_functions/log1p.hpp        | 12 ++++++------
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
index 81a06fc90b..8188dd153e 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp
@@ -70,18 +70,16 @@ template <typename argT, typename resT> struct Expm1Functor
             using realT = typename argT::value_type;
             // expm1(x + I*y) = expm1(x)*cos(y) - 2*sin(y / 2)^2 +
             // I*exp(x)*sin(y)
-            auto x = std::real(in);
-            const realT expm1X_val = std::expm1(x);
-            const realT expX_val = std::exp(x);
+            const realT x = std::real(in);
+            const realT y = std::imag(in);
 
-            x = std::imag(in);
             realT cosY_val;
-            const realT sinY_val = sycl::sincos(x, &cosY_val);
-            const realT sinhalfY_val = std::sin(x / realT{2});
+            const realT sinY_val = sycl::sincos(y, &cosY_val);
+            const realT sinhalfY_val = std::sin(y / 2);
 
             const realT res_re =
-                expm1X_val * cosY_val - realT{2} * sinhalfY_val * sinhalfY_val;
-            const realT res_im = expX_val * sinY_val;
+                std::expm1(x) * cosY_val - 2 * sinhalfY_val * sinhalfY_val;
+            const realT res_im = std::exp(x) * sinY_val;
             return resT{res_re, res_im};
         }
         else {
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
index 626e338db1..489cd6519a 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
@@ -80,17 +80,17 @@ template <typename argT, typename resT> struct Log1pFunctor
             const realT y = std::imag(in);
 
             // imaginary part of result
-            const realT imagRes = std::atan2(y, x + 1);
+            const realT res_im = std::atan2(y, x + 1);
 
-            if (std::abs(in) < realT(.5)) {
-                const realT realRes = x * (2 + x) + y * y;
-                return {std::log1p(realRes) / 2, imagRes};
+            if (std::max(std::abs(x), std::abs(y)) < realT(.1)) {
+                const realT v = x * (2 + x) + y * y;
+                return {std::log1p(v) / 2, res_im};
             }
             else {
                 // when not close to zero,
                 // prevent overflow
-                const realT realRes = std::hypot(x + 1, y);
-                return {std::log(realRes), imagRes};
+                const realT m = std::hypot(x + 1, y);
+                return {std::log(m), res_im};
             }
         }
         else {

From 190e5d36d2ebc393daa719981f3c0740f58084ba Mon Sep 17 00:00:00 2001
From: Nikita Grigorian <nikita.grigorian@intel.com>
Date: Tue, 30 May 2023 13:48:00 -0700
Subject: [PATCH 3/3] Complex log1p return value is properly initialized

---
 .../include/kernels/elementwise_functions/log1p.hpp         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
index 489cd6519a..22f16238db 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp
@@ -82,15 +82,15 @@ template <typename argT, typename resT> struct Log1pFunctor
             // imaginary part of result
             const realT res_im = std::atan2(y, x + 1);
 
-            if (std::max(std::abs(x), std::abs(y)) < realT(.1)) {
+            if (std::max(std::abs(x), std::abs(y)) < realT{.1}) {
                 const realT v = x * (2 + x) + y * y;
-                return {std::log1p(v) / 2, res_im};
+                return resT{std::log1p(v) / 2, res_im};
             }
             else {
                 // when not close to zero,
                 // prevent overflow
                 const realT m = std::hypot(x + 1, y);
-                return {std::log(m), res_im};
+                return resT{std::log(m), res_im};
             }
         }
         else {