diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
index a300ceacc4..1ac896059f 100644
--- a/dpctl/tensor/libtensor/include/kernels/reductions.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
@@ -39,9 +39,6 @@
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
 
-namespace td_ns = dpctl::tensor::type_dispatch;
-namespace su_ns = dpctl::tensor::sycl_utils;
-
 namespace dpctl
 {
 namespace tensor
@@ -49,6 +46,9 @@ namespace tensor
 namespace kernels
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
+
 namespace reduction_detail
 {
 
diff --git a/dpctl/tensor/libtensor/include/utils/math_utils.hpp b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
index 97e2fee377..3b44954abd 100644
--- a/dpctl/tensor/libtensor/include/utils/math_utils.hpp
+++ b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
@@ -122,17 +122,32 @@ template <typename T> T logaddexp(T x, T y)
         return x + log2;
     }
     else {
-        // FIXME: switch to `sycl::log1p` when
-        // compiler segfault in CUDA build is fixed
         const T tmp = x - y;
-        if (tmp > 0) {
-            return x + std::log1p(sycl::exp(-tmp));
-        }
-        else if (tmp <= 0) {
-            return y + std::log1p(sycl::exp(tmp));
+        constexpr T zero(0);
+
+        if constexpr (std::is_same_v) {
+            return (tmp > zero)
+                       ? (x + sycl::log1p(sycl::exp(-tmp)))
+                       : ((tmp <= zero) ? y + sycl::log1p(sycl::exp(tmp))
+                                        : std::numeric_limits<T>::quiet_NaN());
         }
         else {
-            return std::numeric_limits<T>::quiet_NaN();
+            if constexpr (std::is_same_v) {
+                // FIXME: switch to `sycl::log1p` when
+                // compiler segfault in CUDA build is fixed
+                return (tmp > zero)
+                           ? (x + std::log1p(sycl::exp(-tmp)))
+                           : ((tmp <= zero)
+                                  ? y + std::log1p(sycl::exp(tmp))
+                                  : std::numeric_limits<T>::quiet_NaN());
+            }
+            else {
+                return (tmp > zero)
+                           ? (x + sycl::log1p(sycl::exp(-tmp)))
+                           : ((tmp <= zero)
+                                  ? y + sycl::log1p(sycl::exp(tmp))
+                                  : std::numeric_limits<T>::quiet_NaN());
+            }
         }
     }
 }
diff --git a/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp b/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
index 731d4a8f81..3ce0ae1264 100644
--- a/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
+++ b/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
@@ -30,6 +30,7 @@
 
 #include "kernels/linalg_functions/dot_product.hpp"
 #include "kernels/linalg_functions/gemm.hpp"
+#include "utils/type_dispatch_building.hpp"
 
 namespace dpctl
 {
@@ -38,6 +39,8 @@ namespace tensor
 namespace py_internal
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+
 template <typename T> struct DotAtomicOutputType
 {
     using value_type = typename std::disjunction< // disjunction is C++17
diff --git a/dpctl/tensor/libtensor/source/reductions/argmax.cpp b/dpctl/tensor/libtensor/source/reductions/argmax.cpp
index e441a36139..acfeb95087 100644
--- a/dpctl/tensor/libtensor/source/reductions/argmax.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/argmax.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/argmin.cpp b/dpctl/tensor/libtensor/source/reductions/argmin.cpp
index 4892893cc5..8e9c0106ac 100644
--- a/dpctl/tensor/libtensor/source/reductions/argmin.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/argmin.cpp
@@ -32,6 +32,8 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +46,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp b/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
index d2bb6e3877..d36b715f32 100644
--- a/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
@@ -68,6 +70,7 @@ struct TypePairSupportDataForLogSumExpReductionTemps
     static constexpr bool is_defined = std::disjunction< // disjunction is C++17
                                                          // feature, supported
                                                          // by DPC++ input bool
+#if 1
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
@@ -105,7 +108,6 @@ struct TypePairSupportDataForLogSumExpReductionTemps
         // input uint64_t
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
-
         // input half
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
@@ -117,6 +119,7 @@ struct TypePairSupportDataForLogSumExpReductionTemps
 
         // input double
         td_ns::TypePairDefinedEntry,
+#endif
 
         // fall-through
         td_ns::NotDefinedEntry>::is_defined;
diff --git a/dpctl/tensor/libtensor/source/reductions/max.cpp b/dpctl/tensor/libtensor/source/reductions/max.cpp
index 0e402403a4..8036d873aa 100644
--- a/dpctl/tensor/libtensor/source/reductions/max.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/max.cpp
@@ -31,6 +31,7 @@
 #include
 
 #include "kernels/reductions.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 #include "reduction_atomic_support.hpp"
@@ -46,6 +47,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/min.cpp b/dpctl/tensor/libtensor/source/reductions/min.cpp
index 970a05ea72..e612e59b8f 100644
--- a/dpctl/tensor/libtensor/source/reductions/min.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/min.cpp
@@ -31,6 +31,7 @@
 #include
 
 #include "kernels/reductions.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 #include "reduction_atomic_support.hpp"
@@ -46,6 +47,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp b/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
index ce655126a6..159b992307 100644
--- a/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
index ac2de98032..6d5cf6668f 100644
--- a/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
+++ b/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
@@ -52,6 +52,8 @@ namespace tensor
 namespace py_internal
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+
 /* ====================== dtype supported ======================== */
 
 /*! @brief Template implementing Python API for querying type support by
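For reference, the math_utils.hpp hunk above only changes how the numerically stable logaddexp identity, log(exp(x) + exp(y)) = max(x, y) + log1p(exp(-|x - y|)), is routed (std::log1p vs sycl::log1p per type). The snippet below is a minimal host-side sketch of that identity, not part of the patch: the name demo_logaddexp, the use of plain std::log1p/std::exp instead of the SYCL device functions, and the sample values in main() are all assumptions made for illustration.

#include <cmath>
#include <cstdio>
#include <limits>

// Host-side sketch of the stable logaddexp computation performed by the
// patched device helper; demo_logaddexp is a hypothetical name.
template <typename T> T demo_logaddexp(T x, T y)
{
    if (x == y) { // covers equal inputs, including matching signed infinities
        return x + std::log(T(2));
    }
    const T tmp = x - y;
    if (tmp > T(0)) {
        // x is the larger argument: factor out exp(x)
        return x + std::log1p(std::exp(-tmp));
    }
    else if (tmp <= T(0)) {
        // y is the larger argument: factor out exp(y)
        return y + std::log1p(std::exp(tmp));
    }
    // tmp is NaN (at least one argument was NaN): propagate NaN
    return std::numeric_limits<T>::quiet_NaN();
}

int main()
{
    // Naively evaluating log(exp(1000) + exp(1000)) overflows, but the
    // log1p form yields 1000 + log(2).
    std::printf("%.6f\n", demo_logaddexp(1000.0, 1000.0)); // 1000.693147
    std::printf("%.6f\n", demo_logaddexp(0.0, 0.0));       // 0.693147
    return 0;
}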