diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
index a300ceacc4..1ac896059f 100644
--- a/dpctl/tensor/libtensor/include/kernels/reductions.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
@@ -39,9 +39,6 @@
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
 
-namespace td_ns = dpctl::tensor::type_dispatch;
-namespace su_ns = dpctl::tensor::sycl_utils;
-
 namespace dpctl
 {
 namespace tensor
@@ -49,6 +46,9 @@ namespace tensor
 namespace kernels
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
+
 namespace reduction_detail
 {
 
diff --git a/dpctl/tensor/libtensor/include/utils/math_utils.hpp b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
index 97e2fee377..3b44954abd 100644
--- a/dpctl/tensor/libtensor/include/utils/math_utils.hpp
+++ b/dpctl/tensor/libtensor/include/utils/math_utils.hpp
@@ -122,17 +122,32 @@ template <typename T> T logaddexp(T x, T y)
         return x + log2;
     }
     else {
-        // FIXME: switch to `sycl::log1p` when
-        // compiler segfault in CUDA build is fixed
         const T tmp = x - y;
-        if (tmp > 0) {
-            return x + std::log1p(sycl::exp(-tmp));
-        }
-        else if (tmp <= 0) {
-            return y + std::log1p(sycl::exp(tmp));
+        constexpr T zero(0);
+
+        if constexpr (std::is_same_v) {
+            return (tmp > zero)
+                       ? (x + sycl::log1p(sycl::exp(-tmp)))
+                       : ((tmp <= zero) ? y + sycl::log1p(sycl::exp(tmp))
+                                        : std::numeric_limits<T>::quiet_NaN());
         }
         else {
-            return std::numeric_limits<T>::quiet_NaN();
+            if constexpr (std::is_same_v) {
+                // FIXME: switch to `sycl::log1p` when
+                // compiler segfault in CUDA build is fixed
+                return (tmp > zero)
+                           ? (x + std::log1p(sycl::exp(-tmp)))
+                           : ((tmp <= zero)
+                                  ? y + std::log1p(sycl::exp(tmp))
+                                  : std::numeric_limits<T>::quiet_NaN());
+            }
+            else {
+                return (tmp > zero)
+                           ? (x + sycl::log1p(sycl::exp(-tmp)))
+                           : ((tmp <= zero)
+                                  ? y + sycl::log1p(sycl::exp(tmp))
+                                  : std::numeric_limits<T>::quiet_NaN());
+            }
         }
     }
 }
diff --git a/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp b/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
index 731d4a8f81..3ce0ae1264 100644
--- a/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
+++ b/dpctl/tensor/libtensor/source/linalg_functions/dot_dispatch.hpp
@@ -30,6 +30,7 @@
 
 #include "kernels/linalg_functions/dot_product.hpp"
 #include "kernels/linalg_functions/gemm.hpp"
+#include "utils/type_dispatch_building.hpp"
 
 namespace dpctl
 {
@@ -38,6 +39,8 @@ namespace tensor
 namespace py_internal
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+
 template <typename T> struct DotAtomicOutputType
 {
     using value_type = typename std::disjunction< // disjunction is C++17
diff --git a/dpctl/tensor/libtensor/source/reductions/argmax.cpp b/dpctl/tensor/libtensor/source/reductions/argmax.cpp
index e441a36139..acfeb95087 100644
--- a/dpctl/tensor/libtensor/source/reductions/argmax.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/argmax.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/argmin.cpp b/dpctl/tensor/libtensor/source/reductions/argmin.cpp
index 4892893cc5..8e9c0106ac 100644
--- a/dpctl/tensor/libtensor/source/reductions/argmin.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/argmin.cpp
@@ -32,6 +32,8 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +46,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp b/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
index d2bb6e3877..d36b715f32 100644
--- a/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/logsumexp.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
@@ -68,6 +70,7 @@ struct TypePairSupportDataForLogSumExpReductionTemps
     static constexpr bool is_defined = std::disjunction< // disjunction is C++17
                                                          // feature, supported
                                                          // by DPC++ input bool
+#if 1
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
@@ -105,7 +108,6 @@ struct TypePairSupportDataForLogSumExpReductionTemps
         // input uint64_t
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
-
         // input half
         td_ns::TypePairDefinedEntry,
         td_ns::TypePairDefinedEntry,
@@ -117,6 +119,7 @@ struct TypePairSupportDataForLogSumExpReductionTemps
 
         // input double
         td_ns::TypePairDefinedEntry,
+#endif
 
         // fall-through
         td_ns::NotDefinedEntry>::is_defined;
diff --git a/dpctl/tensor/libtensor/source/reductions/max.cpp b/dpctl/tensor/libtensor/source/reductions/max.cpp
index 0e402403a4..8036d873aa 100644
--- a/dpctl/tensor/libtensor/source/reductions/max.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/max.cpp
@@ -31,6 +31,7 @@
 #include
 
 #include "kernels/reductions.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 #include "reduction_atomic_support.hpp"
@@ -46,6 +47,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/min.cpp b/dpctl/tensor/libtensor/source/reductions/min.cpp
index 970a05ea72..e612e59b8f 100644
--- a/dpctl/tensor/libtensor/source/reductions/min.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/min.cpp
@@ -31,6 +31,7 @@
 #include
 
 #include "kernels/reductions.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 #include "reduction_atomic_support.hpp"
@@ -46,6 +47,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp b/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
index ce655126a6..159b992307 100644
--- a/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
+++ b/dpctl/tensor/libtensor/source/reductions/reduce_hypot.cpp
@@ -32,6 +32,7 @@
 
 #include "kernels/reductions.hpp"
 #include "reduction_over_axis.hpp"
+#include "utils/sycl_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 
 namespace py = pybind11;
@@ -44,6 +45,7 @@ namespace py_internal
 {
 
 namespace td_ns = dpctl::tensor::type_dispatch;
+namespace su_ns = dpctl::tensor::sycl_utils;
 
 namespace impl
 {
diff --git a/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
index ac2de98032..6d5cf6668f 100644
--- a/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
+++ b/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
@@ -52,6 +52,8 @@ namespace tensor
 namespace py_internal
 {
 
+namespace td_ns = dpctl::tensor::type_dispatch;
+
 /* ====================== dtype supported ======================== */
 
 /*! @brief Template implementing Python API for querying type support by
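For reference, the math_utils.hpp hunk above only changes how the numerically stable logaddexp identity, log(exp(x) + exp(y)) = max(x, y) + log1p(exp(-|x - y|)), is routed (std::log1p vs sycl::log1p per type). The snippet below is a minimal host-side sketch of that identity, not part of the patch: the name demo_logaddexp, the use of plain std::log1p/std::exp instead of the SYCL device functions, and the sample values in main() are all assumptions made for illustration.

#include <cmath>
#include <cstdio>
#include <limits>

// Host-side sketch of the stable logaddexp computation performed by the
// patched device helper; demo_logaddexp is a hypothetical name.
template <typename T> T demo_logaddexp(T x, T y)
{
    if (x == y) { // covers equal inputs, including matching signed infinities
        return x + std::log(T(2));
    }
    const T tmp = x - y;
    if (tmp > T(0)) {
        // x is the larger argument: factor out exp(x)
        return x + std::log1p(std::exp(-tmp));
    }
    else if (tmp <= T(0)) {
        // y is the larger argument: factor out exp(y)
        return y + std::log1p(std::exp(tmp));
    }
    // tmp is NaN (at least one argument was NaN): propagate NaN
    return std::numeric_limits<T>::quiet_NaN();
}

int main()
{
    // Naively evaluating log(exp(1000) + exp(1000)) overflows, but the
    // log1p form yields 1000 + log(2).
    std::printf("%.6f\n", demo_logaddexp(1000.0, 1000.0)); // 1000.693147
    std::printf("%.6f\n", demo_logaddexp(0.0, 0.0));       // 0.693147
    return 0;
}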