diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85602f23e9..f244fddafd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -92,6 +92,7 @@ The full list of changes that went into this release are:
 * Element-wise `tensor.divide` and comparison operations allow greater range of Python integer and integer array combinations [gh-1771](https://github.com/IntelPython/dpctl/pull/1771)
 * Fix for unexpected behavior when using floating point types for array indexing [gh-1792](https://github.com/IntelPython/dpctl/pull/1792)
 * Enable `pytest --pyargs dpctl.tests` [gh-1833](https://github.com/IntelPython/dpctl/pull/1833)
+* Fix for undefined behavior in indexing using integer arrays [gh-1894](https://github.com/IntelPython/dpctl/pull/1894)
 
 ### Maintenance
 
diff --git a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
index 485fbef513..462323f001 100644
--- a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
@@ -30,6 +30,7 @@
 #include <type_traits>
 
 #include "dpctl_tensor_types.hpp"
+#include "utils/indexing_utils.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/type_utils.hpp"
 
@@ -42,54 +43,10 @@ namespace kernels
 namespace indexing
 {
 
-using namespace dpctl::tensor::offset_utils;
-
-template <typename ProjectorT,
-          typename OrthogStrider,
-          typename IndicesStrider,
-          typename AxesStrider,
-          typename T,
-          typename indT>
-class take_kernel;
 template <typename ProjectorT,
-          typename OrthogStrider,
-          typename IndicesStrider,
-          typename AxesStrider,
-          typename T,
-          typename indT>
-class put_kernel;
-
-class WrapIndex
-{
-public:
-    WrapIndex() = default;
-
-    void operator()(ssize_t max_item, ssize_t &ind) const
-    {
-        max_item = std::max<ssize_t>(max_item, 1);
-        ind = sycl::clamp<ssize_t>(ind, -max_item, max_item - 1);
-        ind = (ind < 0) ? ind + max_item : ind;
-        return;
-    }
-};
-
-class ClipIndex
-{
-public:
-    ClipIndex() = default;
-
-    void operator()(ssize_t max_item, ssize_t &ind) const
-    {
-        max_item = std::max<ssize_t>(max_item, 1);
-        ind = sycl::clamp<ssize_t>(ind, ssize_t(0), max_item - 1);
-        return;
-    }
-};
-
-template <typename ProjectorT,
-          typename OrthogStrider,
-          typename IndicesStrider,
-          typename AxesStrider,
+          typename OrthogIndexer,
+          typename IndicesIndexer,
+          typename AxesIndexer,
           typename T,
           typename indT>
 class TakeFunctor
@@ -101,9 +58,9 @@ class TakeFunctor
     int k_ = 0;
     size_t ind_nelems_ = 0;
     const ssize_t *axes_shape_and_strides_ = nullptr;
-    const OrthogStrider orthog_strider;
-    const IndicesStrider ind_strider;
-    const AxesStrider axes_strider;
+    const OrthogIndexer orthog_strider;
+    const IndicesIndexer ind_strider;
+    const AxesIndexer axes_strider;
 
 public:
     TakeFunctor(const char *src_cp,
@@ -112,9 +69,9 @@ class TakeFunctor
                 int k,
                 size_t ind_nelems,
                 const ssize_t *axes_shape_and_strides,
-                const OrthogStrider &orthog_strider_,
-                const IndicesStrider &ind_strider_,
-                const AxesStrider &axes_strider_)
+                const OrthogIndexer &orthog_strider_,
+                const IndicesIndexer &ind_strider_,
+                const AxesIndexer &axes_strider_)
         : src_(src_cp), dst_(dst_cp), ind_(ind_cp), k_(k),
           ind_nelems_(ind_nelems),
           axes_shape_and_strides_(axes_shape_and_strides),
@@ -136,16 +93,16 @@ class TakeFunctor
         ssize_t src_offset = orthog_offsets.get_first_offset();
         ssize_t dst_offset = orthog_offsets.get_second_offset();
 
-        const ProjectorT proj{};
+        constexpr ProjectorT proj{};
         for (int axis_idx = 0; axis_idx < k_; ++axis_idx) {
             indT *ind_data = reinterpret_cast<indT *>(ind_[axis_idx]);
 
             ssize_t ind_offset = ind_strider(i_along, axis_idx);
-            ssize_t i = static_cast<ssize_t>(ind_data[ind_offset]);
-
-            proj(axes_shape_and_strides_[axis_idx], i);
-
-            src_offset += i * axes_shape_and_strides_[k_ + axis_idx];
+            // proj produces an index in the range of the given axis
+            ssize_t projected_idx =
+                proj(axes_shape_and_strides_[axis_idx], ind_data[ind_offset]);
+            src_offset +=
+                projected_idx * axes_shape_and_strides_[k_ + axis_idx];
         }
 
         dst_offset += axes_strider(i_along);
@@ -154,6 +111,14 @@ class TakeFunctor
     }
 };
 
+template <typename ProjectorT,
+          typename OrthogIndexer,
+          typename IndicesIndexer,
+          typename AxesIndexer,
+          typename T,
+          typename indT>
+class take_kernel;
+
 typedef sycl::event (*take_fn_ptr_t)(sycl::queue &,
                                      size_t,
                                      size_t,
@@ -194,21 +159,29 @@ sycl::event take_impl(sycl::queue &q,
     sycl::event take_ev = q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(depends);
 
-        const TwoOffsets_StridedIndexer orthog_indexer{
-            nd, src_offset, dst_offset, orthog_shape_and_strides};
-        const NthStrideOffset indices_indexer{ind_nd, ind_offsets,
-                                              ind_shape_and_strides};
-        const StridedIndexer axes_indexer{ind_nd, 0,
-                                          axes_shape_and_strides + (2 * k)};
+        using OrthogIndexerT =
+            dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
+        const OrthogIndexerT orthog_indexer{nd, src_offset, dst_offset,
+                                            orthog_shape_and_strides};
+
+        using NthStrideIndexerT = dpctl::tensor::offset_utils::NthStrideOffset;
+        const NthStrideIndexerT indices_indexer{ind_nd, ind_offsets,
+                                                ind_shape_and_strides};
+
+        using AxesIndexerT = dpctl::tensor::offset_utils::StridedIndexer;
+        const AxesIndexerT axes_indexer{ind_nd, 0,
+                                        axes_shape_and_strides + (2 * k)};
+
+        using KernelName =
+            take_kernel<ProjectorT, OrthogIndexerT, NthStrideIndexerT,
+                        AxesIndexerT, Ty, indT>;
 
         const size_t gws = orthog_nelems * ind_nelems;
 
-        cgh.parallel_for<
-            take_kernel<ProjectorT, TwoOffsets_StridedIndexer, NthStrideOffset,
-                        StridedIndexer, Ty, indT>>(
+        cgh.parallel_for<KernelName>(
             sycl::range<1>(gws),
-            TakeFunctor<ProjectorT, TwoOffsets_StridedIndexer, NthStrideOffset,
-                        StridedIndexer, Ty, indT>(
+            TakeFunctor<ProjectorT, OrthogIndexerT, NthStrideIndexerT,
+                        AxesIndexerT, Ty, indT>(
                 src_p, dst_p, ind_p, k, ind_nelems, axes_shape_and_strides,
                 orthog_indexer, indices_indexer, axes_indexer));
     });
@@ -217,9 +190,9 @@ sycl::event take_impl(sycl::queue &q,
 }
 
 template <typename ProjectorT,
-          typename OrthogStrider,
-          typename IndicesStrider,
-          typename AxesStrider,
+          typename OrthogIndexer,
+          typename IndicesIndexer,
+          typename AxesIndexer,
           typename T,
           typename indT>
 class PutFunctor
@@ -231,9 +204,9 @@ class PutFunctor
     int k_ = 0;
     size_t ind_nelems_ = 0;
     const ssize_t *axes_shape_and_strides_ = nullptr;
-    const OrthogStrider orthog_strider;
-    const IndicesStrider ind_strider;
-    const AxesStrider axes_strider;
+    const OrthogIndexer orthog_strider;
+    const IndicesIndexer ind_strider;
+    const AxesIndexer axes_strider;
 
 public:
     PutFunctor(char *dst_cp,
@@ -242,9 +215,9 @@ class PutFunctor
                int k,
                size_t ind_nelems,
                const ssize_t *axes_shape_and_strides,
-               const OrthogStrider &orthog_strider_,
-               const IndicesStrider &ind_strider_,
-               const AxesStrider &axes_strider_)
+               const OrthogIndexer &orthog_strider_,
+               const IndicesIndexer &ind_strider_,
+               const AxesIndexer &axes_strider_)
         : dst_(dst_cp), val_(val_cp), ind_(ind_cp), k_(k),
           ind_nelems_(ind_nelems),
           axes_shape_and_strides_(axes_shape_and_strides),
@@ -266,16 +239,17 @@ class PutFunctor
         ssize_t dst_offset = orthog_offsets.get_first_offset();
         ssize_t val_offset = orthog_offsets.get_second_offset();
 
-        const ProjectorT proj{};
+        constexpr ProjectorT proj{};
         for (int axis_idx = 0; axis_idx < k_; ++axis_idx) {
             indT *ind_data = reinterpret_cast<indT *>(ind_[axis_idx]);
 
             ssize_t ind_offset = ind_strider(i_along, axis_idx);
-            ssize_t i = static_cast<ssize_t>(ind_data[ind_offset]);
-
-            proj(axes_shape_and_strides_[axis_idx], i);
 
-            dst_offset += i * axes_shape_and_strides_[k_ + axis_idx];
+            // proj produces an index in the range of the given axis
+            ssize_t projected_idx =
+                proj(axes_shape_and_strides_[axis_idx], ind_data[ind_offset]);
+            dst_offset +=
+                projected_idx * axes_shape_and_strides_[k_ + axis_idx];
         }
 
         val_offset += axes_strider(i_along);
@@ -284,6 +258,14 @@ class PutFunctor
     }
 };
 
+template <typename ProjectorT,
+          typename OrthogIndexer,
+          typename IndicesIndexer,
+          typename AxesIndexer,
+          typename T,
+          typename indT>
+class put_kernel;
+
 typedef sycl::event (*put_fn_ptr_t)(sycl::queue &,
                                     size_t,
                                     size_t,
@@ -324,20 +306,29 @@ sycl::event put_impl(sycl::queue &q,
     sycl::event put_ev = q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(depends);
 
-        const TwoOffsets_StridedIndexer orthog_indexer{
-            nd, dst_offset, val_offset, orthog_shape_and_strides};
-        const NthStrideOffset indices_indexer{ind_nd, ind_offsets,
-                                              ind_shape_and_strides};
-        const StridedIndexer axes_indexer{ind_nd, 0,
-                                          axes_shape_and_strides + (2 * k)};
+        using OrthogIndexerT =
+            dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
+        const OrthogIndexerT orthog_indexer{nd, dst_offset, val_offset,
+                                            orthog_shape_and_strides};
+
+        using NthStrideIndexerT = dpctl::tensor::offset_utils::NthStrideOffset;
+        const NthStrideIndexerT indices_indexer{ind_nd, ind_offsets,
+                                                ind_shape_and_strides};
+
+        using AxesIndexerT = dpctl::tensor::offset_utils::StridedIndexer;
+        const AxesIndexerT axes_indexer{ind_nd, 0,
+                                        axes_shape_and_strides + (2 * k)};
+
+        using KernelName =
+            put_kernel<ProjectorT, OrthogIndexerT, NthStrideIndexerT,
+                       AxesIndexerT, Ty, indT>;
 
         const size_t gws = orthog_nelems * ind_nelems;
 
-        cgh.parallel_for<put_kernel<ProjectorT, TwoOffsets_StridedIndexer,
-                                    NthStrideOffset, StridedIndexer, Ty, indT>>(
+        cgh.parallel_for<KernelName>(
             sycl::range<1>(gws),
-            PutFunctor<ProjectorT, TwoOffsets_StridedIndexer, NthStrideOffset,
-                       StridedIndexer, Ty, indT>(
+            PutFunctor<ProjectorT, OrthogIndexerT, NthStrideIndexerT,
+                       AxesIndexerT, Ty, indT>(
                 dst_p, val_p, ind_p, k, ind_nelems, axes_shape_and_strides,
                 orthog_indexer, indices_indexer, axes_indexer));
     });
@@ -352,7 +343,8 @@ template <typename fnT, typename T, typename indT> struct TakeWrapFactory
         if constexpr (std::is_integral<indT>::value &&
                       !std::is_same<indT, bool>::value)
         {
-            fnT fn = take_impl<WrapIndex, T, indT>;
+            using dpctl::tensor::indexing_utils::WrapIndex;
+            fnT fn = take_impl<WrapIndex<indT>, T, indT>;
             return fn;
         }
         else {
@@ -369,7 +361,8 @@ template <typename fnT, typename T, typename indT> struct TakeClipFactory
         if constexpr (std::is_integral<indT>::value &&
                       !std::is_same<indT, bool>::value)
         {
-            fnT fn = take_impl<ClipIndex, T, indT>;
+            using dpctl::tensor::indexing_utils::ClipIndex;
+            fnT fn = take_impl<ClipIndex<indT>, T, indT>;
             return fn;
         }
         else {
@@ -386,7 +379,8 @@ template <typename fnT, typename T, typename indT> struct PutWrapFactory
         if constexpr (std::is_integral<indT>::value &&
                       !std::is_same<indT, bool>::value)
         {
-            fnT fn = put_impl<WrapIndex, T, indT>;
+            using dpctl::tensor::indexing_utils::WrapIndex;
+            fnT fn = put_impl<WrapIndex<indT>, T, indT>;
             return fn;
         }
         else {
@@ -403,7 +397,8 @@ template <typename fnT, typename T, typename indT> struct PutClipFactory
         if constexpr (std::is_integral<indT>::value &&
                       !std::is_same<indT, bool>::value)
         {
-            fnT fn = put_impl<ClipIndex, T, indT>;
+            using dpctl::tensor::indexing_utils::ClipIndex;
+            fnT fn = put_impl<ClipIndex<indT>, T, indT>;
             return fn;
         }
         else {
diff --git a/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp
new file mode 100644
index 0000000000..c890cc1262
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp
@@ -0,0 +1,142 @@
+//===----- indexing_utils.hpp - Utilities for indexing modes  -----*-C++-*/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2024 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines utilities for handling out-of-bounds integer indices in
+/// kernels that involve indexing operations, such as take, put, or advanced
+/// tensor integer indexing.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include <cstdint>
+#include <limits>
+#include <sycl/sycl.hpp>
+#include <type_traits>
+
+#include "kernels/dpctl_tensor_types.hpp"
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace indexing_utils
+{
+
+/*
+ * ssize_t for indices is a design choice, dpctl::tensor::usm_ndarray
+ * uses py::ssize_t for shapes and strides internally and Python uses
+ * py_ssize_t for sizes of e.g. lists.
+ */
+
+template <typename IndT> struct WrapIndex
+{
+    static_assert(std::is_integral_v<IndT>);
+
+    ssize_t operator()(ssize_t max_item, IndT ind) const
+    {
+        ssize_t projected;
+        constexpr ssize_t unit(1);
+        max_item = sycl::max(max_item, unit);
+
+        constexpr std::uintmax_t ind_max = std::numeric_limits<IndT>::max();
+        constexpr std::uintmax_t ssize_max =
+            std::numeric_limits<ssize_t>::max();
+
+        if constexpr (std::is_signed_v<IndT>) {
+            constexpr std::intmax_t ind_min = std::numeric_limits<IndT>::min();
+            constexpr std::intmax_t ssize_min =
+                std::numeric_limits<ssize_t>::min();
+
+            if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) {
+                const ssize_t ind_ = static_cast<ssize_t>(ind);
+                const ssize_t lb = -max_item;
+                const ssize_t ub = max_item - 1;
+                projected = sycl::clamp(ind_, lb, ub);
+            }
+            else {
+                const IndT lb = static_cast<IndT>(-max_item);
+                const IndT ub = static_cast<IndT>(max_item - 1);
+                projected = static_cast<ssize_t>(sycl::clamp(ind, lb, ub));
+            }
+            return (projected < 0) ? projected + max_item : projected;
+        }
+        else {
+            if constexpr (ind_max <= ssize_max) {
+                const ssize_t ind_ = static_cast<ssize_t>(ind);
+                const ssize_t ub = max_item - 1;
+                projected = sycl::min(ind_, ub);
+            }
+            else {
+                const IndT ub = static_cast<IndT>(max_item - 1);
+                projected = static_cast<ssize_t>(sycl::min(ind, ub));
+            }
+            return projected;
+        }
+    }
+};
+
+template <typename IndT> struct ClipIndex
+{
+    static_assert(std::is_integral_v<IndT>);
+
+    ssize_t operator()(ssize_t max_item, IndT ind) const
+    {
+        ssize_t projected;
+        constexpr ssize_t unit(1);
+        max_item = sycl::max<ssize_t>(max_item, unit);
+
+        constexpr std::uintmax_t ind_max = std::numeric_limits<IndT>::max();
+        constexpr std::uintmax_t ssize_max =
+            std::numeric_limits<ssize_t>::max();
+        if constexpr (std::is_signed_v<IndT>) {
+            constexpr std::intmax_t ind_min = std::numeric_limits<IndT>::min();
+            constexpr std::intmax_t ssize_min =
+                std::numeric_limits<ssize_t>::min();
+
+            if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) {
+                const ssize_t ind_ = static_cast<ssize_t>(ind);
+                constexpr ssize_t lb(0);
+                const ssize_t ub = max_item - 1;
+                projected = sycl::clamp(ind_, lb, ub);
+            }
+            else {
+                constexpr IndT lb(0);
+                const IndT ub = static_cast<IndT>(max_item - 1);
+                projected = static_cast<size_t>(sycl::clamp(ind, lb, ub));
+            }
+        }
+        else {
+            if constexpr (ind_max <= ssize_max) {
+                const ssize_t ind_ = static_cast<ssize_t>(ind);
+                const ssize_t ub = max_item - 1;
+                projected = sycl::min(ind_, ub);
+            }
+            else {
+                const IndT ub = static_cast<IndT>(max_item - 1);
+                projected = static_cast<ssize_t>(sycl::min(ind, ub));
+            }
+        }
+        return projected;
+    }
+};
+
+} // namespace indexing_utils
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py
index 1028c39e08..56289f0fc5 100644
--- a/dpctl/tests/test_usm_ndarray_indexing.py
+++ b/dpctl/tests/test_usm_ndarray_indexing.py
@@ -1798,3 +1798,38 @@ def test__copy_utils():
     check__take_multi_index(cu._take_multi_index)
     check__place_impl_validation(cu._place_impl)
     check__put_multi_index_validation(cu._put_multi_index)
+
+
+@pytest.mark.parametrize("mode", ["wrap", "clip"])
+def test_take_indices_oob_py_ssize_t(mode):
+    get_queue_or_skip()
+
+    x = dpt.arange(10, dtype="i4")
+    inds1 = dpt.full(5, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64)
+    inds2 = dpt.full(5, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64)
+
+    # sweep through a small range of indices
+    # to check that OOB indices are well-behaved
+    for i in range(1, 10):
+        inds2 -= i
+        r1 = dpt.take(x, inds1, mode=mode)
+        r2 = dpt.take(x, inds2, mode=mode)
+
+        assert dpt.all(r1 == r2)
+
+
+@pytest.mark.parametrize("mode", ["wrap", "clip"])
+def test_put_indices_oob_py_ssize_t(mode):
+    get_queue_or_skip()
+
+    x = dpt.full(10, -1, dtype="i4")
+    inds = dpt.full(1, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64)
+
+    # OOB inds are positive, so always
+    # clip to the top of range
+    for i in range(1, 10):
+        inds -= i
+        dpt.put(x, inds, i, mode=mode)
+
+        assert dpt.all(x[:-1] == -1)
+        assert x[-1] == i