diff --git a/CHANGELOG.md b/CHANGELOG.md index 85602f23e9..f244fddafd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ The full list of changes that went into this release are: * Element-wise `tensor.divide` and comparison operations allow greater range of Python integer and integer array combinations [gh-1771](https://github.com/IntelPython/dpctl/pull/1771) * Fix for unexpected behavior when using floating point types for array indexing [gh-1792](https://github.com/IntelPython/dpctl/pull/1792) * Enable `pytest --pyargs dpctl.tests` [gh-1833](https://github.com/IntelPython/dpctl/pull/1833) +* Fix for undefined behavior in indexing using integer arrays [gh-1894](https://github.com/IntelPython/dpctl/pull/1894) ### Maintenance diff --git a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp index 485fbef513..462323f001 100644 --- a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp @@ -30,6 +30,7 @@ #include #include "dpctl_tensor_types.hpp" +#include "utils/indexing_utils.hpp" #include "utils/offset_utils.hpp" #include "utils/type_utils.hpp" @@ -42,54 +43,10 @@ namespace kernels namespace indexing { -using namespace dpctl::tensor::offset_utils; - -template -class take_kernel; template -class put_kernel; - -class WrapIndex -{ -public: - WrapIndex() = default; - - void operator()(ssize_t max_item, ssize_t &ind) const - { - max_item = std::max(max_item, 1); - ind = sycl::clamp(ind, -max_item, max_item - 1); - ind = (ind < 0) ? ind + max_item : ind; - return; - } -}; - -class ClipIndex -{ -public: - ClipIndex() = default; - - void operator()(ssize_t max_item, ssize_t &ind) const - { - max_item = std::max(max_item, 1); - ind = sycl::clamp(ind, ssize_t(0), max_item - 1); - return; - } -}; - -template class TakeFunctor @@ -101,9 +58,9 @@ class TakeFunctor int k_ = 0; size_t ind_nelems_ = 0; const ssize_t *axes_shape_and_strides_ = nullptr; - const OrthogStrider orthog_strider; - const IndicesStrider ind_strider; - const AxesStrider axes_strider; + const OrthogIndexer orthog_strider; + const IndicesIndexer ind_strider; + const AxesIndexer axes_strider; public: TakeFunctor(const char *src_cp, @@ -112,9 +69,9 @@ class TakeFunctor int k, size_t ind_nelems, const ssize_t *axes_shape_and_strides, - const OrthogStrider &orthog_strider_, - const IndicesStrider &ind_strider_, - const AxesStrider &axes_strider_) + const OrthogIndexer &orthog_strider_, + const IndicesIndexer &ind_strider_, + const AxesIndexer &axes_strider_) : src_(src_cp), dst_(dst_cp), ind_(ind_cp), k_(k), ind_nelems_(ind_nelems), axes_shape_and_strides_(axes_shape_and_strides), @@ -136,16 +93,16 @@ class TakeFunctor ssize_t src_offset = orthog_offsets.get_first_offset(); ssize_t dst_offset = orthog_offsets.get_second_offset(); - const ProjectorT proj{}; + constexpr ProjectorT proj{}; for (int axis_idx = 0; axis_idx < k_; ++axis_idx) { indT *ind_data = reinterpret_cast(ind_[axis_idx]); ssize_t ind_offset = ind_strider(i_along, axis_idx); - ssize_t i = static_cast(ind_data[ind_offset]); - - proj(axes_shape_and_strides_[axis_idx], i); - - src_offset += i * axes_shape_and_strides_[k_ + axis_idx]; + // proj produces an index in the range of the given axis + ssize_t projected_idx = + proj(axes_shape_and_strides_[axis_idx], ind_data[ind_offset]); + src_offset += + projected_idx * axes_shape_and_strides_[k_ + axis_idx]; } dst_offset += axes_strider(i_along); @@ -154,6 +111,14 @@ class TakeFunctor } }; +template +class take_kernel; + typedef sycl::event (*take_fn_ptr_t)(sycl::queue &, size_t, size_t, @@ -194,21 +159,29 @@ sycl::event take_impl(sycl::queue &q, sycl::event take_ev = q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - const TwoOffsets_StridedIndexer orthog_indexer{ - nd, src_offset, dst_offset, orthog_shape_and_strides}; - const NthStrideOffset indices_indexer{ind_nd, ind_offsets, - ind_shape_and_strides}; - const StridedIndexer axes_indexer{ind_nd, 0, - axes_shape_and_strides + (2 * k)}; + using OrthogIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const OrthogIndexerT orthog_indexer{nd, src_offset, dst_offset, + orthog_shape_and_strides}; + + using NthStrideIndexerT = dpctl::tensor::offset_utils::NthStrideOffset; + const NthStrideIndexerT indices_indexer{ind_nd, ind_offsets, + ind_shape_and_strides}; + + using AxesIndexerT = dpctl::tensor::offset_utils::StridedIndexer; + const AxesIndexerT axes_indexer{ind_nd, 0, + axes_shape_and_strides + (2 * k)}; + + using KernelName = + take_kernel; const size_t gws = orthog_nelems * ind_nelems; - cgh.parallel_for< - take_kernel>( + cgh.parallel_for( sycl::range<1>(gws), - TakeFunctor( + TakeFunctor( src_p, dst_p, ind_p, k, ind_nelems, axes_shape_and_strides, orthog_indexer, indices_indexer, axes_indexer)); }); @@ -217,9 +190,9 @@ sycl::event take_impl(sycl::queue &q, } template class PutFunctor @@ -231,9 +204,9 @@ class PutFunctor int k_ = 0; size_t ind_nelems_ = 0; const ssize_t *axes_shape_and_strides_ = nullptr; - const OrthogStrider orthog_strider; - const IndicesStrider ind_strider; - const AxesStrider axes_strider; + const OrthogIndexer orthog_strider; + const IndicesIndexer ind_strider; + const AxesIndexer axes_strider; public: PutFunctor(char *dst_cp, @@ -242,9 +215,9 @@ class PutFunctor int k, size_t ind_nelems, const ssize_t *axes_shape_and_strides, - const OrthogStrider &orthog_strider_, - const IndicesStrider &ind_strider_, - const AxesStrider &axes_strider_) + const OrthogIndexer &orthog_strider_, + const IndicesIndexer &ind_strider_, + const AxesIndexer &axes_strider_) : dst_(dst_cp), val_(val_cp), ind_(ind_cp), k_(k), ind_nelems_(ind_nelems), axes_shape_and_strides_(axes_shape_and_strides), @@ -266,16 +239,17 @@ class PutFunctor ssize_t dst_offset = orthog_offsets.get_first_offset(); ssize_t val_offset = orthog_offsets.get_second_offset(); - const ProjectorT proj{}; + constexpr ProjectorT proj{}; for (int axis_idx = 0; axis_idx < k_; ++axis_idx) { indT *ind_data = reinterpret_cast(ind_[axis_idx]); ssize_t ind_offset = ind_strider(i_along, axis_idx); - ssize_t i = static_cast(ind_data[ind_offset]); - - proj(axes_shape_and_strides_[axis_idx], i); - dst_offset += i * axes_shape_and_strides_[k_ + axis_idx]; + // proj produces an index in the range of the given axis + ssize_t projected_idx = + proj(axes_shape_and_strides_[axis_idx], ind_data[ind_offset]); + dst_offset += + projected_idx * axes_shape_and_strides_[k_ + axis_idx]; } val_offset += axes_strider(i_along); @@ -284,6 +258,14 @@ class PutFunctor } }; +template +class put_kernel; + typedef sycl::event (*put_fn_ptr_t)(sycl::queue &, size_t, size_t, @@ -324,20 +306,29 @@ sycl::event put_impl(sycl::queue &q, sycl::event put_ev = q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - const TwoOffsets_StridedIndexer orthog_indexer{ - nd, dst_offset, val_offset, orthog_shape_and_strides}; - const NthStrideOffset indices_indexer{ind_nd, ind_offsets, - ind_shape_and_strides}; - const StridedIndexer axes_indexer{ind_nd, 0, - axes_shape_and_strides + (2 * k)}; + using OrthogIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const OrthogIndexerT orthog_indexer{nd, dst_offset, val_offset, + orthog_shape_and_strides}; + + using NthStrideIndexerT = dpctl::tensor::offset_utils::NthStrideOffset; + const NthStrideIndexerT indices_indexer{ind_nd, ind_offsets, + ind_shape_and_strides}; + + using AxesIndexerT = dpctl::tensor::offset_utils::StridedIndexer; + const AxesIndexerT axes_indexer{ind_nd, 0, + axes_shape_and_strides + (2 * k)}; + + using KernelName = + put_kernel; const size_t gws = orthog_nelems * ind_nelems; - cgh.parallel_for>( + cgh.parallel_for( sycl::range<1>(gws), - PutFunctor( + PutFunctor( dst_p, val_p, ind_p, k, ind_nelems, axes_shape_and_strides, orthog_indexer, indices_indexer, axes_indexer)); }); @@ -352,7 +343,8 @@ template struct TakeWrapFactory if constexpr (std::is_integral::value && !std::is_same::value) { - fnT fn = take_impl; + using dpctl::tensor::indexing_utils::WrapIndex; + fnT fn = take_impl, T, indT>; return fn; } else { @@ -369,7 +361,8 @@ template struct TakeClipFactory if constexpr (std::is_integral::value && !std::is_same::value) { - fnT fn = take_impl; + using dpctl::tensor::indexing_utils::ClipIndex; + fnT fn = take_impl, T, indT>; return fn; } else { @@ -386,7 +379,8 @@ template struct PutWrapFactory if constexpr (std::is_integral::value && !std::is_same::value) { - fnT fn = put_impl; + using dpctl::tensor::indexing_utils::WrapIndex; + fnT fn = put_impl, T, indT>; return fn; } else { @@ -403,7 +397,8 @@ template struct PutClipFactory if constexpr (std::is_integral::value && !std::is_same::value) { - fnT fn = put_impl; + using dpctl::tensor::indexing_utils::ClipIndex; + fnT fn = put_impl, T, indT>; return fn; } else { diff --git a/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp new file mode 100644 index 0000000000..c890cc1262 --- /dev/null +++ b/dpctl/tensor/libtensor/include/utils/indexing_utils.hpp @@ -0,0 +1,142 @@ +//===----- indexing_utils.hpp - Utilities for indexing modes -----*-C++-*/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines utilities for handling out-of-bounds integer indices in +/// kernels that involve indexing operations, such as take, put, or advanced +/// tensor integer indexing. +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpctl +{ +namespace tensor +{ +namespace indexing_utils +{ + +/* + * ssize_t for indices is a design choice, dpctl::tensor::usm_ndarray + * uses py::ssize_t for shapes and strides internally and Python uses + * py_ssize_t for sizes of e.g. lists. + */ + +template struct WrapIndex +{ + static_assert(std::is_integral_v); + + ssize_t operator()(ssize_t max_item, IndT ind) const + { + ssize_t projected; + constexpr ssize_t unit(1); + max_item = sycl::max(max_item, unit); + + constexpr std::uintmax_t ind_max = std::numeric_limits::max(); + constexpr std::uintmax_t ssize_max = + std::numeric_limits::max(); + + if constexpr (std::is_signed_v) { + constexpr std::intmax_t ind_min = std::numeric_limits::min(); + constexpr std::intmax_t ssize_min = + std::numeric_limits::min(); + + if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) { + const ssize_t ind_ = static_cast(ind); + const ssize_t lb = -max_item; + const ssize_t ub = max_item - 1; + projected = sycl::clamp(ind_, lb, ub); + } + else { + const IndT lb = static_cast(-max_item); + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::clamp(ind, lb, ub)); + } + return (projected < 0) ? projected + max_item : projected; + } + else { + if constexpr (ind_max <= ssize_max) { + const ssize_t ind_ = static_cast(ind); + const ssize_t ub = max_item - 1; + projected = sycl::min(ind_, ub); + } + else { + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::min(ind, ub)); + } + return projected; + } + } +}; + +template struct ClipIndex +{ + static_assert(std::is_integral_v); + + ssize_t operator()(ssize_t max_item, IndT ind) const + { + ssize_t projected; + constexpr ssize_t unit(1); + max_item = sycl::max(max_item, unit); + + constexpr std::uintmax_t ind_max = std::numeric_limits::max(); + constexpr std::uintmax_t ssize_max = + std::numeric_limits::max(); + if constexpr (std::is_signed_v) { + constexpr std::intmax_t ind_min = std::numeric_limits::min(); + constexpr std::intmax_t ssize_min = + std::numeric_limits::min(); + + if constexpr (ind_max <= ssize_max && ind_min >= ssize_min) { + const ssize_t ind_ = static_cast(ind); + constexpr ssize_t lb(0); + const ssize_t ub = max_item - 1; + projected = sycl::clamp(ind_, lb, ub); + } + else { + constexpr IndT lb(0); + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::clamp(ind, lb, ub)); + } + } + else { + if constexpr (ind_max <= ssize_max) { + const ssize_t ind_ = static_cast(ind); + const ssize_t ub = max_item - 1; + projected = sycl::min(ind_, ub); + } + else { + const IndT ub = static_cast(max_item - 1); + projected = static_cast(sycl::min(ind, ub)); + } + } + return projected; + } +}; + +} // namespace indexing_utils +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tests/test_usm_ndarray_indexing.py b/dpctl/tests/test_usm_ndarray_indexing.py index 1028c39e08..56289f0fc5 100644 --- a/dpctl/tests/test_usm_ndarray_indexing.py +++ b/dpctl/tests/test_usm_ndarray_indexing.py @@ -1798,3 +1798,38 @@ def test__copy_utils(): check__take_multi_index(cu._take_multi_index) check__place_impl_validation(cu._place_impl) check__put_multi_index_validation(cu._put_multi_index) + + +@pytest.mark.parametrize("mode", ["wrap", "clip"]) +def test_take_indices_oob_py_ssize_t(mode): + get_queue_or_skip() + + x = dpt.arange(10, dtype="i4") + inds1 = dpt.full(5, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64) + inds2 = dpt.full(5, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64) + + # sweep through a small range of indices + # to check that OOB indices are well-behaved + for i in range(1, 10): + inds2 -= i + r1 = dpt.take(x, inds1, mode=mode) + r2 = dpt.take(x, inds2, mode=mode) + + assert dpt.all(r1 == r2) + + +@pytest.mark.parametrize("mode", ["wrap", "clip"]) +def test_put_indices_oob_py_ssize_t(mode): + get_queue_or_skip() + + x = dpt.full(10, -1, dtype="i4") + inds = dpt.full(1, dpt.iinfo(dpt.uint64).max, dtype=dpt.uint64) + + # OOB inds are positive, so always + # clip to the top of range + for i in range(1, 10): + inds -= i + dpt.put(x, inds, i, mode=mode) + + assert dpt.all(x[:-1] == -1) + assert x[-1] == i