From 36bc04ac28ec7742ca35a87b68d22d37567d0241 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 6 Sep 2023 05:28:08 -0500 Subject: [PATCH 1/2] Restricted use of reduce_over_axis0 special kernels The kernel is applicable if both inputs are F-contiguous, or if the first input if F-contiguous and we are reducing to 1d C-contiguous array. Closes gh-1391 --- dpctl/tensor/libtensor/source/sum_reductions.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/source/sum_reductions.cpp b/dpctl/tensor/libtensor/source/sum_reductions.cpp index 7628813c6d..13ab268b55 100644 --- a/dpctl/tensor/libtensor/source/sum_reductions.cpp +++ b/dpctl/tensor/libtensor/source/sum_reductions.cpp @@ -218,7 +218,9 @@ std::pair py_sum_over_axis( return std::make_pair(keep_args_event, sum_over_axis_contig_ev); } } - else if (is_src_f_contig & is_dst_c_contig) { + else if (is_src_f_contig && + ((is_dst_c_contig && dst_nd == 1) || dst.is_f_contiguous())) + { auto fn = sum_over_axis0_contig_atomic_dispatch_table[src_typeid] [dst_typeid]; if (fn != nullptr) { From 8d8ef0b74ff97744c26d411aabbffd65bfe6770e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 6 Sep 2023 07:33:16 -0500 Subject: [PATCH 2/2] Add test based on gh-1391 --- dpctl/tests/test_tensor_sum.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dpctl/tests/test_tensor_sum.py b/dpctl/tests/test_tensor_sum.py index fc2a0ec8de..403a823324 100644 --- a/dpctl/tests/test_tensor_sum.py +++ b/dpctl/tests/test_tensor_sum.py @@ -172,3 +172,18 @@ def test_largish_reduction(arg_dtype, n): assert dpt.all(dpt.equal(y1, y2)) assert dpt.all(dpt.equal(y1, n * m)) + + +def test_axis0_bug(): + "gh-1391" + get_queue_or_skip() + + sh = (1, 2, 3) + a = dpt.arange(sh[0] * sh[1] * sh[2], dtype="i4") + a = dpt.reshape(a, sh) + aT = dpt.permute_dims(a, (2, 1, 0)) + + s = dpt.sum(aT, axis=2) + expected = dpt.asarray([[0, 3], [1, 4], [2, 5]]) + + assert dpt.all(s == expected)