From 482f7778246904ff8b8e4408e75ca3f813f5f9bf Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Feb 2025 13:50:03 +0100 Subject: [PATCH 1/4] Disallow minlength=None in bincount --- dpnp/dpnp_iface_histograms.py | 30 +++++++++++++++++---------- dpnp/tests/test_histogram.py | 38 +++++++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 00d37a4267f0..5c1db90f314b 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -225,17 +225,23 @@ def _get_bin_edges(a, bins, range, usm_type): def _bincount_validate(x, weights, minlength): + dpnp.check_supported_arrays_type(x) if x.ndim > 1: raise ValueError("object too deep for desired array") + if x.ndim < 1: raise ValueError("object of too small depth for desired array") + if not dpnp.issubdtype(x.dtype, dpnp.integer) and not dpnp.issubdtype( x.dtype, dpnp.bool ): raise TypeError("x must be an integer array") + if weights is not None: + dpnp.check_supported_arrays_type(weights) if x.shape != weights.shape: raise ValueError("The weights and x don't have the same length.") + if not ( dpnp.issubdtype(weights.dtype, dpnp.integer) or dpnp.issubdtype(weights.dtype, dpnp.floating) @@ -245,10 +251,12 @@ def _bincount_validate(x, weights, minlength): f"Weights must be integer or float. Got {weights.dtype}" ) - if minlength is not None: - minlength = int(minlength) - if minlength < 0: - raise ValueError("minlength must be non-negative") + if minlength is None: + raise TypeError("use 0 instead of None for minlength") + + minlength = int(minlength) + if minlength < 0: + raise ValueError("minlength must be non-negative") def _bincount_run_native( @@ -262,9 +270,7 @@ def _bincount_run_native( if min_v < 0: raise ValueError("x argument must have no negative arguments") - size = int(dpnp.max(max_v)) + 1 - if minlength is not None: - size = max(size, minlength) + size = max(int(max_v) + 1, minlength) # bincount implementation uses atomics, but atomics doesn't work with # host usm memory @@ -299,9 +305,9 @@ def _bincount_run_native( return n_casted -def bincount(x, weights=None, minlength=None): +def bincount(x, weights=None, minlength=0): """ - bincount(x, /, weights=None, minlength=None) + bincount(x, /, weights=None, minlength=0) Count number of occurrences of each value in array of non-negative ints. @@ -313,10 +319,12 @@ def bincount(x, weights=None, minlength=None): Input 1-dimensional array with non-negative integer values. weights : {None, dpnp.ndarray, usm_ndarray}, optional Weights, array of the same shape as `x`. + Default: ``None`` - minlength : {None, int}, optional + minlength : int, optional A minimum number of bins for the output array. - Default: ``None`` + + Default: ``0`` Returns ------- diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 6d9f53ed920d..236feb6e1d43 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -587,12 +587,10 @@ def test_different_bins_amount(self, bins_count): @pytest.mark.parametrize( "array", [[1, 2, 3], [1, 2, 2, 1, 2, 4], [2, 2, 2, 2]], - ids=["[1, 2, 3]", "[1, 2, 2, 1, 2, 4]", "[2, 2, 2, 2]"], + ids=["size=3", "size=6", "size=4"], ) - @pytest.mark.parametrize( - "minlength", [0, 1, 3, 5], ids=["0", "1", "3", "5"] - ) - def test_bincount_minlength(self, array, minlength): + @pytest.mark.parametrize("minlength", [0, 1, 3, 5]) + def test_minlength(self, array, minlength): np_a = numpy.array(array) dpnp_a = dpnp.array(array) @@ -600,6 +598,20 @@ def test_bincount_minlength(self, array, minlength): result = dpnp.bincount(dpnp_a, minlength=minlength) assert_allclose(expected, result) + # TODO: uncomment once numpy 2.3.0 is released + # @testing.with_requires("numpy>=2.3") + # @pytest.mark.parametrize("xp", [dpnp, numpy]) + @pytest.mark.parametrize("xp", [dpnp]) + def test_minlength_none(self, xp): + a = xp.array([1, 2, 3]) + assert_raises_regex( + TypeError, + "use 0 instead of None for minlength", + xp.bincount, + a, + minlength=None, + ) + @pytest.mark.parametrize( "array", [[1, 2, 2, 1, 2, 4]], ids=["[1, 2, 2, 1, 2, 4]"] ) @@ -608,7 +620,7 @@ def test_bincount_minlength(self, array, minlength): [None, [0.3, 0.5, 0.2, 0.7, 1.0, -0.6], [2, 2, 2, 2, 2, 2]], ids=["None", "[0.3, 0.5, 0.2, 0.7, 1., -0.6]", "[2, 2, 2, 2, 2, 2]"], ) - def test_bincount_weights(self, array, weights): + def test_weights(self, array, weights): np_a = numpy.array(array) np_weights = numpy.array(weights) if weights is not None else weights dpnp_a = dpnp.array(array) @@ -618,6 +630,20 @@ def test_bincount_weights(self, array, weights): result = dpnp.bincount(dpnp_a, weights=dpnp_weights) assert_allclose(expected, result) + @pytest.mark.parametrize( + "data", + [numpy.arange(5), 3, [2, 1]], + ids=["numpy.ndarray", "scalar", "list"], + ) + def test_unsupported_data_weights(self, data): + # check input array + msg = "An array must be any of supported type" + assert_raises_regex(TypeError, msg, dpnp.bincount, data) + + # check array of weights + a = dpnp.ones(5, dtype=dpnp.int32) + assert_raises_regex(TypeError, msg, dpnp.bincount, a, weights=data) + class TestHistogramDd: @pytest.mark.usefixtures("suppress_complex_warning") From 6908545435f906c6890e3ebcf1458b9bf5ee6427 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 12 Feb 2025 13:52:31 +0100 Subject: [PATCH 2/4] Add blank line prior Default value --- dpnp/dpnp_iface_histograms.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 5c1db90f314b..8f5975c074fe 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -424,6 +424,7 @@ def digitize(x, bins, right=False): increasing or decreasing. right : bool, optional Indicates whether the intervals include the right or the left bin edge. + Default: ``False``. Returns @@ -683,6 +684,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): given range. If `bins` is a sequence, it defines the bin edges, including the rightmost edge, allowing for non-uniform bin widths. + Default: ``10``. range : {None, 2-tuple of float}, optional The lower and upper range of the bins. If not provided, range is simply @@ -691,12 +693,14 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): affects the automatic bin computation as well. While bin width is computed to be optimal based on the actual data within `range`, the bin count will fill the entire range including portions containing no data. + Default: ``None``. weights : {None, dpnp.ndarray, usm_ndarray}, optional An array of weights, of the same shape as `a`. Each value in `a` only contributes its associated weight towards the bin count (instead of 1). This is currently not used by any of the bin estimators, but may be in the future. + Default: ``None``. Returns From b83c9baa2a9cb7c92615fa4c4ae41e40e0db72ee Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 14 Feb 2025 12:36:31 +0100 Subject: [PATCH 3/4] Add explicit pytest.mark.xfail per numpy version instead of TODO comment --- dpnp/tests/test_histogram.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 236feb6e1d43..f7dc2c63fe44 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -7,7 +7,6 @@ assert_array_equal, assert_raises, assert_raises_regex, - suppress_warnings, ) import dpnp @@ -18,6 +17,7 @@ get_float_dtypes, get_integer_dtypes, has_support_aspect64, + numpy_version, ) @@ -598,10 +598,21 @@ def test_minlength(self, array, minlength): result = dpnp.bincount(dpnp_a, minlength=minlength) assert_allclose(expected, result) - # TODO: uncomment once numpy 2.3.0 is released - # @testing.with_requires("numpy>=2.3") - # @pytest.mark.parametrize("xp", [dpnp, numpy]) - @pytest.mark.parametrize("xp", [dpnp]) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + @pytest.mark.parametrize( + "xp", + [ + dpnp, + pytest.param( + numpy, + marks=pytest.mark.xfail( + numpy_version() < "2.3.0", + reason="numpy deprecates but accepts that", + strict=True, + ), + ), + ], + ) def test_minlength_none(self, xp): a = xp.array([1, 2, 3]) assert_raises_regex( From 310914cc28bdf7d06c8083783ea3e71e187e4f95 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 14 Feb 2025 17:11:20 +0100 Subject: [PATCH 4/4] Improve test coverage in dpnp.histogram --- dpnp/tests/helper.py | 7 ++++--- dpnp/tests/test_histogram.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py index eafe86db69a7..de0c949f0508 100644 --- a/dpnp/tests/helper.py +++ b/dpnp/tests/helper.py @@ -6,7 +6,8 @@ from numpy.testing import assert_allclose, assert_array_equal import dpnp -from dpnp.tests import config + +from . import config def assert_dtype_allclose( @@ -86,14 +87,14 @@ def assert_dtype_allclose( assert dpnp_arr.dtype == numpy_arr.dtype -def get_integer_dtypes(no_unsigned=False): +def get_integer_dtypes(all_int_types=False, no_unsigned=False): """ Build a list of integer types supported by DPNP. """ dtypes = [dpnp.int32, dpnp.int64] - if config.all_int_types: + if config.all_int_types or all_int_types: dtypes += [dpnp.int8, dpnp.int16] if not no_unsigned: dtypes += [dpnp.uint8, dpnp.uint16, dpnp.uint32, dpnp.uint64] diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 719058415e8d..726de6a0fb01 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -282,9 +282,10 @@ def test_weights(self, density): assert_dtype_allclose(result_hist, expected_hist) assert_dtype_allclose(result_edges, expected_edges) - def test_integer_weights(self): + @pytest.mark.parametrize("dt", get_integer_dtypes(all_int_types=True)) + def test_integer_weights(self, dt): v = numpy.array([1, 2, 2, 4]) - w = numpy.array([4, 3, 2, 1]) + w = numpy.array([4, 3, 2, 1], dtype=dt) iv = dpnp.array(v) iw = dpnp.array(w)