From 15cdc25fabbaf8c2aba359eb9830d9c1efea4b57 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 20 Aug 2022 22:51:00 +0200 Subject: [PATCH 1/3] BUG: reindex using wrong fill value when indexing cols and index for uint dtypes --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/array_algos/take.py | 10 +++++++--- pandas/tests/frame/methods/test_reindex.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c197f3df45814..c16ffc2127157 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1001,6 +1001,7 @@ Indexing - Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`) - Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`) - Bug in :func:`isin` upcasting to ``float64`` with unsigned integer dtype and list-like argument without a dtype (:issue:`46485`) +- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) - Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`) - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 188725f003f1e..f82aeb6df5e32 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -546,9 +546,13 @@ def _take_2d_multi_object( out[:, col_mask] = fill_value for i in range(len(row_idx)): u_ = row_idx[i] - for j in range(len(col_idx)): - v = col_idx[j] - out[i, j] = arr[u_, v] + + if u_ != -1: + for j in range(len(col_idx)): + v = col_idx[j] + + if v != -1: + out[i, j] = arr[u_, v] def _take_preprocess_indexer_and_fill_value( diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 8575e7895ae5a..56a38a74595fe 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -772,6 +772,16 @@ def test_reindex_fill_value(self): expected = df.reindex(range(15)).fillna(0) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("dtype", ["uint8", "uint16", "uint32", "uint64"]) + def test_reindex_uint_dtypes_fill_value(self, dtype): + # GH#48184 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, dtype=dtype) + result = df.reindex(columns=list("abcd"), index=[0, 1, 2, 3], fill_value=10) + expected = DataFrame( + {"a": [1, 2, 10, 10], "b": [1, 2, 10, 10], "c": 10, "d": 10}, dtype=dtype + ) + tm.assert_frame_equal(result, expected) + def test_reindex_dups(self): # GH4746, reindex on duplicate index error messages From 5f48e510644b96e63787971ef6daece4f80cc2f6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 30 Aug 2022 10:02:37 +0200 Subject: [PATCH 2/3] Move whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7b82fdef9f4ff..711352775400e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1054,7 +1054,6 @@ Indexing - Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`) - Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`) - Bug in :func:`isin` upcasting to ``float64`` with unsigned integer dtype and list-like argument without a dtype (:issue:`46485`) -- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) - Bug in :meth:`Index.reindex` raising ``AssertionError`` when ``level`` was specified but no :class:`MultiIndex` was given; level is ignored now (:issue:`35132`) - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index eac5e5d3a0f52..c0a60283fcf76 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -151,7 +151,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Missing From 4897545fa4628f2f4e0ea49b29384a3e946ceb0b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 31 Aug 2022 19:43:20 +0200 Subject: [PATCH 3/3] Use fixture --- pandas/tests/frame/methods/test_reindex.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 56a38a74595fe..daa60be085fd8 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -772,13 +772,13 @@ def test_reindex_fill_value(self): expected = df.reindex(range(15)).fillna(0) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype", ["uint8", "uint16", "uint32", "uint64"]) - def test_reindex_uint_dtypes_fill_value(self, dtype): + def test_reindex_uint_dtypes_fill_value(self, any_unsigned_int_numpy_dtype): # GH#48184 - df = DataFrame({"a": [1, 2], "b": [1, 2]}, dtype=dtype) + df = DataFrame({"a": [1, 2], "b": [1, 2]}, dtype=any_unsigned_int_numpy_dtype) result = df.reindex(columns=list("abcd"), index=[0, 1, 2, 3], fill_value=10) expected = DataFrame( - {"a": [1, 2, 10, 10], "b": [1, 2, 10, 10], "c": 10, "d": 10}, dtype=dtype + {"a": [1, 2, 10, 10], "b": [1, 2, 10, 10], "c": 10, "d": 10}, + dtype=any_unsigned_int_numpy_dtype, ) tm.assert_frame_equal(result, expected)