diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9d788ffcfabe1..b7df9aa692655 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Other Deprecations - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) +- Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 91c4946b64fe8..d9fc1a903009e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1227,6 +1227,15 @@ def where(self, other, cond) -> list[Block]: if m.any(): taken = result.take(m.nonzero()[0], axis=axis) r = maybe_downcast_numeric(taken, self.dtype) + if r.dtype != taken.dtype: + warnings.warn( + "Downcasting integer-dtype results in .where is " + "deprecated and will change in a future version. " + "To retain the old behavior, explicitly cast the results " + "to the desired dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) nb = self.make_block(r.T, placement=self._mgr_locs[m]) result_blocks.append(nb) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 6ab0e2f718f8a..3d55ff5f98407 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -98,7 +98,7 @@ def test_where_upcasting(self): tm.assert_series_equal(result, expected) - def test_where_alignment(self, where_frame, float_string_frame): + def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame): # aligning def _check_align(df, cond, other, check_dtypes=True): rs = df.where(cond, other) @@ -141,7 +141,11 @@ def _check_align(df, cond, other, check_dtypes=True): # check other is ndarray cond = df > 0 - _check_align(df, cond, (_safe_add(df).values)) + warn = None + if df is mixed_int_frame: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + _check_align(df, cond, (_safe_add(df).values)) # integers are upcast, so don't check the dtypes cond = df > 0 @@ -461,7 +465,7 @@ def test_where_complex(self): df[df.abs() >= 5] = np.nan tm.assert_frame_equal(df, expected) - def test_where_axis(self): + def test_where_axis(self, using_array_manager): # GH 9736 df = DataFrame(np.random.randn(2, 2)) mask = DataFrame([[False, False], [False, False]]) @@ -499,8 +503,10 @@ def test_where_axis(self): assert return_value is None tm.assert_frame_equal(result, expected) + warn = FutureWarning if using_array_manager else None expected = DataFrame([[0, np.nan], [0, np.nan]]) - result = df.where(mask, s, axis="columns") + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.where(mask, s, axis="columns") tm.assert_frame_equal(result, expected) expected = DataFrame( @@ -717,6 +723,23 @@ def test_where_try_cast_deprecated(frame_or_series): obj.where(mask, -1, try_cast=False) +def test_where_int_downcasting_deprecated(using_array_manager): + # GH#44597 + arr = np.arange(6).astype(np.int16).reshape(3, 2) + df = DataFrame(arr) + + mask = np.zeros(arr.shape, dtype=bool) + mask[:, 0] = True + + msg = "Downcasting integer-dtype" + warn = FutureWarning if not using_array_manager else None + with tm.assert_produces_warning(warn, match=msg): + res = df.where(mask, 2 ** 17) + + expected = DataFrame({0: arr[:, 0], 1: np.array([2 ** 17] * 3, dtype=np.int32)}) + tm.assert_frame_equal(res, expected) + + def test_where_copies_with_noop(frame_or_series): # GH-39595 result = frame_or_series([1, 2, 3, 4]) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index c851e65a7ad4f..e692948c92a26 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -136,7 +136,7 @@ def test_clip_against_unordered_columns(self): tm.assert_frame_equal(result_lower, expected_lower) tm.assert_frame_equal(result_lower_upper, expected_lower_upper) - def test_clip_with_na_args(self, float_frame): + def test_clip_with_na_args(self, float_frame, using_array_manager): """Should process np.nan argument as None""" # GH#17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) @@ -151,7 +151,9 @@ def test_clip_with_na_args(self, float_frame): ) tm.assert_frame_equal(result, expected) - result = df.clip(lower=[4, 5, np.nan], axis=1) + warn = FutureWarning if using_array_manager else None + with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): + result = df.clip(lower=[4, 5, np.nan], axis=1) expected = DataFrame( {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} )