From 9b30bdd98a7a675fc66000c3c01947e807da982e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 25 Nov 2023 23:32:15 +0100 Subject: [PATCH 01/10] CoW: Warn for cases that go through putmask --- pandas/core/internals/base.py | 15 ++++- pandas/core/internals/blocks.py | 58 ++++++++++++++++++- pandas/core/internals/managers.py | 25 +------- pandas/tests/copy_view/test_clip.py | 8 ++- pandas/tests/copy_view/test_indexing.py | 19 ++---- pandas/tests/copy_view/test_methods.py | 25 +++++--- pandas/tests/copy_view/test_replace.py | 8 ++- pandas/tests/frame/methods/test_replace.py | 8 ++- .../indexing/test_chaining_and_caching.py | 5 +- 9 files changed, 115 insertions(+), 56 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index fe366c7375c6a..ee45707791fbf 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -14,7 +14,10 @@ import numpy as np -from pandas._config import using_copy_on_write +from pandas._config import ( + using_copy_on_write, + warn_copy_on_write, +) from pandas._libs import ( algos as libalgos, @@ -49,6 +52,11 @@ ) +class _AlreadyWarned: + def __init__(self): + self.warned_already = False + + class DataManager(PandasObject): # TODO share more methods/attributes @@ -203,12 +211,17 @@ def putmask(self, mask, new, align: bool = True) -> Self: align_keys = ["mask"] new = extract_array(new, extract_numpy=True) + already_warned = None + if warn_copy_on_write(): + already_warned = _AlreadyWarned() + return self.apply_with_block( "putmask", align_keys=align_keys, mask=mask, new=new, using_cow=using_copy_on_write(), + already_warned=already_warned, ) @final diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 535d18f99f0ef..ec58b46371149 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -18,6 +18,7 @@ from pandas._config import ( get_option, using_copy_on_write, + warn_copy_on_write, ) from pandas._libs import ( @@ -136,6 +137,29 @@ _dtype_obj = np.dtype("object") +COW_WARNING_GENERAL_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +You are mutating a Series or DataFrame object, and currently this mutation will +also have effect on other Series or DataFrame objects that share data with this +object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object +will never modify another. +""" + + +COW_WARNING_SETITEM_MSG = """\ +Setting a value on a view: behaviour will change in pandas 3.0. +Currently, the mutation will also have effect on the object that shares data +with this object. For example, when setting a value in a Series that was +extracted from a column of a DataFrame, that DataFrame will also be updated: + + ser = df["col"] + ser[0] = 0 <--- in pandas 2, this also updates `df` + +In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never +modify another, and thus in the example above, `df` will not be changed. +""" + + def maybe_split(meth: F) -> F: """ If we have a multi-column block, split and operate block-wise. Otherwise @@ -1355,7 +1379,9 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1388,6 +1414,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + try: casted = np_can_hold_element(values.dtype, new) @@ -2020,7 +2059,9 @@ def where( return [nb] @final - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask( + self, mask, new, using_cow: bool = False, already_warned=None + ) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2038,6 +2079,19 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: return [self.copy(deep=False)] return [self] + if ( + warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + self = self._maybe_copy(using_cow, inplace=True) values = self.values if values.ndim == 2: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..6d6552f38c59c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -72,6 +72,8 @@ interleaved_dtype, ) from pandas.core.internals.blocks import ( + COW_WARNING_GENERAL_MSG, + COW_WARNING_SETITEM_MSG, Block, NumpyBlock, ensure_block_shape, @@ -100,29 +102,6 @@ from pandas.api.extensions import ExtensionArray -COW_WARNING_GENERAL_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -You are mutating a Series or DataFrame object, and currently this mutation will -also have effect on other Series or DataFrame objects that share data with this -object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object -will never modify another. -""" - - -COW_WARNING_SETITEM_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -Currently, the mutation will also have effect on the object that shares data -with this object. For example, when setting a value in a Series that was -extracted from a column of a DataFrame, that DataFrame will also be updated: - - ser = df["col"] - ser[0] = 0 <--- in pandas 2, this also updates `df` - -In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never -modify another, and thus in the example above, `df` will not be changed. -""" - - class BaseBlockManager(DataManager): """ Core internal data structure to implement DataFrame, Series, etc. diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 6a27a0633a4aa..1363500f0d6c0 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -5,12 +5,16 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write): +def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] - df.clip(lower=2, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + df.clip(lower=2, inplace=True) + else: + df.clip(lower=2, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 2e623f885b648..20522218876d6 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -367,10 +367,11 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): mask = subset > 3 - # TODO(CoW-warn) should warn -> mask is a DataFrame, which ends up going through - # DataFrame._where(..., inplace=True) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: subset[mask] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset[mask] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -867,18 +868,8 @@ def test_series_subset_set_with_indexer( and indexer.dtype.kind == "i" ): warn = FutureWarning - is_mask = ( - indexer_si is tm.setitem - and isinstance(indexer, np.ndarray) - and indexer.dtype.kind == "b" - ) if warn_copy_on_write: - # TODO(CoW-warn) should also warn for setting with mask - # -> Series.__setitem__ with boolean mask ends up using Series._set_values - # or Series._where depending on value being set - with tm.assert_cow_warning( - not is_mask, raise_on_extra_warnings=warn is not None - ): + with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None): indexer_si(subset)[indexer] = 0 else: with tm.assert_produces_warning(warn, match=msg): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 6416dd50daddc..f5ac2530a9ee2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1407,11 +1407,12 @@ def test_items(using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask(using_copy_on_write, dtype): +def test_putmask(using_copy_on_write, dtype, warn_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) view = df[:] df_orig = df.copy() - df[df == df] = 5 + with tm.assert_cow_warning(warn_copy_on_write): + df[df == df] = 5 if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1445,15 +1446,21 @@ def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): @pytest.mark.parametrize( "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] ) -def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): +def test_putmask_dont_copy_some_blocks( + using_copy_on_write, val, exp, warn, warn_copy_on_write +): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() indexer = DataFrame( [[True, False, False], [True, False, False]], columns=list("abc") ) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - df[indexer] = val + if warn_copy_on_write: + with tm.assert_cow_warning(): + df[indexer] = val + else: + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df[indexer] = val if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1785,13 +1792,17 @@ def test_update_frame(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(view, expected) -def test_update_series(using_copy_on_write): +def test_update_series(using_copy_on_write, warn_copy_on_write): ser1 = Series([1.0, 2.0, 3.0]) ser2 = Series([100.0], index=[1]) ser1_orig = ser1.copy() view = ser1[:] - ser1.update(ser2) + if warn_copy_on_write: + with tm.assert_cow_warning(): + ser1.update(ser2) + else: + ser1.update(ser2) expected = Series([1.0, 100.0, 3.0]) tm.assert_series_equal(ser1, expected) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index d11a2893becdc..3d8559a1905fc 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -279,14 +279,18 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method): +def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") view = df[:] method = getattr(df, method) - method(df["a"] > 1.6, -1, inplace=True) + if warn_copy_on_write: + with tm.assert_cow_warning(): + method(df["a"] > 1.6, -1, inplace=True) + else: + method(df["a"] > 1.6, -1, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f07c53060a06b..cbca15ea1ed2d 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame): + def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan @@ -732,7 +732,11 @@ def test_replace_for_new_dtypes(self, datetime_frame): tsframe.loc[tsframe.index[:5], "B"] = -1e8 b = tsframe["B"] - b[b == -1e8] = np.nan + if warn_copy_on_write: + with tm.assert_cow_warning(): + b[b == -1e8] = np.nan + else: + b[b == -1e8] = np.nan tsframe["B"] = b msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 21aab6652a300..b406118246e88 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -164,9 +164,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) From 8585bd761a05847705dee803a30c438588f363b5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:29:29 +0100 Subject: [PATCH 02/10] Fix --- pandas/tests/indexing/test_chaining_and_caching.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index b406118246e88..3243e5577feb4 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -176,9 +176,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": data})) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -189,9 +188,8 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df.response[mask] = "none" tm.assert_frame_equal(df, df_original) else: - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" + with tm.assert_cow_warning(warn_copy_on_write): + df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -202,7 +200,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) df["A"].iloc[0] = np.nan expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - # TODO(CoW-warn) custom warning message with tm.assert_cow_warning(warn_copy_on_write): df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) From cad8aad6e320e25c2d73d346a5243029866e6190 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:21:35 +0100 Subject: [PATCH 03/10] Update base.py --- pandas/core/internals/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index ee45707791fbf..5ac82c25bcde9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,6 +54,11 @@ class _AlreadyWarned: def __init__(self): + # This class is used on the manager level to the block level to + # ensure that we warn only once. The block method can update the + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for + # CoW warnings. self.warned_already = False From 4aa030af74a8d6c99c9225d55d06722eed8842dc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:52:58 +0100 Subject: [PATCH 04/10] Update base.py --- pandas/core/internals/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 5ac82c25bcde9..60ab251e437c7 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -54,10 +54,10 @@ class _AlreadyWarned: def __init__(self): - # This class is used on the manager level to the block level to + # This class is used on the manager level to the block level to # ensure that we warn only once. The block method can update the - # warned_already option without returning a value to keep the - # interface consistent. This is only a temporary solution for + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for # CoW warnings. self.warned_already = False From b5992b31ff6a18ee9a1dd35d4a0bbc07b9647dc2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 21:43:53 +0100 Subject: [PATCH 05/10] Update test --- pandas/tests/indexing/test_chaining_and_caching.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 08e9a94dc6ead..8287e7107ba66 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -155,8 +155,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) recarray = np.rec.fromarrays([data], names=["response"]) @@ -167,8 +165,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, DataFrame({"response": data})) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata})) df = DataFrame({"response": data, "response1": data}) @@ -179,8 +175,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: tm.assert_frame_equal(df, df_original) else: - with tm.assert_cow_warning(warn_copy_on_write): - df.response[mask] = "none" tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) # GH 6056 @@ -191,8 +185,6 @@ def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write) if using_copy_on_write: expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) else: - with tm.assert_cow_warning(warn_copy_on_write): - df["A"].iloc[0] = np.nan expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) result = df.head() tm.assert_frame_equal(result, expected) From 9dee585247789ad613464d0fdbea624b298a6327 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 1 Dec 2023 22:12:40 +0100 Subject: [PATCH 06/10] Fixup --- .../copy_view/test_chained_assignment_deprecation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 7b08d9b80fc9b..4f53dd8a78858 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -70,7 +70,7 @@ def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write): +def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) @@ -79,7 +79,11 @@ def test_series_setitem(indexer, using_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert len(record) == 1 + assert ( + len(record) == 2 + if warn_copy_on_write and isinstance(indexer, np.ndarray) + else 1 + ) if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: From bfc6c804e1dfe709292300d308be954a943c0845 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 2 Dec 2023 00:01:13 +0100 Subject: [PATCH 07/10] CoW: Add warnings for interpolate --- pandas/core/internals/base.py | 7 ++++- pandas/core/internals/blocks.py | 31 +++++++++++++++++++- pandas/tests/copy_view/test_interp_fillna.py | 30 +++++++++++++++++-- 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 60ab251e437c7..3b4a1fa076cb1 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -281,7 +281,11 @@ def replace_list( def interpolate(self, inplace: bool, **kwargs) -> Self: return self.apply_with_block( - "interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write() + "interpolate", + inplace=inplace, + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: @@ -290,6 +294,7 @@ def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: inplace=inplace, **kwargs, using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), ) def shift(self, periods: int, fill_value) -> Self: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ec58b46371149..955b664443fe5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1654,6 +1654,7 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op @@ -1674,6 +1675,19 @@ def pad_or_backfill( limit_area=limit_area, copy=copy, ) + if ( + not copy + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True if axis == 1: new_values = new_values.T @@ -1694,6 +1708,7 @@ def interpolate( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, + already_warned=None, **kwargs, ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") @@ -1732,6 +1747,20 @@ def interpolate( ) data = extract_array(new_values, extract_numpy=True) + if ( + not copy + and warn_copy_on_write() + and already_warned is not None + and not already_warned.warned_already + ): + if self.refs.has_reference(): + warnings.warn( + COW_WARNING_SETITEM_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + already_warned.warned_already = True + nb = self.make_block_same_class(data, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") @@ -2175,9 +2204,9 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, + already_warned=None, ) -> list[Block]: values = self.values - copy, refs = self._get_refs_and_copy(using_cow, inplace) if values.ndim == 2 and axis == 1: # NDArrayBackedExtensionArray.fillna assumes axis=0 diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index cdb06de90a568..d214b62312426 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -91,12 +91,13 @@ def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals): @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_with_refs(using_copy_on_write, vals): +def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write): df = DataFrame({"a": [1, np.nan, 2]}) df_orig = df.copy() arr = get_array(df, "a") view = df[:] - df.interpolate(method="linear", inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.interpolate(method="linear", inplace=True) if using_copy_on_write: # Check that copy was triggered in interpolate and that we don't @@ -109,6 +110,31 @@ def test_interpolate_inplace_with_refs(using_copy_on_write, vals): assert np.shares_memory(arr, get_array(df, "a")) +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +@pytest.mark.parametrize("dtype", ["float64", "Float64"]) +def test_interp_fill_functions_inplace( + using_copy_on_write, func, warn_copy_on_write, dtype +): + # Check that these takes the same code paths as interpolate + df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype) + df_orig = df.copy() + arr = get_array(df, "a") + view = df[:] + + with tm.assert_cow_warning(warn_copy_on_write and dtype == "float64"): + getattr(df, func)(inplace=True) + + if using_copy_on_write: + # Check that copy was triggered in interpolate and that we don't + # have any references left + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + else: + assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") + + def test_interpolate_cleaned_fill_method(using_copy_on_write): # Check that "method is set to None" case works correctly df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) From 7c5961fb8ffa3d404bcbac324ba3a049c51602b1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 2 Dec 2023 00:04:31 +0100 Subject: [PATCH 08/10] Fixup --- pandas/tests/copy_view/test_methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index ba8e4bd684198..dba32460a6165 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1607,7 +1607,8 @@ def test_interpolate_creates_copy(using_copy_on_write, warn_copy_on_write): view = df[:] expected = df.copy() - df.ffill(inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.ffill(inplace=True) with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 0] = 100.5 From 6121ab2c43ba0d99d552bd8bb3073f14d180f9c0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 2 Dec 2023 18:48:07 +0100 Subject: [PATCH 09/10] Update --- .../copy_view/test_chained_assignment_deprecation.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 4f53dd8a78858..f83d810cd4614 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -36,11 +36,15 @@ def test_methods_iloc_warn(using_copy_on_write): ("ffill", ()), ], ) -def test_methods_iloc_getitem_item_cache(func, args, using_copy_on_write): - df = DataFrame({"a": [1, 2, 3], "b": 1}) +def test_methods_iloc_getitem_item_cache( + func, args, using_copy_on_write, warn_copy_on_write +): + df = DataFrame({"a": [1.5, 2, 3], "b": 1.5}) ser = df.iloc[:, 0] - # TODO(CoW-warn) should warn about updating a view - getattr(ser, func)(*args, inplace=True) + with tm.assert_cow_warning( + warn_copy_on_write and func not in ("replace", "fillna") + ): + getattr(ser, func)(*args, inplace=True) # parent that holds item_cache is dead, so don't increase ref count ser = df.copy()["a"] From 1c87f0ad6f8028f7b1c1817dfea05b6a735c289a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Dec 2023 09:29:30 +0100 Subject: [PATCH 10/10] cleanup merge --- pandas/core/internals/blocks.py | 4 ++-- .../copy_view/test_chained_assignment_deprecation.py | 7 ++----- pandas/tests/frame/methods/test_replace.py | 8 ++------ pandas/tests/indexing/test_chaining_and_caching.py | 2 +- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4b515f7b6b885..f0f8430c991ad 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1686,7 +1686,7 @@ def pad_or_backfill( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), ) @@ -1758,7 +1758,7 @@ def interpolate( ): if self.refs.has_reference(): warnings.warn( - COW_WARNING_SETITEM_MSG, + COW_WARNING_GENERAL_MSG, FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index b6801745a54f0..2829617c84cd2 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -47,6 +47,7 @@ def test_methods_iloc_getitem_item_cache( ): df = DataFrame({"a": [1.5, 2, 3], "b": 1.5}) ser = df.iloc[:, 0] + # TODO(CoW-warn) should warn about updating a view for all methods with tm.assert_cow_warning( warn_copy_on_write and func not in ("replace", "fillna") ): @@ -89,11 +90,7 @@ def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): # fail if multiple warnings are raised with pytest.warns() as record: df["a"][indexer] = 0 - assert ( - len(record) == 2 - if warn_copy_on_write and isinstance(indexer, np.ndarray) - else 1 - ) + assert len(record) == 1 if using_copy_on_write: assert record[0].category == ChainedAssignmentError else: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 28e4d298acc2b..13e2c1a249ac2 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -717,7 +717,7 @@ def test_replace_value_is_none(self, datetime_frame): datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 - def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): + def test_replace_for_new_dtypes(self, datetime_frame): # dtypes tsframe = datetime_frame.copy().astype(np.float32) tsframe.loc[tsframe.index[:5], "A"] = np.nan @@ -729,11 +729,7 @@ def test_replace_for_new_dtypes(self, datetime_frame, warn_copy_on_write): tsframe.loc[tsframe.index[:5], "A"] = np.nan tsframe.loc[tsframe.index[-5:], "A"] = np.nan - tsframe.loc[tsframe.index[:5], "B"] = -1e8 - - b = tsframe["B"] - with tm.assert_cow_warning(warn_copy_on_write): - b[b == -1e8] = np.nan + tsframe.loc[tsframe.index[:5], "B"] = np.nan msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): # TODO: what is this even testing? diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 2e08b393d0283..b97df376ac47f 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -139,7 +139,7 @@ def test_altering_series_clears_parent_cache( class TestChaining: - def test_setitem_chained_setfault(self, using_copy_on_write, warn_copy_on_write): + def test_setitem_chained_setfault(self, using_copy_on_write): # GH6026 data = ["right", "left", "left", "left", "right", "left", "timeout"] mdata = ["right", "left", "left", "left", "right", "left", "none"]