From 46afccf716bb7dd07f2e047177d27ea780cdf278 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 9 Feb 2023 18:59:39 +0100 Subject: [PATCH 1/2] CoW: Ensure that iterrows does not allow mutating parent --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/frame.py | 6 ++++++ pandas/tests/copy_view/test_methods.py | 10 ++++++++++ 3 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ea18d69f0dbb..fc6f22e4656a2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -222,6 +222,7 @@ Copy-on-Write improvements - :meth:`DataFrame.to_timestamp` / :meth:`Series.to_timestamp` - :meth:`DataFrame.to_period` / :meth:`Series.to_period` - :meth:`DataFrame.truncate` + - :meth:`DataFrame.iterrows` - :meth:`DataFrame.tz_convert` / :meth:`Series.tz_localize` - :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects` - :meth:`DataFrame.astype` / :meth:`Series.astype` diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2361c254f5161..1a49cfa87638a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1390,8 +1390,14 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: """ columns = self.columns klass = self._constructor_sliced + using_cow = using_copy_on_write() for k, v in zip(self.index, self.values): s = klass(v, index=columns, name=k).__finalize__(self) + if using_cow and self._mgr.is_single_block: + s._mgr.blocks[0].refs = self._mgr.blocks[0].refs # type: ignore[union-attr] # noqa + s._mgr.blocks[0].refs.add_reference( # type: ignore[union-attr] + s._mgr.blocks[0] + ) # type: ignore[arg-type, union-attr] yield k, s def itertuples( diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 6b54345723118..723cf4066b852 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1272,6 +1272,16 @@ def test_asfreq_noop(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_iterrows(using_copy_on_write): + df = DataFrame({"a": 0, "b": 1}, index=[1, 2, 3]) + df_orig = df.copy() + + for _, sub in df.iterrows(): + sub.iloc[0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + + def test_interpolate_creates_copy(using_copy_on_write): # GH#51126 df = DataFrame({"a": [1.5, np.nan, 3]}) From eb1e1a1fd920196106e01c29ff6061f294acc7c3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 15 Feb 2023 22:44:12 +0100 Subject: [PATCH 2/2] Fix mypy --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8cfc7961d2d16..4154def5a7b77 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1398,8 +1398,8 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: if using_cow and self._mgr.is_single_block: s._mgr.blocks[0].refs = self._mgr.blocks[0].refs # type: ignore[union-attr] # noqa s._mgr.blocks[0].refs.add_reference( # type: ignore[union-attr] - s._mgr.blocks[0] - ) # type: ignore[arg-type, union-attr] + s._mgr.blocks[0] # type: ignore[arg-type, union-attr] + ) yield k, s def itertuples(