From f5dea7e67eb696ba2b4f26a8c817dd07825ce6ea Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 6 Jun 2023 07:10:04 -0700 Subject: [PATCH 1/2] BUG: Fix metadata propagation in squeeze and describe --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/generic.py | 7 +++++-- pandas/tests/generic/test_finalize.py | 7 ++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6bb972c21d927..480548f705763 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -482,6 +482,7 @@ Styler Metadata ^^^^^^^^ +- Fixed metadata propagation in :meth:`DataFrame.squeeze` and :meth:`DataFrame.describe` (:issue:`28283`) - Fixed metadata propagation in :meth:`DataFrame.std` (:issue:`28283`) Other diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 90a0444872ec7..16287c194db8c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -957,12 +957,15 @@ def squeeze(self, axis: Axis | None = None): 1 """ axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) - return self.iloc[ + result = self.iloc[ tuple( 0 if i in axes and len(a) == 1 else slice(None) for i, a in enumerate(self.axes) ) ] + if isinstance(result, NDFrame): + result = result.__finalize__(self, method="squeeze") + return result # ---------------------------------------------------------------------- # Rename @@ -11137,7 +11140,7 @@ def describe( include=include, exclude=exclude, percentiles=percentiles, - ) + ).__finalize__(self, method="describe") @final def pct_change( diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 9dfa2c8a5a90a..bb9ca42b5b3e4 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -246,8 +246,8 @@ (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), (pd.DataFrame, frame_data, 
operator.methodcaller("pop", "A")), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("squeeze")), - marks=not_implemented_mark, + # Squeeze on columns, otherwise we'll end up with a scalar + (pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")), ), (pd.Series, ([1, 2],), operator.methodcaller("squeeze")), (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")), @@ -374,11 +374,9 @@ ), pytest.param( (pd.Series, ([1, 2],), operator.methodcaller("describe")), - marks=not_implemented_mark, ), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("describe")), - marks=not_implemented_mark, ), (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), @@ -767,7 +765,6 @@ def test_groupby_finalize(obj, method): lambda x: x.agg("sem"), lambda x: x.agg("size"), lambda x: x.agg("ohlc"), - lambda x: x.agg("describe"), ], ) @not_implemented_mark From 3c59ba42d156cc310b31a39dd3d14c4eda5b8c5a Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Tue, 6 Jun 2023 11:58:54 -0700 Subject: [PATCH 2/2] remove unnecessary param --- pandas/tests/generic/test_finalize.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index bb9ca42b5b3e4..e6a0687155a6a 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -245,10 +245,8 @@ ), (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), - pytest.param( - # Squeeze on columns, otherwise we'll end up with a scalar - (pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")), - ), + # Squeeze on columns, otherwise we'll end up with a scalar + (pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")), (pd.Series, 
([1, 2],), operator.methodcaller("squeeze")), (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")), (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")), @@ -372,12 +370,8 @@ ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), operator.methodcaller("tz_localize", "CET"), ), - pytest.param( - (pd.Series, ([1, 2],), operator.methodcaller("describe")), - ), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("describe")), - ), + (pd.Series, ([1, 2],), operator.methodcaller("describe")), + (pd.DataFrame, frame_data, operator.methodcaller("describe")), (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())),