diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 535bc5f3bd7bf..1be72c44d1a55 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -443,6 +443,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`) - Bug in :class:`SeriesGroupBy` and :class:`DataFrameGroupBy` on an empty ``Series`` or ``DataFrame`` would lose index, columns, and/or data types when directly using the methods ``idxmax``, ``idxmin``, ``mad``, ``min``, ``max``, ``sum``, ``prod``, and ``skew`` or using them through ``apply``, ``aggregate``, or ``resample`` (:issue:`26411`) +- Bug in :meth:`DataFrameGroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` if a :class:`core.window.rolling.RollingGroupby` object was created (:issue:`39732`) - Bug in :meth:`DataFrameGroupBy.sample` where error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`) - Bug in :meth:`DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` would sometimes raise ``SpecificationError`` when passed a dictionary and columns were missing; will now always raise a ``KeyError`` instead (:issue:`40004`) - diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index c169e29b74dbb..927eb8eed8454 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -5,7 +5,6 @@ """ import collections from typing import List -import warnings from pandas._typing import final @@ -31,10 +30,7 @@ def _shallow_copy(self, obj, **kwargs): obj = obj.obj for attr in self._attributes: if attr not in kwargs: - # TODO: Remove once win_type deprecation is enforced - with warnings.catch_warnings(): - 
warnings.filterwarnings("ignore", "win_type", FutureWarning) - kwargs[attr] = getattr(self, attr) + kwargs[attr] = getattr(self, attr) return self._constructor(obj, **kwargs) @@ -65,10 +61,7 @@ def _gotitem(self, key, ndim, subset=None): # we need to make a shallow copy of ourselves # with the same groupby - # TODO: Remove once win_type deprecation is enforced - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "win_type", FutureWarning) - kwargs = {attr: getattr(self, attr) for attr in self._attributes} + kwargs = {attr: getattr(self, attr) for attr in self._attributes} # Try to select from a DataFrame, falling back to a Series try: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7bcdb348b8a1e..bf9fdb5d0cff7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1918,7 +1918,12 @@ def rolling(self, *args, **kwargs): """ from pandas.core.window import RollingGroupby - return RollingGroupby(self, *args, **kwargs) + return RollingGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + **kwargs, + ) @final @Substitution(name="groupby") @@ -1930,7 +1935,12 @@ def expanding(self, *args, **kwargs): """ from pandas.core.window import ExpandingGroupby - return ExpandingGroupby(self, *args, **kwargs) + return ExpandingGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + **kwargs, + ) @final @Substitution(name="groupby") @@ -1941,7 +1951,12 @@ def ewm(self, *args, **kwargs): """ from pandas.core.window import ExponentialMovingWindowGroupby - return ExponentialMovingWindowGroupby(self, *args, **kwargs) + return ExponentialMovingWindowGroupby( + self._selected_obj, + *args, + _grouper=self.grouper, + **kwargs, + ) @final def _fill(self, direction, limit=None): diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 518119b63209e..208b5ab0023eb 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -213,7 +213,15 @@ class 
ExponentialMovingWindow(BaseWindow): 4 3.233686 """ - _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] + _attributes = [ + "com", + "min_periods", + "adjust", + "ignore_na", + "axis", + "halflife", + "times", + ] def __init__( self, @@ -227,17 +235,18 @@ def __init__( ignore_na: bool = False, axis: int = 0, times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, - **kwargs, ): - self.obj = obj - self.min_periods = max(int(min_periods), 1) + super().__init__( + obj=obj, + min_periods=max(int(min_periods), 1), + on=None, + center=False, + closed=None, + method="single", + axis=axis, + ) self.adjust = adjust self.ignore_na = ignore_na - self.axis = axis - self.on = None - self.center = False - self.closed = None - self.method = "single" if times is not None: if isinstance(times, str): times = self._selected_obj[times] @@ -556,9 +565,7 @@ class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow) Provide an exponential moving window groupby implementation. 
""" - @property - def _constructor(self): - return ExponentialMovingWindow + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: """ @@ -569,7 +576,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouper.indices, window_indexer=ExponentialMovingWindowIndexer, ) return window_indexer diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 64e092d853456..c201216a91ab1 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -94,9 +94,7 @@ class Expanding(RollingAndExpandingMixin): _attributes = ["min_periods", "center", "axis", "method"] - def __init__( - self, obj, min_periods=1, center=None, axis=0, method="single", **kwargs - ): + def __init__(self, obj, min_periods=1, center=None, axis=0, method="single"): super().__init__( obj=obj, min_periods=min_periods, center=center, axis=axis, method=method ) @@ -629,9 +627,7 @@ class ExpandingGroupby(BaseWindowGroupby, Expanding): Provide a expanding groupby implementation. 
""" - @property - def _constructor(self): - return Expanding + _attributes = Expanding._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: """ @@ -642,7 +638,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouper.indices, window_indexer=ExpandingIndexer, ) return window_indexer diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 20bf0142b0855..844f04ab7c196 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +import copy from datetime import timedelta from functools import partial import inspect @@ -64,10 +65,6 @@ ) import pandas.core.common as common from pandas.core.construction import extract_array -from pandas.core.groupby.base import ( - GotItemMixin, - ShallowMixin, -) from pandas.core.indexes.api import ( Index, MultiIndex, @@ -114,19 +111,10 @@ from pandas.core.internals import Block # noqa:F401 -class BaseWindow(ShallowMixin, SelectionMixin): +class BaseWindow(SelectionMixin): """Provides utilities for performing windowing operations.""" - _attributes: List[str] = [ - "window", - "min_periods", - "center", - "win_type", - "axis", - "on", - "closed", - "method", - ] + _attributes: List[str] = [] exclusions: Set[str] = set() def __init__( @@ -140,10 +128,7 @@ def __init__( on: Optional[Union[str, Index]] = None, closed: Optional[str] = None, method: str = "single", - **kwargs, ): - - self.__dict__.update(kwargs) self.obj = obj self.on = on self.closed = closed @@ -262,8 +247,12 @@ def _gotitem(self, key, ndim, subset=None): # create a new object to prevent aliasing if subset is None: subset = self.obj - self = self._shallow_copy(subset) - self._reset_cache() + # TODO: Remove once win_type deprecation is enforced + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", 
"win_type", FutureWarning) + self = type(self)( + subset, **{attr: getattr(self, attr) for attr in self._attributes} + ) if subset.ndim == 2: if is_scalar(key) and key in subset or is_list_like(key): self._selection = key @@ -289,7 +278,7 @@ def __repr__(self) -> str: attrs_list = ( f"{attr_name}={getattr(self, attr_name)}" for attr_name in self._attributes - if getattr(self, attr_name, None) is not None + if getattr(self, attr_name, None) is not None and attr_name[0] != "_" ) attrs = ",".join(attrs_list) return f"{type(self).__name__} [{attrs}]" @@ -544,19 +533,23 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate -class BaseWindowGroupby(GotItemMixin, BaseWindow): +class BaseWindowGroupby(BaseWindow): """ Provide the groupby windowing facilities. """ - def __init__(self, obj, *args, **kwargs): - kwargs.pop("parent", None) - groupby = kwargs.pop("groupby", None) - if groupby is None: - groupby, obj = obj, obj._selected_obj - self._groupby = groupby - self._groupby.mutated = True - self._groupby.grouper.mutated = True + _attributes = ["_grouper"] + + def __init__( + self, + obj, + *args, + _grouper=None, + **kwargs, + ): + if _grouper is None: + raise ValueError("Must pass a Grouper object.") + self._grouper = _grouper super().__init__(obj, *args, **kwargs) def _apply( @@ -576,9 +569,7 @@ def _apply( # 1st set of levels = group by labels # 2nd set of levels = original index # Ignore 2nd set of levels if a group by label include an index level - result_index_names = [ - grouping.name for grouping in self._groupby.grouper._groupings - ] + result_index_names = copy.copy(self._grouper.names) grouped_object_index = None column_keys = [ @@ -595,10 +586,10 @@ def _apply( # Our result will have still kept the column in the result result = result.drop(columns=column_keys, errors="ignore") - codes = self._groupby.grouper.codes - levels = self._groupby.grouper.levels + codes = self._grouper.codes + levels = copy.copy(self._grouper.levels) - group_indices = 
self._groupby.grouper.indices.values() + group_indices = self._grouper.indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -632,7 +623,7 @@ def _apply_pairwise( Apply the given pairwise function given 2 pandas objects (DataFrame/Series) """ # Manually drop the grouping column first - target = target.drop(columns=self._groupby.grouper.names, errors="ignore") + target = target.drop(columns=self._grouper.names, errors="ignore") result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels if other is not None: @@ -643,12 +634,12 @@ def _apply_pairwise( result = concat( [ result.take(gb_indices).reindex(result.index) - for gb_indices in self._groupby.indices.values() + for gb_indices in self._grouper.indices.values() ] ) gb_pairs = ( - common.maybe_make_list(pair) for pair in self._groupby.indices.keys() + common.maybe_make_list(pair) for pair in self._grouper.indices.keys() ) groupby_codes = [] groupby_levels = [] @@ -662,10 +653,10 @@ def _apply_pairwise( else: # When we evaluate the pairwise=True result, repeat the groupby # labels by the number of columns in the original object - groupby_codes = self._groupby.grouper.codes - groupby_levels = self._groupby.grouper.levels + groupby_codes = self._grouper.codes + groupby_levels = self._grouper.levels - group_indices = self._groupby.grouper.indices.values() + group_indices = self._grouper.indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -692,7 +683,7 @@ def _apply_pairwise( # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes result_levels = groupby_levels + result_levels - result_names = self._groupby.grouper.names + result_names + result_names = self._grouper.names + result_names result_index = MultiIndex( result_levels, result_codes, names=result_names, verify_integrity=False @@ -708,9 +699,9 @@ def _create_data(self, obj: FrameOrSeries) 
-> FrameOrSeries: # to the groups # GH 36197 if not obj.empty: - groupby_order = np.concatenate( - list(self._groupby.grouper.indices.values()) - ).astype(np.int64) + groupby_order = np.concatenate(list(self._grouper.indices.values())).astype( + np.int64 + ) obj = obj.take(groupby_order) return super()._create_data(obj) @@ -900,6 +891,17 @@ class Window(BaseWindow): 2013-01-01 09:00:06 4.0 """ + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + def validate(self): super().validate() @@ -1390,6 +1392,18 @@ def corr_func(x, y): class Rolling(RollingAndExpandingMixin): + + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + def validate(self): super().validate() @@ -2168,9 +2182,7 @@ class RollingGroupby(BaseWindowGroupby, Rolling): Provide a rolling groupby implementation. """ - @property - def _constructor(self): - return Rolling + _attributes = Rolling._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: """ @@ -2200,7 +2212,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: window_indexer = GroupbyIndexer( index_array=index_array, window_size=window, - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouper.indices, window_indexer=rolling_indexer, indexer_kwargs=indexer_kwargs, ) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index c272544e6af9e..1b9259fd8240e 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -53,7 +53,7 @@ def test_constructor_invalid(frame_or_series, w): @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(method): # see gh-12811 - e = Expanding(Series([2, 4, 6]), window=2) + e = Expanding(Series([2, 4, 6])) msg = "numpy operations are not valid with window objects" diff --git a/pandas/tests/window/test_groupby.py 
b/pandas/tests/window/test_groupby.py index d3c2b5467e5bb..c3c5bbe460134 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -83,6 +83,9 @@ def test_rolling(self, f): result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["std", "var"]) @@ -92,6 +95,9 @@ def test_rolling_ddof(self, f): result = getattr(r, f)(ddof=1) expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -100,10 +106,14 @@ def test_rolling_ddof(self, f): def test_rolling_quantile(self, interpolation): g = self.frame.groupby("A") r = g.rolling(window=4) + result = r.quantile(0.4, interpolation=interpolation) expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["corr", "cov"]) @@ -137,6 +147,9 @@ def test_rolling_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) def test_rolling_apply_mutability(self): @@ -643,6 +656,16 @@ def test_groupby_rolling_resulting_multiindex(self): ) tm.assert_index_equal(result.index, expected_index) + def test_groupby_rolling_object_doesnt_affect_groupby_apply(self): + # GH 39732 + g = self.frame.groupby("A") + expected = g.apply(lambda x: 
x.rolling(4).sum()).index + _ = g.rolling(window=4) + result = g.apply(lambda x: x.rolling(4).sum()).index + tm.assert_index_equal(result, expected) + assert not g.mutated + assert not g.grouper.mutated + class TestExpanding: def setup_method(self): @@ -657,6 +680,9 @@ def test_expanding(self, f): result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.expanding(), f)()) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["std", "var"]) @@ -666,6 +692,9 @@ def test_expanding_ddof(self, f): result = getattr(r, f)(ddof=0) expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -674,10 +703,14 @@ def test_expanding_ddof(self, f): def test_expanding_quantile(self, interpolation): g = self.frame.groupby("A") r = g.expanding() + result = r.quantile(0.4, interpolation=interpolation) expected = g.apply( lambda x: x.expanding().quantile(0.4, interpolation=interpolation) ) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["corr", "cov"]) @@ -715,6 +748,9 @@ def test_expanding_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected)