From 281c8abb0a14096c440d9de98eba4c2d5e3bf3de Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Apr 2021 11:51:57 -0700 Subject: [PATCH 1/2] TYP: resample --- pandas/core/resample.py | 40 +++++++++++++++++++++++++---------- pandas/core/sorting.py | 14 ++++++++---- pandas/core/window/rolling.py | 12 +++++------ 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 469325cf04189..b9e3999b9869c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -12,6 +12,7 @@ from pandas._libs import lib from pandas._libs.tslibs import ( + BaseOffset, IncompatibleFrequency, NaT, Period, @@ -489,11 +490,11 @@ def _apply_loffset(self, result): self.loffset = None return result - def _get_resampler_for_grouping(self, groupby, **kwargs): + def _get_resampler_for_grouping(self, groupby): """ Return the correct class for resampling with groupby. """ - return self._resampler_for_grouping(self, groupby=groupby, **kwargs) + return self._resampler_for_grouping(self, groupby=groupby) def _wrap_result(self, result): """ @@ -1039,9 +1040,10 @@ class _GroupByMixin(PandasObject): Provide the groupby facilities. 
""" - _attributes: list[str] + _attributes: list[str] # in practice the same as Resampler._attributes - def __init__(self, obj, *args, **kwargs): + def __init__(self, obj, **kwargs): + # reached via ._gotitem and _get_resampler_for_grouping parent = kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) @@ -1053,8 +1055,7 @@ def __init__(self, obj, *args, **kwargs): for attr in self._attributes: setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - # error: Too many arguments for "__init__" of "object" - super().__init__(None) # type: ignore[call-arg] + self.binner = kwargs.get("binner", getattr(parent, "binner")) self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -1822,8 +1823,13 @@ def _take_new_index( def _get_timestamp_range_edges( - first, last, freq, closed="left", origin="start_day", offset=None -): + first: Timestamp, + last: Timestamp, + freq: BaseOffset, + closed: str = "left", + origin="start_day", + offset=None, +) -> tuple[Timestamp, Timestamp]: """ Adjust the `first` Timestamp to the preceding Timestamp that resides on the provided offset. Adjust the `last` Timestamp to the following @@ -1895,8 +1901,13 @@ def _get_timestamp_range_edges( def _get_period_range_edges( - first, last, freq, closed="left", origin="start_day", offset=None -): + first: Period, + last: Period, + freq: BaseOffset, + closed: str = "left", + origin="start_day", + offset=None, +) -> tuple[Period, Period]: """ Adjust the provided `first` and `last` Periods to the respective Period of the given offset that encompasses them. @@ -2029,7 +2040,14 @@ def _adjust_dates_anchored( return fresult, lresult -def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): +def asfreq( + obj: FrameOrSeries, + freq, + method=None, + how=None, + normalize: bool = False, + fill_value=None, +) -> FrameOrSeries: """ Utility frequency conversion method for Series/DataFrame. 
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index dd7ae904c866c..f5cd390f077a6 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -232,7 +232,7 @@ def decons_group_index(comp_labels, shape): return label_list[::-1] -def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool): +def decons_obs_group_ids(comp_ids: np.ndarray, obs_ids, shape, labels, xnull: bool): """ Reconstruct labels from observed group ids. @@ -360,6 +360,10 @@ def nargsort( key : Optional[Callable], default None mask : Optional[np.ndarray], default None Passed when called by ExtensionArray.argsort. + + Returns + ------- + np.ndarray[np.intp] """ if key is not None: @@ -404,7 +408,7 @@ def nargsort( indexer = np.concatenate([nan_idx, indexer]) else: raise ValueError(f"invalid na_position: {na_position}") - return indexer + return ensure_platform_int(indexer) def nargminmax(values, method: str, axis: int = 0): @@ -644,7 +648,9 @@ def get_group_index_sorter( return ensure_platform_int(sorter) -def compress_group_index(group_index, sort: bool = True): +def compress_group_index( + group_index: np.ndarray, sort: bool = True +) -> tuple[np.ndarray, np.ndarray]: """ Group_index is offsets into cartesian product of all possible labels. 
This space can be huge, so this function compresses it, by computing offsets @@ -682,7 +688,7 @@ def _reorder_by_uniques( sorter = uniques.argsort() # reverse_indexer is where elements came from - reverse_indexer = np.empty(len(sorter), dtype=np.int64) + reverse_indexer = np.empty(len(sorter), dtype=np.intp) reverse_indexer.put(sorter, np.arange(len(sorter))) mask = labels < 0 diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e4710254d9311..130cac5d60cf1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -43,10 +43,7 @@ ) from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCDatetimeIndex, - ABCPeriodIndex, ABCSeries, - ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import notna @@ -58,8 +55,11 @@ ) import pandas.core.common as com from pandas.core.indexes.api import ( + DatetimeIndex, Index, MultiIndex, + PeriodIndex, + TimedeltaIndex, ) from pandas.core.internals import ArrayManager from pandas.core.reshape.concat import concat @@ -1455,9 +1455,7 @@ def validate(self): # we allow rolling on a datetimelike index if ( self.obj.empty - or isinstance( - self._on, (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex) - ) + or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) ) and isinstance(self.window, (str, BaseOffset, timedelta)): self._validate_monotonic() @@ -1470,7 +1468,7 @@ def validate(self): f"passed window {self.window} is not " "compatible with a datetimelike index" ) from err - if isinstance(self._on, ABCPeriodIndex): + if isinstance(self._on, PeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) else: self._win_freq_i8 = freq.nanos From c5f4a977229d9150d80ed73eee49057f73daf243 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Apr 2021 15:43:38 -0700 Subject: [PATCH 2/2] TYP: closed, offset --- pandas/core/resample.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/core/resample.py
b/pandas/core/resample.py index b9e3999b9869c..14d065de7f77f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -4,6 +4,7 @@ from datetime import timedelta from textwrap import dedent from typing import ( + TYPE_CHECKING, Callable, no_type_check, ) @@ -85,6 +86,9 @@ Tick, ) +if TYPE_CHECKING: + from typing import Literal + _shared_docs_kwargs: dict[str, str] = {} @@ -1055,7 +1059,8 @@ def __init__(self, obj, **kwargs): for attr in self._attributes: setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - self.binner = kwargs.get("binner", getattr(parent, "binner")) + # error: Too many arguments for "__init__" of "object" + super().__init__(None) # type: ignore[call-arg] self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -1451,7 +1456,7 @@ class TimeGrouper(Grouper): def __init__( self, freq="Min", - closed: str | None = None, + closed: Literal["left", "right"] | None = None, label: str | None = None, how="mean", axis=0, @@ -1826,9 +1831,9 @@ def _get_timestamp_range_edges( first: Timestamp, last: Timestamp, freq: BaseOffset, - closed: str = "left", + closed: Literal["right", "left"] = "left", origin="start_day", - offset=None, + offset: Timedelta | None = None, ) -> tuple[Timestamp, Timestamp]: """ Adjust the `first` Timestamp to the preceding Timestamp that resides on @@ -1904,9 +1909,9 @@ def _get_period_range_edges( first: Period, last: Period, freq: BaseOffset, - closed: str = "left", + closed: Literal["right", "left"] = "left", origin="start_day", - offset=None, + offset: Timedelta | None = None, ) -> tuple[Period, Period]: """ Adjust the provided `first` and `last` Periods to the respective Period of @@ -1970,7 +1975,12 @@ def _insert_nat_bin( def _adjust_dates_anchored( - first, last, freq, closed="right", origin="start_day", offset=None + first, + last, + freq, + closed: Literal["right", "left"] = "right", + origin="start_day", + offset: Timedelta | None = None, ): # First and last offsets 
should be calculated from the start day to fix an # error cause by resampling across multiple days when a one day period is