From 265acdffc79fc331843450cd07e4cfacfffb67e1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 May 2020 19:17:35 -0700 Subject: [PATCH] REF: move bits of offsets to liboffsets, de-privatize --- asv_bench/benchmarks/io/parsers.py | 4 +- pandas/_libs/tslibs/frequencies.pyx | 6 +- pandas/_libs/tslibs/offsets.pyx | 113 ++++++++++++++- pandas/_libs/tslibs/parsing.pyx | 4 +- pandas/_libs/tslibs/period.pyx | 6 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 4 +- pandas/io/parsers.py | 4 +- pandas/tests/io/parser/test_parse_dates.py | 4 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/offsets.py | 159 ++++----------------- 11 files changed, 157 insertions(+), 151 deletions(-) diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py index c5e099bd44eac..ec3eddfff7184 100644 --- a/asv_bench/benchmarks/io/parsers.py +++ b/asv_bench/benchmarks/io/parsers.py @@ -2,7 +2,7 @@ try: from pandas._libs.tslibs.parsing import ( - _concat_date_cols, + concat_date_cols, _does_string_look_like_datetime, ) except ImportError: @@ -39,4 +39,4 @@ def setup(self, value, dim): ) def time_check_concat(self, value, dim): - _concat_date_cols(self.object) + concat_date_cols(self.object) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index d60f5cfd3f8c1..31747f96399ee 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -175,13 +175,13 @@ cpdef get_freq_code(freqstr): if is_integer_object(freqstr): return freqstr, 1 - base, stride = _base_and_stride(freqstr) + base, stride = base_and_stride(freqstr) code = _period_str_to_code(base) return code, stride -cpdef _base_and_stride(str freqstr): +cpdef base_and_stride(str freqstr): """ Return base freq and stride info from string representation @@ -267,7 +267,7 @@ cpdef str get_base_alias(freqstr): ------- base_alias : str """ - return _base_and_stride(freqstr)[0] + return base_and_stride(freqstr)[0] cpdef int get_to_timestamp_base(int base): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4c7d03d51e909..3dfaa36888f62 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2,6 +2,7 @@ import cython import time from typing import Any +import warnings from cpython.datetime cimport (PyDateTime_IMPORT, PyDateTime_Check, PyDelta_Check, @@ -103,7 +104,7 @@ def as_datetime(obj): return obj -cpdef bint _is_normalized(dt): +cpdef bint is_normalized(dt): if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0): return False @@ -230,7 +231,7 @@ def _get_calendar(weekmask, holidays, calendar): holidays = holidays + calendar.holidays().tolist() except AttributeError: pass - holidays = [_to_dt64D(dt) for dt in holidays] + holidays = [to_dt64D(dt) for dt in holidays] holidays = tuple(sorted(holidays)) kwargs = {'weekmask': weekmask} @@ -241,7 +242,7 @@ def _get_calendar(weekmask, holidays, calendar): return busdaycalendar, holidays -def _to_dt64D(dt): +def to_dt64D(dt): # Currently # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') # numpy.datetime64('2013-05-01T02:00:00.000000+0200') @@ -264,7 +265,7 @@ def _to_dt64D(dt): # Validation -def _validate_business_time(t_input): +def validate_business_time(t_input): if isinstance(t_input, str): try: t = time.strptime(t_input, '%H:%M') @@ -440,6 +441,9 @@ class _BaseOffset: # that allows us to use methods that can go in a `cdef class` return self * 1 + # ------------------------------------------------------------------ + # Name and Rendering Methods + def __repr__(self) -> str: className = getattr(self, '_outputName', type(self).__name__) @@ -455,6 +459,44 @@ class _BaseOffset: out = f'<{n_str}{className}{plural}{self._repr_attrs()}>' return out + @property + def name(self) -> str: + return self.rule_code + + @property + def _prefix(self) -> str: + raise NotImplementedError("Prefix not defined") + + @property + def rule_code(self) -> str: + return self._prefix + + @property + def freqstr(self) -> str: + try: + code = self.rule_code + except NotImplementedError: + return str(repr(self)) + + if self.n != 1: + fstr = f"{self.n}{code}" + else: + fstr = code + + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + + return fstr + + def _offset_str(self) -> str: + return "" + + # ------------------------------------------------------------------ + def _get_offset_day(self, datetime other): # subclass must implement `_day_opt`; calling from the base class # will raise NotImplementedError. @@ -530,6 +572,26 @@ class _BaseOffset: return state + @property + def nanos(self): + raise ValueError(f"{self} is a non-fixed frequency") + + def onOffset(self, dt) -> bool: + warnings.warn( + "onOffset is a deprecated, use is_on_offset instead", + FutureWarning, + stacklevel=1, + ) + return self.is_on_offset(dt) + + def isAnchored(self) -> bool: + warnings.warn( + "isAnchored is a deprecated, use is_anchored instead", + FutureWarning, + stacklevel=1, + ) + return self.is_anchored() + class BaseOffset(_BaseOffset): # Here we add __rfoo__ methods that don't play well with cdef classes @@ -564,6 +626,49 @@ class _Tick: return _wrap_timedelta_result(result) +class BusinessMixin: + """ + Mixin to business types to provide related functions. + """ + + @property + def offset(self): + """ + Alias for self._offset. + """ + # Alias for backward compat + return self._offset + + def _repr_attrs(self) -> str: + if self.offset: + attrs = [f"offset={repr(self.offset)}"] + else: + attrs = [] + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + +class CustomMixin: + """ + Mixin for classes that define and validate calendar, holidays, + and weekdays attributes. + """ + + def __init__(self, weekmask, holidays, calendar): + calendar, holidays = _get_calendar( + weekmask=weekmask, holidays=holidays, calendar=calendar + ) + # Custom offset instances are identified by the + # following two attributes. See DateOffset._params() + # holidays, weekmask + + object.__setattr__(self, "weekmask", weekmask) + object.__setattr__(self, "holidays", holidays) + object.__setattr__(self, "calendar", calendar) + + # ---------------------------------------------------------------------- # RelativeDelta Arithmetic diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 1b980aea372e2..5fda0db4891c3 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -938,7 +938,7 @@ cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): @cython.wraparound(False) @cython.boundscheck(False) -def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): +def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): """ Concatenates elements from numpy arrays in `date_cols` into strings. @@ -957,7 +957,7 @@ def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): -------- >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object) >>> times=np.array(['11:20', '10:45'], dtype=object) - >>> result = _concat_date_cols((dates, times)) + >>> result = concat_date_cols((dates, times)) >>> result array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object) """ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c4a7df0017619..5cf8fedbf0431 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1685,7 +1685,7 @@ cdef class _Period: resampled : Period """ freq = self._maybe_convert_freq(freq) - how = _validate_end_alias(how) + how = validate_end_alias(how) base1, mult1 = get_freq_code(self.freq) base2, mult2 = get_freq_code(freq) @@ -1758,7 +1758,7 @@ cdef class _Period: """ if freq is not None: freq = self._maybe_convert_freq(freq) - how = _validate_end_alias(how) + how = validate_end_alias(how) end = how == 'E' if end: @@ -2509,7 +2509,7 @@ def quarter_to_myear(year: int, quarter: int, freq): return year, month -def _validate_end_alias(how): +def validate_end_alias(how): how_dict = {'S': 'S', 'E': 'E', 'START': 'S', 'FINISH': 'E', 'BEGIN': 'S', 'END': 'E'} diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e3fbb906ed6b1..f7408e69f7dec 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1115,7 +1115,7 @@ def to_period(self, freq=None): # https://github.com/pandas-dev/pandas/issues/33358 if res is None: - base, stride = libfrequencies._base_and_stride(freq) + base, stride = libfrequencies.base_and_stride(freq) res = f"{stride}{base}" freq = res diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b7dfcd4cb188c..c00230c3b5ab3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -432,7 +432,7 @@ def to_timestamp(self, freq=None, how="start"): """ from pandas.core.arrays import DatetimeArray - how = libperiod._validate_end_alias(how) + how = libperiod.validate_end_alias(how) end = how == "E" if end: @@ -524,7 +524,7 @@ def asfreq(self, freq=None, how: str = "E") -> "PeriodArray": PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01', '2015-01'], dtype='period[M]', freq='M') """ - how = libperiod._validate_end_alias(how) + how = libperiod.validate_end_alias(how) freq = Period._maybe_convert_freq(freq) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f289db39347ae..aca2f9f5ac5bb 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3264,7 +3264,7 @@ def _make_date_converter( ): def converter(*date_cols): if date_parser is None: - strs = parsing._concat_date_cols(date_cols) + strs = parsing.concat_date_cols(date_cols) try: return tools.to_datetime( @@ -3292,7 +3292,7 @@ def converter(*date_cols): try: return tools.to_datetime( parsing.try_parse_dates( - parsing._concat_date_cols(date_cols), + parsing.concat_date_cols(date_cols), parser=date_parser, dayfirst=dayfirst, ), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index e11bbb89c885c..ed947755e3419 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -81,7 +81,7 @@ def date_parser(*date_cols): ------- parsed : Series """ - return parsing.try_parse_dates(parsing._concat_date_cols(date_cols)) + return parsing.try_parse_dates(parsing.concat_date_cols(date_cols)) result = parser.read_csv( StringIO(data), @@ -208,7 +208,7 @@ def test_concat_date_col_fail(container, dim): date_cols = tuple(container([value]) for _ in range(dim)) with pytest.raises(ValueError, match=msg): - parsing._concat_date_cols(date_cols) + parsing.concat_date_cols(date_cols) @pytest.mark.parametrize("keep_date_col", [True, False]) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 12320cd52cec8..df0d71e8c80c5 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -124,7 +124,7 @@ def to_offset(freq) -> Optional[DateOffset]: stride = freq[1] if isinstance(stride, str): name, stride = stride, name - name, _ = libfreqs._base_and_stride(name) + name, _ = libfreqs.base_and_stride(name) delta = _get_offset(name) * stride elif isinstance(freq, timedelta): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 286ee91bc7d4f..c1ab752bf9550 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2,7 +2,6 @@ import functools import operator from typing import Any, Optional -import warnings from dateutil.easter import easter import numpy as np @@ -24,13 +23,14 @@ from pandas._libs.tslibs.offsets import ( ApplyTypeError, BaseOffset, - _get_calendar, - _is_normalized, - _to_dt64D, + BusinessMixin, + CustomMixin, apply_index_wraps, as_datetime, + is_normalized, roll_yearday, shift_month, + to_dt64D, ) from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -249,6 +249,7 @@ def __add__(date): """ _params = cache_readonly(BaseOffset._params.fget) + freqstr = cache_readonly(BaseOffset.freqstr.fget) _use_relativedelta = False _adjust_dst = False _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) @@ -366,22 +367,6 @@ def is_anchored(self) -> bool: # if there were a canonical docstring for what is_anchored means. return self.n == 1 - def onOffset(self, dt): - warnings.warn( - "onOffset is a deprecated, use is_on_offset instead", - FutureWarning, - stacklevel=2, - ) - return self.is_on_offset(dt) - - def isAnchored(self) -> bool: - warnings.warn( - "isAnchored is a deprecated, use is_anchored instead", - FutureWarning, - stacklevel=2, - ) - return self.is_anchored() - # TODO: Combine this with BusinessMixin version by defining a whitelisted # set of attributes on each object rather than the existing behavior of # iterating over internal ``__dict__`` @@ -400,10 +385,6 @@ def _repr_attrs(self) -> str: out += ": " + ", ".join(attrs) return out - @property - def name(self) -> str: - return self.rule_code - def rollback(self, dt): """ Roll provided date backward to next offset only if not on offset. @@ -433,7 +414,7 @@ def rollforward(self, dt): return dt def is_on_offset(self, dt): - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False # TODO, see #1395 if type(self) == DateOffset or isinstance(self, Tick): @@ -446,43 +427,6 @@ def is_on_offset(self, dt): b = (dt + self) - self return a == b - # way to get around weirdness with rule_code - @property - def _prefix(self) -> str: - raise NotImplementedError("Prefix not defined") - - @property - def rule_code(self) -> str: - return self._prefix - - @cache_readonly - def freqstr(self) -> str: - try: - code = self.rule_code - except NotImplementedError: - return repr(self) - - if self.n != 1: - fstr = f"{self.n}{code}" - else: - fstr = code - - try: - if self._offset: - fstr += self._offset_str() - except AttributeError: - # TODO: standardize `_offset` vs `offset` naming convention - pass - - return fstr - - def _offset_str(self) -> str: - return "" - - @property - def nanos(self): - raise ValueError(f"{self} is a non-fixed frequency") - class SingleConstructorOffset(DateOffset): @classmethod @@ -493,49 +437,6 @@ def _from_name(cls, suffix=None): return cls() -class _CustomMixin: - """ - Mixin for classes that define and validate calendar, holidays, - and weekdays attributes. - """ - - def __init__(self, weekmask, holidays, calendar): - calendar, holidays = _get_calendar( - weekmask=weekmask, holidays=holidays, calendar=calendar - ) - # Custom offset instances are identified by the - # following two attributes. See DateOffset._params() - # holidays, weekmask - - object.__setattr__(self, "weekmask", weekmask) - object.__setattr__(self, "holidays", holidays) - object.__setattr__(self, "calendar", calendar) - - -class BusinessMixin: - """ - Mixin to business types to provide related functions. - """ - - @property - def offset(self): - """ - Alias for self._offset. - """ - # Alias for backward compat - return self._offset - - def _repr_attrs(self) -> str: - if self.offset: - attrs = [f"offset={repr(self.offset)}"] - else: - attrs = [] - out = "" - if attrs: - out += ": " + ", ".join(attrs) - return out - - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days. @@ -643,7 +544,7 @@ def apply_index(self, i): return result def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return dt.weekday() < 5 @@ -661,8 +562,8 @@ def __init__(self, start="09:00", end="17:00", offset=timedelta(0)): if not len(end): raise ValueError("Must include at least 1 end time") - start = np.array([liboffsets._validate_business_time(x) for x in start]) - end = np.array([liboffsets._validate_business_time(x) for x in end]) + start = np.array([liboffsets.validate_business_time(x) for x in start]) + end = np.array([liboffsets.validate_business_time(x) for x in end]) # Validation of input if len(start) != len(end): @@ -889,7 +790,7 @@ def apply(self, other): # adjust by business days first if bd != 0: - if isinstance(self, _CustomMixin): # GH 30593 + if isinstance(self, CustomMixin): # GH 30593 skip_bd = CustomBusinessDay( n=bd, weekmask=self.weekmask, @@ -949,7 +850,7 @@ def apply(self, other): raise ApplyTypeError("Only know how to combine business hour with datetime") def is_on_offset(self, dt): - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False if dt.tzinfo is not None: @@ -964,7 +865,7 @@ def _is_on_offset(self, dt): """ Slight speedups using calculated values. """ - # if self.normalize and not _is_normalized(dt): + # if self.normalize and not is_normalized(dt): # return False # Valid BH can be on the different BusinessDay during midnight # Distinguish by the time spent from previous opening time @@ -1009,7 +910,7 @@ def __init__( super().__init__(start=start, end=end, offset=offset) -class CustomBusinessDay(_CustomMixin, BusinessDay): +class CustomBusinessDay(CustomMixin, BusinessDay): """ DateOffset subclass representing custom business days excluding holidays. @@ -1044,7 +945,7 @@ def __init__( BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) - _CustomMixin.__init__(self, weekmask, holidays, calendar) + CustomMixin.__init__(self, weekmask, holidays, calendar) @apply_wraps def apply(self, other): @@ -1080,13 +981,13 @@ def apply_index(self, i): raise NotImplementedError def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False - day64 = _to_dt64D(dt) + day64 = to_dt64D(dt) return np.is_busday(day64, busdaycal=self.calendar) -class CustomBusinessHour(_CustomMixin, BusinessHourMixin, SingleConstructorOffset): +class CustomBusinessHour(CustomMixin, BusinessHourMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n custom business days. """ @@ -1111,7 +1012,7 @@ def __init__( BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) - _CustomMixin.__init__(self, weekmask, holidays, calendar) + CustomMixin.__init__(self, weekmask, holidays, calendar) BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) @@ -1126,7 +1027,7 @@ class MonthOffset(SingleConstructorOffset): __init__ = BaseOffset.__init__ def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return dt.day == self._get_offset_day(dt) @@ -1178,7 +1079,7 @@ class BusinessMonthBegin(MonthOffset): _day_opt = "business_start" -class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): +class _CustomBusinessMonth(CustomMixin, BusinessMixin, MonthOffset): """ DateOffset subclass representing custom business month(s). @@ -1220,7 +1121,7 @@ def __init__( BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) - _CustomMixin.__init__(self, weekmask, holidays, calendar) + CustomMixin.__init__(self, weekmask, holidays, calendar) @cache_readonly def cbday_roll(self): @@ -1409,7 +1310,7 @@ class SemiMonthEnd(SemiMonthOffset): _min_day_of_month = 1 def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False days_in_month = ccalendar.get_days_in_month(dt.year, dt.month) return dt.day in (self.day_of_month, days_in_month) @@ -1467,7 +1368,7 @@ class SemiMonthBegin(SemiMonthOffset): _prefix = "SMS" def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return dt.day in (1, self.day_of_month) @@ -1606,7 +1507,7 @@ def _end_apply_index(self, dtindex): return base + off + Timedelta(1, "ns") - Timedelta(1, "D") def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False elif self.weekday is None: return True @@ -1649,7 +1550,7 @@ def apply(self, other): return liboffsets.shift_day(shifted, to_day - shifted.day) def is_on_offset(self, dt): - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return dt.day == self._get_offset_day(dt) @@ -1848,7 +1749,7 @@ def apply(self, other): return shift_month(other, months, self._day_opt) def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False mod_month = (dt.month - self.startingMonth) % 3 return mod_month == 0 and dt.day == self._get_offset_day(dt) @@ -1943,7 +1844,7 @@ def apply_index(self, dtindex): return type(dtindex)._simple_new(shifted, dtype=dtindex.dtype) def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return dt.month == self.month and dt.day == self._get_offset_day(dt) @@ -2088,7 +1989,7 @@ def is_anchored(self) -> bool: ) def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False dt = datetime(dt.year, dt.month, dt.day) year_end = self.get_year_end(dt) @@ -2411,7 +2312,7 @@ def year_has_extra_week(self, dt: datetime) -> bool: return weeks_in_year == 53 def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False if self._offset.is_on_offset(dt): return True @@ -2482,7 +2383,7 @@ def apply(self, other): return new def is_on_offset(self, dt: datetime) -> bool: - if self.normalize and not _is_normalized(dt): + if self.normalize and not is_normalized(dt): return False return date(dt.year, dt.month, dt.day) == easter(dt.year)