diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd new file mode 100644 index 0000000000000..23c473726e5a9 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -0,0 +1,56 @@ + +cdef enum PeriodDtypeCode: + # Annual freqs with various fiscal year ends. + # eg, 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005 + A = 1000 # Default alias + A_DEC = 1000 # Annual - December year end + A_JAN = 1001 # Annual - January year end + A_FEB = 1002 # Annual - February year end + A_MAR = 1003 # Annual - March year end + A_APR = 1004 # Annual - April year end + A_MAY = 1005 # Annual - May year end + A_JUN = 1006 # Annual - June year end + A_JUL = 1007 # Annual - July year end + A_AUG = 1008 # Annual - August year end + A_SEP = 1009 # Annual - September year end + A_OCT = 1010 # Annual - October year end + A_NOV = 1011 # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005 + Q_DEC = 2000 # Quarterly - December year end + Q_JAN = 2001 # Quarterly - January year end + Q_FEB = 2002 # Quarterly - February year end + Q_MAR = 2003 # Quarterly - March year end + Q_APR = 2004 # Quarterly - April year end + Q_MAY = 2005 # Quarterly - May year end + Q_JUN = 2006 # Quarterly - June year end + Q_JUL = 2007 # Quarterly - July year end + Q_AUG = 2008 # Quarterly - August year end + Q_SEP = 2009 # Quarterly - September year end + Q_OCT = 2010 # Quarterly - October year end + Q_NOV = 2011 # Quarterly - November year end + + M = 3000 # Monthly + + W_SUN = 4000 # Weekly - Sunday end of week + W_MON = 4001 # Weekly - Monday end of week + W_TUE = 4002 # Weekly - Tuesday end of week + W_WED = 4003 # Weekly - Wednesday end of week + W_THU = 4004 # Weekly - Thursday end of week + W_FRI = 4005 # Weekly - Friday end of week + W_SAT = 4006 # Weekly - Saturday end of week + + B = 5000 # Business days + D = 6000 # Daily + H = 7000 # Hourly + T = 8000 # Minutely + S = 9000 # Secondly + L = 10000 
# Millisecondly + U = 11000 # Microsecondly + N = 12000 # Nanosecondly + + +cdef class PeriodPseudoDtype: + cdef readonly: + PeriodDtypeCode dtype_code diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx new file mode 100644 index 0000000000000..d0d4e579a456b --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -0,0 +1,108 @@ +# Period frequency constants (see _period_code_map below) corresponding to +# the scikits.timeseries originals + + +cdef class PeriodPseudoDtype: + """ + Similar to an actual dtype, this contains all of the information + describing a PeriodDtype in an integer code. + """ + # Attribute declared in dtypes.pxd (cdef readonly): + # PeriodDtypeCode dtype_code + + def __cinit__(self, PeriodDtypeCode code): + self.dtype_code = code + + def __eq__(self, other): + if not isinstance(other, PeriodPseudoDtype): + return False + if not isinstance(self, PeriodPseudoDtype): + # cython semantics, this is a reversed op + return False + return self.dtype_code == other.dtype_code + + @property + def date_offset(self): + """ + Corresponding DateOffset object. + + This mapping is mainly for backward-compatibility. + """ + from .offsets import to_offset + + freqstr = _reverse_period_code_map.get(self.dtype_code) + # equiv: freqstr = libfrequencies.get_freq_str(self.dtype_code) + + return to_offset(freqstr) + + @classmethod + def from_date_offset(cls, offset): + code = offset._period_dtype_code + return cls(code) + + +_period_code_map = { + # Annual freqs with various fiscal year ends. 
+ # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + +# Yearly aliases; careful not to put these in 
_reverse_period_code_map +_period_code_map.update({"Y" + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith("A-")}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day +}) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 8246e24319dbd..8ca442de59f9f 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -12,6 +12,8 @@ from pandas._libs.tslibs.offsets import ( opattern, ) +from .dtypes import _period_code_map, _reverse_period_code_map + # --------------------------------------------------------------------- # Period codes @@ -31,73 +33,6 @@ class FreqGroup: FR_NS = 12000 -# period frequency constants corresponding to scikits timeseries -# originals -_period_code_map = { - # Annual freqs with various fiscal year ends. - # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 - "A-DEC": 1000, # Annual - December year end - "A-JAN": 1001, # Annual - January year end - "A-FEB": 1002, # Annual - February year end - "A-MAR": 1003, # Annual - March year end - "A-APR": 1004, # Annual - April year end - "A-MAY": 1005, # Annual - May year end - "A-JUN": 1006, # Annual - June year end - "A-JUL": 1007, # Annual - July year end - "A-AUG": 1008, # Annual - August year end - "A-SEP": 1009, # Annual - September year end - "A-OCT": 1010, # Annual - October year end - "A-NOV": 1011, # Annual - November year end - - # Quarterly frequencies with various fiscal year ends. 
- # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 - "Q-DEC": 2000, # Quarterly - December year end - "Q-JAN": 2001, # Quarterly - January year end - "Q-FEB": 2002, # Quarterly - February year end - "Q-MAR": 2003, # Quarterly - March year end - "Q-APR": 2004, # Quarterly - April year end - "Q-MAY": 2005, # Quarterly - May year end - "Q-JUN": 2006, # Quarterly - June year end - "Q-JUL": 2007, # Quarterly - July year end - "Q-AUG": 2008, # Quarterly - August year end - "Q-SEP": 2009, # Quarterly - September year end - "Q-OCT": 2010, # Quarterly - October year end - "Q-NOV": 2011, # Quarterly - November year end - - "M": 3000, # Monthly - - "W-SUN": 4000, # Weekly - Sunday end of week - "W-MON": 4001, # Weekly - Monday end of week - "W-TUE": 4002, # Weekly - Tuesday end of week - "W-WED": 4003, # Weekly - Wednesday end of week - "W-THU": 4004, # Weekly - Thursday end of week - "W-FRI": 4005, # Weekly - Friday end of week - "W-SAT": 4006, # Weekly - Saturday end of week - - "B": 5000, # Business days - "D": 6000, # Daily - "H": 7000, # Hourly - "T": 8000, # Minutely - "S": 9000, # Secondly - "L": 10000, # Millisecondly - "U": 11000, # Microsecondly - "N": 12000} # Nanosecondly - - -_reverse_period_code_map = { - _period_code_map[key]: key for key in _period_code_map} - -# Yearly aliases; careful not to put these in _reverse_period_code_map -_period_code_map.update({'Y' + key[1:]: _period_code_map[key] - for key in _period_code_map - if key.startswith('A-')}) - -_period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000}) # Custom Business Day - # Map attribute-name resolutions to resolution abbreviations _attrname_to_abbrevs = { "year": "A", diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 77b60d0c22322..63dc3407b4c55 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -48,6 +48,7 @@ from 
pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from .dtypes cimport PeriodDtypeCode from .timedeltas cimport delta_to_nanoseconds @@ -892,36 +893,43 @@ cdef class Tick(SingleConstructorOffset): cdef class Day(Tick): _nanos_inc = 24 * 3600 * 1_000_000_000 _prefix = "D" + _period_dtype_code = PeriodDtypeCode.D cdef class Hour(Tick): _nanos_inc = 3600 * 1_000_000_000 _prefix = "H" + _period_dtype_code = PeriodDtypeCode.H cdef class Minute(Tick): _nanos_inc = 60 * 1_000_000_000 _prefix = "T" + _period_dtype_code = PeriodDtypeCode.T cdef class Second(Tick): _nanos_inc = 1_000_000_000 _prefix = "S" + _period_dtype_code = PeriodDtypeCode.S cdef class Milli(Tick): _nanos_inc = 1_000_000 _prefix = "L" + _period_dtype_code = PeriodDtypeCode.L cdef class Micro(Tick): _nanos_inc = 1000 _prefix = "U" + _period_dtype_code = PeriodDtypeCode.U cdef class Nano(Tick): _nanos_inc = 1 _prefix = "N" + _period_dtype_code = PeriodDtypeCode.N def delta_to_tick(delta: timedelta) -> Tick: @@ -1281,7 +1289,7 @@ cdef class BusinessDay(BusinessMixin): """ DateOffset subclass representing possibly n business days. 
""" - + _period_dtype_code = PeriodDtypeCode.B _prefix = "B" _attributes = tuple(["n", "normalize", "offset"]) @@ -1945,6 +1953,15 @@ cdef class YearEnd(YearOffset): _prefix = "A" _day_opt = "end" + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, month=None): + # Because YearEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + YearOffset.__init__(self, n, normalize, month) + self._period_dtype_code = PeriodDtypeCode.A + self.month % 12 + cdef class YearBegin(YearOffset): """ @@ -2099,6 +2116,14 @@ cdef class QuarterEnd(QuarterOffset): _prefix = "Q" _day_opt = "end" + cdef readonly: + int _period_dtype_code + + def __init__(self, n=1, normalize=False, startingMonth=None): + # Because QuarterEnd can be the freq for a Period, define its + # _period_dtype_code at construction for performance + QuarterOffset.__init__(self, n, normalize, startingMonth) + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 cdef class QuarterBegin(QuarterOffset): """ @@ -2148,6 +2173,7 @@ cdef class MonthEnd(MonthOffset): """ DateOffset of one month end. 
""" + _period_dtype_code = PeriodDtypeCode.M _prefix = "M" _day_opt = "end" @@ -2452,6 +2478,7 @@ cdef class Week(SingleConstructorOffset): cdef readonly: object weekday # int or None + int _period_dtype_code def __init__(self, n=1, normalize=False, weekday=None): BaseOffset.__init__(self, n, normalize) @@ -2461,6 +2488,8 @@ cdef class Week(SingleConstructorOffset): if self.weekday < 0 or self.weekday > 6: raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 + cpdef __setstate__(self, state): self.n = state.pop("n") self.normalize = state.pop("normalize") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 14cce1c000207..e88a20bc549bd 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -55,6 +55,9 @@ from pandas._libs.tslibs.ccalendar cimport ( get_days_in_month, ) from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS + +from pandas._libs.tslibs.dtypes cimport PeriodPseudoDtype + from pandas._libs.tslibs.frequencies cimport ( attrname_to_abbrevs, get_freq_code, @@ -1514,11 +1517,16 @@ cdef class _Period: cdef readonly: int64_t ordinal + PeriodPseudoDtype _dtype BaseOffset freq def __cinit__(self, int64_t ordinal, BaseOffset freq): self.ordinal = ordinal self.freq = freq + # Note: this is more performant than + # PeriodPseudoDtype.from_date_offset(freq) because from_date_offset + # cannot be made a cdef method (until cython supports cdef classmethods) + self._dtype = PeriodPseudoDtype(freq._period_dtype_code) @classmethod def _maybe_convert_freq(cls, object freq): @@ -1662,13 +1670,13 @@ cdef class _Period: """ freq = self._maybe_convert_freq(freq) how = validate_end_alias(how) - base1, mult1 = get_freq_code(self.freq) - base2, mult2 = get_freq_code(freq) + base1 = self._dtype.dtype_code + base2, _ = get_freq_code(freq) - # mult1 can't be negative or 0 + # self.freq.n can't be negative or 0 end = how == 'E' if end: - ordinal = 
self.ordinal + mult1 - 1 + ordinal = self.ordinal + self.freq.n - 1 else: ordinal = self.ordinal ordinal = period_asfreq(ordinal, base1, base2, end) @@ -1751,12 +1759,12 @@ cdef class _Period: return endpoint - Timedelta(1, 'ns') if freq is None: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code freq = get_to_timestamp_base(base) else: freq = self._maybe_convert_freq(freq) - base, mult = get_freq_code(freq) + base, _ = get_freq_code(freq) val = self.asfreq(freq, how) dt64 = period_ordinal_to_dt64(val.ordinal, base) @@ -1764,12 +1772,12 @@ cdef class _Period: @property def year(self) -> int: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pyear(self.ordinal, base) @property def month(self) -> int: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pmonth(self.ordinal, base) @property @@ -1792,7 +1800,7 @@ cdef class _Period: >>> p.day 11 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pday(self.ordinal, base) @property @@ -1822,7 +1830,7 @@ cdef class _Period: >>> p.hour 0 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return phour(self.ordinal, base) @property @@ -1846,7 +1854,7 @@ cdef class _Period: >>> p.minute 3 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pminute(self.ordinal, base) @property @@ -1870,12 +1878,12 @@ cdef class _Period: >>> p.second 12 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return psecond(self.ordinal, base) @property def weekofyear(self) -> int: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pweek(self.ordinal, base) @property @@ -1956,7 +1964,7 @@ cdef class _Period: >>> per.end_time.dayofweek 2 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pweekday(self.ordinal, base) @property @@ -2044,12 +2052,12 @@ cdef class _Period: >>> period.dayofyear 1 """ - base, 
mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pday_of_year(self.ordinal, base) @property def quarter(self) -> int: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pquarter(self.ordinal, base) @property @@ -2093,7 +2101,7 @@ cdef class _Period: >>> per.year 2017 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pqyear(self.ordinal, base) @property @@ -2127,7 +2135,7 @@ cdef class _Period: >>> p.days_in_month 29 """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return pdays_in_month(self.ordinal, base) @property @@ -2165,7 +2173,7 @@ cdef class _Period: return self.freq.freqstr def __repr__(self) -> str: - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code formatted = period_format(self.ordinal, base) return f"Period('{formatted}', '{self.freqstr}')" @@ -2173,7 +2181,7 @@ cdef class _Period: """ Return a string representation for a particular DataFrame """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code formatted = period_format(self.ordinal, base) value = str(formatted) return value @@ -2325,7 +2333,7 @@ cdef class _Period: >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 
01, 2001 was a Monday' """ - base, mult = get_freq_code(self.freq) + base = self._dtype.dtype_code return period_format(self.ordinal, base, fmt) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index bbabfed4cb976..b0c524a257684 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -9,6 +9,7 @@ def test_namespace(): "base", "ccalendar", "conversion", + "dtypes", "fields", "frequencies", "nattype", diff --git a/setup.py b/setup.py index 63510867f0dd7..9f411ec10cd80 100755 --- a/setup.py +++ b/setup.py @@ -308,8 +308,8 @@ class CheckSDist(sdist_class): "pandas/_libs/ops.pyx", "pandas/_libs/parsers.pyx", "pandas/_libs/tslibs/base.pyx", - "pandas/_libs/tslibs/c_timestamp.pyx", "pandas/_libs/tslibs/ccalendar.pyx", + "pandas/_libs/tslibs/dtypes.pyx", "pandas/_libs/tslibs/period.pyx", "pandas/_libs/tslibs/strptime.pyx", "pandas/_libs/tslibs/np_datetime.pyx", @@ -605,6 +605,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslib": {"pyxfile": "_libs/tslib", "depends": tseries_depends}, "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, + "_libs.tslibs.dtypes": {"pyxfile": "_libs/tslibs/dtypes"}, "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends,