From b9e11a6c6bfe05a93a307f4c542acb8b318a15a6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 May 2020 19:18:57 -0700 Subject: [PATCH 1/7] ENH: implement tslibs.dtypes --- pandas/_libs/tslibs/dtypes.pxd | 56 ++++++++++++++ pandas/_libs/tslibs/dtypes.pyx | 114 ++++++++++++++++++++++++++++ pandas/_libs/tslibs/frequencies.pyx | 69 +---------------- pandas/_libs/tslibs/offsets.pyx | 11 ++- pandas/_libs/tslibs/period.pyx | 46 ++++++----- pandas/tests/tslibs/test_api.py | 1 + setup.py | 3 +- 7 files changed, 210 insertions(+), 90 deletions(-) create mode 100644 pandas/_libs/tslibs/dtypes.pxd create mode 100644 pandas/_libs/tslibs/dtypes.pyx diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd new file mode 100644 index 0000000000000..36df048dce672 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -0,0 +1,56 @@ + +cdef enum PeriodDtypeCode: + # Annual freqs with various fiscal year ends. + # eg, 2005 for A_FEB runs Mar 1, 2004 to Feb 28, 2005 + A = 1000 # Default alias + A_DEC = 1000 # Annual - December year end + A_JAN = 1001 # Annual - January year end + A_FEB = 1002 # Annual - February year end + A_MAR = 1003 # Annual - March year end + A_APR = 1004 # Annual - April year end + A_MAY = 1005 # Annual - May year end + A_JUN = 1006 # Annual - June year end + A_JUL = 1007 # Annual - July year end + A_AUG = 1008 # Annual - August year end + A_SEP = 1009 # Annual - September year end + A_OCT = 1010 # Annual - October year end + A_NOV = 1011 # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q_OCT runs Aug 1, 2005 to Oct 31, 2005 + Q_DEC = 2000 # Quarterly - December year end + Q_JAN = 2001 # Quarterly - January year end + Q_FEB = 2002 # Quarterly - February year end + Q_MAR = 2003 # Quarterly - March year end + Q_APR = 2004 # Quarterly - April year end + Q_MAY = 2005 # Quarterly - May year end + Q_JUN = 2006 # Quarterly - June year end + Q_JUL = 2007 # Quarterly - July year end + Q_AUG = 2008 # Quarterly - August year end + Q_SEP = 2009 # Quarterly - September year end + Q_OCT = 2010 # Quarterly - October year end + Q_NOV = 2011 # Quarterly - November year end + + M = 3000 # Monthly + + W_SUN = 4000 # Weekly - Sunday end of week + W_MON = 4001 # Weekly - Monday end of week + W_TUE = 4002 # Weekly - Tuesday end of week + W_WED = 4003 # Weekly - Wednesday end of week + W_THU = 4004 # Weekly - Thursday end of week + W_FRI = 4005 # Weekly - Friday end of week + W_SAT = 4006 # Weekly - Saturday end of week + + B = 5000 # Business days + D = 6000 # Daily + H = 7000 # Hourly + T = 8000 # Minutely + S = 9000 # Secondly + L = 10000 # Millisecondly + U = 11000 # Microsecondly + N = 12000 # Nanosecondly + + +cdef class PeriodDtype: + cdef readonly: + PeriodDtypeCode dtype_code diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx new file mode 100644 index 0000000000000..c2d787a0bcce7 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -0,0 +1,114 @@ +# period frequency constants corresponding to scikits timeseries +# originals + + +cdef class PeriodDtype: + # cdef readonly: + # PeriodDtypeCode dtype_code + + def __cinit__(self, PeriodDtypeCode code): + self.dtype_code = code + + def __eq__(self, other): + if not isinstance(other, PeriodDtype): + return False + if not isinstance(self, PeriodDtype): + # cython semantics, this is a reversed op + return False + return self.dtype_code == other.dtype_code + + @property + def date_offset(self): + """ + Corresponding DateOffset object. + + This mapping is mainly for backward-compatibility. + """ + from .offsets import to_offset + + freqstr = _reverse_period_code_map.get(self.dtype_code) + # equiv: freqstr = libfrequencies.get_freq_str(self.dtype_code) + + return to_offset(freqstr) + + @classmethod + def from_date_offset(cls, offset): + try: + # For some DateOffset classes we have pinned the answer + code = offset.period_dtype_code + except AttributeError: + from .frequencies import get_freq_code + code, _ = get_freq_code(offset) + return cls(code) + + @classmethod + def from_freqstr(cls, freqstr: str): + # TODO: Check that this is actually obj.freqstr or rule_code or whatever + return cls(_period_code_map[freqstr]) + + +_period_code_map = { + # Annual freqs with various fiscal year ends. + # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({"Y" + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith("A-")}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day +}) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 8246e24319dbd..8ca442de59f9f 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -12,6 +12,8 @@ from pandas._libs.tslibs.offsets import ( opattern, ) +from .dtypes import _period_code_map, _reverse_period_code_map + # --------------------------------------------------------------------- # Period codes @@ -31,73 +33,6 @@ class FreqGroup: FR_NS = 12000 -# period frequency constants corresponding to scikits timeseries -# originals -_period_code_map = { - # Annual freqs with various fiscal year ends. - # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 - "A-DEC": 1000, # Annual - December year end - "A-JAN": 1001, # Annual - January year end - "A-FEB": 1002, # Annual - February year end - "A-MAR": 1003, # Annual - March year end - "A-APR": 1004, # Annual - April year end - "A-MAY": 1005, # Annual - May year end - "A-JUN": 1006, # Annual - June year end - "A-JUL": 1007, # Annual - July year end - "A-AUG": 1008, # Annual - August year end - "A-SEP": 1009, # Annual - September year end - "A-OCT": 1010, # Annual - October year end - "A-NOV": 1011, # Annual - November year end - - # Quarterly frequencies with various fiscal year ends. - # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 - "Q-DEC": 2000, # Quarterly - December year end - "Q-JAN": 2001, # Quarterly - January year end - "Q-FEB": 2002, # Quarterly - February year end - "Q-MAR": 2003, # Quarterly - March year end - "Q-APR": 2004, # Quarterly - April year end - "Q-MAY": 2005, # Quarterly - May year end - "Q-JUN": 2006, # Quarterly - June year end - "Q-JUL": 2007, # Quarterly - July year end - "Q-AUG": 2008, # Quarterly - August year end - "Q-SEP": 2009, # Quarterly - September year end - "Q-OCT": 2010, # Quarterly - October year end - "Q-NOV": 2011, # Quarterly - November year end - - "M": 3000, # Monthly - - "W-SUN": 4000, # Weekly - Sunday end of week - "W-MON": 4001, # Weekly - Monday end of week - "W-TUE": 4002, # Weekly - Tuesday end of week - "W-WED": 4003, # Weekly - Wednesday end of week - "W-THU": 4004, # Weekly - Thursday end of week - "W-FRI": 4005, # Weekly - Friday end of week - "W-SAT": 4006, # Weekly - Saturday end of week - - "B": 5000, # Business days - "D": 6000, # Daily - "H": 7000, # Hourly - "T": 8000, # Minutely - "S": 9000, # Secondly - "L": 10000, # Millisecondly - "U": 11000, # Microsecondly - "N": 12000} # Nanosecondly - - -_reverse_period_code_map = { - _period_code_map[key]: key for key in _period_code_map} - -# Yearly aliases; careful not to put these in _reverse_period_code_map -_period_code_map.update({'Y' + key[1:]: _period_code_map[key] - for key in _period_code_map - if key.startswith('A-')}) - -_period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000}) # Custom Business Day - # Map attribute-name resolutions to resolution abbreviations _attrname_to_abbrevs = { "year": "A", diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b804ed883e693..272603d490e30 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -43,6 +43,7 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from .dtypes cimport PeriodDtypeCode from .timedeltas cimport delta_to_nanoseconds @@ -860,36 +861,43 @@ cdef class Tick(SingleConstructorOffset): cdef class Day(Tick): _nanos_inc = 24 * 3600 * 1_000_000_000 _prefix = "D" + period_dtype_code = PeriodDtypeCode.D cdef class Hour(Tick): _nanos_inc = 3600 * 1_000_000_000 _prefix = "H" + period_dtype_code = PeriodDtypeCode.H cdef class Minute(Tick): _nanos_inc = 60 * 1_000_000_000 _prefix = "T" + period_dtype_code = PeriodDtypeCode.T cdef class Second(Tick): _nanos_inc = 1_000_000_000 _prefix = "S" + period_dtype_code = PeriodDtypeCode.S cdef class Milli(Tick): _nanos_inc = 1_000_000 _prefix = "L" + period_dtype_code = PeriodDtypeCode.L cdef class Micro(Tick): _nanos_inc = 1000 _prefix = "U" + period_dtype_code = PeriodDtypeCode.U cdef class Nano(Tick): _nanos_inc = 1 _prefix = "N" + period_dtype_code = PeriodDtypeCode.N def delta_to_tick(delta: timedelta) -> Tick: @@ -1253,7 +1261,7 @@ cdef class BusinessDay(BusinessMixin): """ DateOffset subclass representing possibly n business days. """ - + period_dtype_code = PeriodDtypeCode.B _prefix = "B" _attributes = tuple(["n", "normalize", "offset"]) @@ -2118,6 +2126,7 @@ cdef class MonthEnd(MonthOffset): """ DateOffset of one month end. """ + period_dtype_code = PeriodDtypeCode.M _prefix = "M" _day_opt = "end" diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index b2b7eb000a2f3..abdc03fad988d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -73,6 +73,7 @@ from pandas._libs.tslibs.offsets cimport ( ) from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal +from .dtypes cimport PeriodDtype cdef: enum: @@ -1509,11 +1510,14 @@ cdef class _Period: cdef readonly: int64_t ordinal + PeriodDtype dtype object freq def __cinit__(self, ordinal, freq): self.ordinal = ordinal self.freq = freq + self.dtype = PeriodDtype.from_date_offset(freq) + @classmethod def _maybe_convert_freq(cls, object freq): @@ -1657,13 +1661,13 @@ cdef class _Period: """ freq = self._maybe_convert_freq(freq) how = validate_end_alias(how) - base1, mult1 = get_freq_code(self.freq) - base2, mult2 = get_freq_code(freq) + base1 = self.dtype.dtype_code + base2, _ = get_freq_code(freq) - # mult1 can't be negative or 0 + # self.n can't be negative or 0 end = how == 'E' if end: - ordinal = self.ordinal + mult1 - 1 + ordinal = self.ordinal + self.freq.n - 1 else: ordinal = self.ordinal ordinal = period_asfreq(ordinal, base1, base2, end) @@ -1737,10 +1741,10 @@ cdef class _Period: return endpoint - Timedelta(1, 'ns') if freq is None: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code freq = get_to_timestamp_base(base) - base, mult = get_freq_code(freq) + base, _ = get_freq_code(freq) val = self.asfreq(freq, how) dt64 = period_ordinal_to_dt64(val.ordinal, base) @@ -1748,12 +1752,12 @@ cdef class _Period: @property def year(self) -> int: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pyear(self.ordinal, base) @property def month(self) -> int: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pmonth(self.ordinal, base) @property @@ -1776,7 +1780,7 @@ cdef class _Period: >>> p.day 11 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pday(self.ordinal, base) @property @@ -1806,7 +1810,7 @@ cdef class _Period: >>> p.hour 0 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return phour(self.ordinal, base) @property @@ -1830,7 +1834,7 @@ cdef class _Period: >>> p.minute 3 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pminute(self.ordinal, base) @property @@ -1854,12 +1858,12 @@ cdef class _Period: >>> p.second 12 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return psecond(self.ordinal, base) @property def weekofyear(self) -> int: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pweek(self.ordinal, base) @property @@ -1940,7 +1944,7 @@ cdef class _Period: >>> per.end_time.dayofweek 2 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pweekday(self.ordinal, base) @property @@ -2028,12 +2032,12 @@ cdef class _Period: >>> period.dayofyear 1 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pday_of_year(self.ordinal, base) @property def quarter(self) -> int: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pquarter(self.ordinal, base) @property @@ -2077,7 +2081,7 @@ cdef class _Period: >>> per.year 2017 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pqyear(self.ordinal, base) @property @@ -2111,7 +2115,7 @@ cdef class _Period: >>> p.days_in_month 29 """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return pdays_in_month(self.ordinal, base) @property @@ -2149,7 +2153,7 @@ cdef class _Period: return self.freq.freqstr def __repr__(self) -> str: - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code formatted = period_format(self.ordinal, base) return f"Period('{formatted}', '{self.freqstr}')" @@ -2157,7 +2161,7 @@ cdef class _Period: """ Return a string representation for a particular DataFrame """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code formatted = period_format(self.ordinal, base) value = str(formatted) return value @@ -2309,7 +2313,7 @@ cdef class _Period: >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - base, mult = get_freq_code(self.freq) + base = self.dtype.dtype_code return period_format(self.ordinal, base, fmt) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 5518760dbacb3..90e52c7340e94 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -9,6 +9,7 @@ def test_namespace(): "base", "ccalendar", "conversion", + "dtypes", "fields", "frequencies", "nattype", diff --git a/setup.py b/setup.py index 63510867f0dd7..9f411ec10cd80 100755 --- a/setup.py +++ b/setup.py @@ -308,8 +308,8 @@ class CheckSDist(sdist_class): "pandas/_libs/ops.pyx", "pandas/_libs/parsers.pyx", "pandas/_libs/tslibs/base.pyx", - "pandas/_libs/tslibs/c_timestamp.pyx", "pandas/_libs/tslibs/ccalendar.pyx", + "pandas/_libs/tslibs/dtypes.pyx", "pandas/_libs/tslibs/period.pyx", "pandas/_libs/tslibs/strptime.pyx", "pandas/_libs/tslibs/np_datetime.pyx", @@ -605,6 +605,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslib": {"pyxfile": "_libs/tslib", "depends": tseries_depends}, "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, + "_libs.tslibs.dtypes": {"pyxfile": "_libs/tslibs/dtypes"}, "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, From b79165f827f7c1665e5388463c0c75a92124fbbc Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 May 2020 20:26:06 -0700 Subject: [PATCH 2/7] unrelated docstring fix --- pandas/_libs/tslibs/timedeltas.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7f8b86359732..a1c10e9392a1a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -907,19 +907,19 @@ cdef class _Timedelta(ABCTimedelta): Examples -------- >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.resolution + >>> td.resolution_string 'N' >>> td = pd.Timedelta('1 days 2 min 3 us') - >>> td.resolution + >>> td.resolution_string 'U' >>> td = pd.Timedelta('2 min 3 s') - >>> td.resolution + >>> td.resolution_string 'S' >>> td = pd.Timedelta(36, unit='us') - >>> td.resolution + >>> td.resolution_string 'U' """ self._ensure_components() From c0a275159142b0b1c4c8d4f8966732adb7c38f25 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 May 2020 08:33:57 -0700 Subject: [PATCH 3/7] PERF: define period_dtype_code on all pertinent offsets --- pandas/_libs/tslibs/dtypes.pyx | 12 +----------- pandas/_libs/tslibs/offsets.pyx | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index c2d787a0bcce7..c1a9c8f7247a3 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -33,19 +33,9 @@ cdef class PeriodDtype: @classmethod def from_date_offset(cls, offset): - try: - # For some DateOffset classes we have pinned the answer - code = offset.period_dtype_code - except AttributeError: - from .frequencies import get_freq_code - code, _ = get_freq_code(offset) + code = offset.period_dtype_code return cls(code) - @classmethod - def from_freqstr(cls, freqstr: str): - # TODO: Check that this is actually obj.freqstr or rule_code or whatever - return cls(_period_code_map[freqstr]) - _period_code_map = { # Annual freqs with various fiscal year ends. diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 272603d490e30..e41fab2e89287 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1923,6 +1923,15 @@ cdef class YearEnd(YearOffset): _prefix = "A" _day_opt = "end" + cdef readonly: + int period_dtype_code + + def __init__(self, n=1, normalize=False, month=None): + # Because YearEnd can be the freq for a Period, define its + # period_dtype_code at construction for performance + YearOffset.__init__(self, n, normalize, month) + self.period_dtype_code = PeriodDtypeCode.A + self.month % 12 + cdef class YearBegin(YearOffset): """ @@ -2077,6 +2086,14 @@ cdef class QuarterEnd(QuarterOffset): _prefix = "Q" _day_opt = "end" + cdef readonly: + int period_dtype_code + + def __init__(self, n=1, normalize=False, startingMonth=None): + # Because QuarterEnd can be the freq for a Period, define its + # period_dtype_code at construction for performance + QuarterOffset.__init__(self, n, normalize, startingMonth) + self.period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 cdef class QuarterBegin(QuarterOffset): """ @@ -2430,6 +2447,7 @@ cdef class Week(SingleConstructorOffset): cdef readonly: object weekday # int or None + int period_dtype_code def __init__(self, n=1, normalize=False, weekday=None): BaseOffset.__init__(self, n, normalize) @@ -2439,6 +2457,8 @@ cdef class Week(SingleConstructorOffset): if self.weekday < 0 or self.weekday > 6: raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + self.period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 + cpdef __setstate__(self, state): self.n = state.pop("n") self.normalize = state.pop("normalize") From 5ff1766346f347c419cfee7d462f745086dba3b9 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 May 2020 08:48:34 -0700 Subject: [PATCH 4/7] optimize constructor --- pandas/_libs/tslibs/period.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index abdc03fad988d..061121b223562 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1516,8 +1516,10 @@ cdef class _Period: def __cinit__(self, ordinal, freq): self.ordinal = ordinal self.freq = freq - self.dtype = PeriodDtype.from_date_offset(freq) - + # Note: this is more performant than PeriodDtype.from_date_offset(freq) + # because from_date_offset cannot be made a cdef method (until cython + # supported cdef classmethods) + self.dtype = PeriodDtype(freq.period_dtype_code) @classmethod def _maybe_convert_freq(cls, object freq): From 5ee836a223e801730363e095a648159d6ca132ff Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 May 2020 09:37:42 -0700 Subject: [PATCH 5/7] privatize --- pandas/_libs/tslibs/dtypes.pyx | 2 +- pandas/_libs/tslibs/offsets.pyx | 34 ++++++++++++++--------------- pandas/_libs/tslibs/period.pyx | 38 ++++++++++++++++----------------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index c1a9c8f7247a3..d53b20063cadc 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -33,7 +33,7 @@ cdef class PeriodDtype: @classmethod def from_date_offset(cls, offset): - code = offset.period_dtype_code + code = offset._period_dtype_code return cls(code) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e41fab2e89287..a0c3c3ddfea50 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -861,43 +861,43 @@ cdef class Tick(SingleConstructorOffset): cdef class Day(Tick): _nanos_inc = 24 * 3600 * 1_000_000_000 _prefix = "D" - period_dtype_code = PeriodDtypeCode.D + _period_dtype_code = PeriodDtypeCode.D cdef class Hour(Tick): _nanos_inc = 3600 * 1_000_000_000 _prefix = "H" - period_dtype_code = PeriodDtypeCode.H + _period_dtype_code = PeriodDtypeCode.H cdef class Minute(Tick): _nanos_inc = 60 * 1_000_000_000 _prefix = "T" - period_dtype_code = PeriodDtypeCode.T + _period_dtype_code = PeriodDtypeCode.T cdef class Second(Tick): _nanos_inc = 1_000_000_000 _prefix = "S" - period_dtype_code = PeriodDtypeCode.S + _period_dtype_code = PeriodDtypeCode.S cdef class Milli(Tick): _nanos_inc = 1_000_000 _prefix = "L" - period_dtype_code = PeriodDtypeCode.L + _period_dtype_code = PeriodDtypeCode.L cdef class Micro(Tick): _nanos_inc = 1000 _prefix = "U" - period_dtype_code = PeriodDtypeCode.U + _period_dtype_code = PeriodDtypeCode.U cdef class Nano(Tick): _nanos_inc = 1 _prefix = "N" - period_dtype_code = PeriodDtypeCode.N + _period_dtype_code = PeriodDtypeCode.N def delta_to_tick(delta: timedelta) -> Tick: @@ -1261,7 +1261,7 @@ cdef class BusinessDay(BusinessMixin): """ DateOffset subclass representing possibly n business days. """ - period_dtype_code = PeriodDtypeCode.B + _period_dtype_code = PeriodDtypeCode.B _prefix = "B" _attributes = tuple(["n", "normalize", "offset"]) @@ -1924,13 +1924,13 @@ cdef class YearEnd(YearOffset): _day_opt = "end" cdef readonly: - int period_dtype_code + int _period_dtype_code def __init__(self, n=1, normalize=False, month=None): # Because YearEnd can be the freq for a Period, define its - # period_dtype_code at construction for performance + # _period_dtype_code at construction for performance YearOffset.__init__(self, n, normalize, month) - self.period_dtype_code = PeriodDtypeCode.A + self.month % 12 + self._period_dtype_code = PeriodDtypeCode.A + self.month % 12 cdef class YearBegin(YearOffset): @@ -2087,13 +2087,13 @@ cdef class QuarterEnd(QuarterOffset): _day_opt = "end" cdef readonly: - int period_dtype_code + int _period_dtype_code def __init__(self, n=1, normalize=False, startingMonth=None): # Because QuarterEnd can be the freq for a Period, define its - # period_dtype_code at construction for performance + # _period_dtype_code at construction for performance QuarterOffset.__init__(self, n, normalize, startingMonth) - self.period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 + self._period_dtype_code = PeriodDtypeCode.Q_DEC + self.startingMonth % 12 cdef class QuarterBegin(QuarterOffset): """ @@ -2143,7 +2143,7 @@ cdef class MonthEnd(MonthOffset): """ DateOffset of one month end. """ - period_dtype_code = PeriodDtypeCode.M + _period_dtype_code = PeriodDtypeCode.M _prefix = "M" _day_opt = "end" @@ -2447,7 +2447,7 @@ cdef class Week(SingleConstructorOffset): cdef readonly: object weekday # int or None - int period_dtype_code + int _period_dtype_code def __init__(self, n=1, normalize=False, weekday=None): BaseOffset.__init__(self, n, normalize) @@ -2457,7 +2457,7 @@ cdef class Week(SingleConstructorOffset): if self.weekday < 0 or self.weekday > 6: raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") - self.period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 + self._period_dtype_code = PeriodDtypeCode.W_SUN + (weekday + 1) % 7 cpdef __setstate__(self, state): self.n = state.pop("n") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 061121b223562..aaf39c5694297 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1510,7 +1510,7 @@ cdef class _Period: cdef readonly: int64_t ordinal - PeriodDtype dtype + PeriodDtype _dtype object freq def __cinit__(self, ordinal, freq): @@ -1519,7 +1519,7 @@ cdef class _Period: # Note: this is more performant than PeriodDtype.from_date_offset(freq) # because from_date_offset cannot be made a cdef method (until cython # supported cdef classmethods) - self.dtype = PeriodDtype(freq.period_dtype_code) + self._dtype = PeriodDtype(freq._period_dtype_code) @classmethod def _maybe_convert_freq(cls, object freq): @@ -1663,7 +1663,7 @@ cdef class _Period: """ freq = self._maybe_convert_freq(freq) how = validate_end_alias(how) - base1 = self.dtype.dtype_code + base1 = self._dtype.dtype_code base2, _ = get_freq_code(freq) # self.n can't be negative or 0 @@ -1743,7 +1743,7 @@ cdef class _Period: return endpoint - Timedelta(1, 'ns') if freq is None: - base = self.dtype.dtype_code + base = self._dtype.dtype_code freq = get_to_timestamp_base(base) base, _ = get_freq_code(freq) @@ -1754,12 +1754,12 @@ cdef class _Period: @property def year(self) -> int: - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pyear(self.ordinal, base) @property def month(self) -> int: - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pmonth(self.ordinal, base) @property @@ -1782,7 +1782,7 @@ cdef class _Period: >>> p.day 11 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pday(self.ordinal, base) @property @@ -1812,7 +1812,7 @@ cdef class _Period: >>> p.hour 0 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return phour(self.ordinal, base) @property @@ -1836,7 +1836,7 @@ cdef class _Period: >>> p.minute 3 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pminute(self.ordinal, base) @property @@ -1860,12 +1860,12 @@ cdef class _Period: >>> p.second 12 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return psecond(self.ordinal, base) @property def weekofyear(self) -> int: - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pweek(self.ordinal, base) @property @@ -1946,7 +1946,7 @@ cdef class _Period: >>> per.end_time.dayofweek 2 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pweekday(self.ordinal, base) @property @@ -2034,12 +2034,12 @@ cdef class _Period: >>> period.dayofyear 1 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pday_of_year(self.ordinal, base) @property def quarter(self) -> int: - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pquarter(self.ordinal, base) @property @@ -2083,7 +2083,7 @@ cdef class _Period: >>> per.year 2017 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pqyear(self.ordinal, base) @property @@ -2117,7 +2117,7 @@ cdef class _Period: >>> p.days_in_month 29 """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return pdays_in_month(self.ordinal, base) @property @@ -2155,7 +2155,7 @@ cdef class _Period: return self.freq.freqstr def __repr__(self) -> str: - base = self.dtype.dtype_code + base = self._dtype.dtype_code formatted = period_format(self.ordinal, base) return f"Period('{formatted}', '{self.freqstr}')" @@ -2163,7 +2163,7 @@ cdef class _Period: """ Return a string representation for a particular DataFrame """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code formatted = period_format(self.ordinal, base) value = str(formatted) return value @@ -2315,7 +2315,7 @@ cdef class _Period: >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - base = self.dtype.dtype_code + base = self._dtype.dtype_code return period_format(self.ordinal, base, fmt) From 96346c6f7e9dec96360c5cbb12c2cb0c81e46f4e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 31 May 2020 15:37:46 -0700 Subject: [PATCH 6/7] absolute import --- pandas/_libs/tslibs/period.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index aaf39c5694297..a8aadc71a55bd 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -53,6 +53,9 @@ from pandas._libs.tslibs.ccalendar cimport ( get_days_in_month, ) from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS + +from pandas._libs.tslibs.dtypes cimport PeriodDtype + from pandas._libs.tslibs.frequencies cimport ( attrname_to_abbrevs, get_freq_code, @@ -73,7 +76,6 @@ from pandas._libs.tslibs.offsets cimport ( ) from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal -from .dtypes cimport PeriodDtype cdef: enum: From c45246ed35cdd4c783aff9926a3486e92409def8 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 1 Jun 2020 19:17:37 -0700 Subject: [PATCH 7/7] PeriodDtype -> PeriodPseudoDtype --- pandas/_libs/tslibs/dtypes.pxd | 2 +- pandas/_libs/tslibs/dtypes.pyx | 6 +++--- pandas/_libs/tslibs/period.pyx | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index 36df048dce672..23c473726e5a9 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -51,6 +51,6 @@ cdef enum PeriodDtypeCode: N = 12000 # Nanosecondly -cdef class PeriodDtype: +cdef class PeriodPseudoDtype: cdef readonly: PeriodDtypeCode dtype_code diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index d53b20063cadc..5876439f9e800 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -2,7 +2,7 @@ # originals -cdef class PeriodDtype: +cdef class PeriodPseudoDtype: # cdef readonly: # PeriodDtypeCode dtype_code @@ -10,9 +10,9 @@ cdef class PeriodDtype: self.dtype_code = code def __eq__(self, other): - if not isinstance(other, PeriodDtype): + if not isinstance(other, PeriodPseudoDtype): return False - if not isinstance(self, PeriodDtype): + if not isinstance(self, PeriodPseudoDtype): # cython semantics, this is a reversed op return False return self.dtype_code == other.dtype_code diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f826c8cd22124..896b6251ab2da 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -54,7 +54,7 @@ from pandas._libs.tslibs.ccalendar cimport ( ) from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS -from pandas._libs.tslibs.dtypes cimport PeriodDtype +from pandas._libs.tslibs.dtypes cimport PeriodPseudoDtype from pandas._libs.tslibs.frequencies cimport ( attrname_to_abbrevs, @@ -1512,7 +1512,7 @@ cdef class _Period: cdef readonly: int64_t ordinal - PeriodDtype _dtype + PeriodPseudoDtype _dtype object freq def __cinit__(self, ordinal, freq): @@ -1521,7 +1521,7 @@ cdef class _Period: # Note: this is more performant than PeriodDtype.from_date_offset(freq) # because from_date_offset cannot be made a cdef method (until cython # supported cdef classmethods) - self._dtype = PeriodDtype(freq._period_dtype_code) + self._dtype = PeriodPseudoDtype(freq._period_dtype_code) @classmethod def _maybe_convert_freq(cls, object freq):