From e16e7cd40c77cebf08cd0172cbe098c2db7c8a68 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 17:09:59 -0800 Subject: [PATCH 01/12] tslibs annotations --- pandas/_libs/tslibs/c_timestamp.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 8e4143a053ba3..d661b161b5005 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -201,7 +201,7 @@ cdef class _Timestamp(datetime): """ return np.datetime64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: """ Convert the Timestamp to a NumPy datetime64. diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index e491d6111a919..48885e7142549 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -239,7 +239,7 @@ cdef class _NaT(datetime): """ return np.datetime64('NaT', 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: """ Convert the Timestamp to a NumPy datetime64. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9d8ed62388655..af849e9eae34b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -847,9 +847,9 @@ cdef class _Timedelta(timedelta): """ return np.timedelta64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False): + def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: """ - Convert the Timestamp to a NumPy timedelta64. + Convert the Timedelta to a NumPy timedelta64. .. versionadded:: 0.25.0 From 180db71f19d9e906be8e61938f8446af61e3fffe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 17:10:25 -0800 Subject: [PATCH 02/12] comment --- pandas/core/apply.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index d9f6bdae288ed..a8877f56584f4 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -183,6 +183,8 @@ def apply_raw(self): if "Function does not reduce" not in str(err): # catch only ValueError raised intentionally in libreduction raise + # We expect np.apply_along_axis to give a two-dimensional result, or + # also raise. result = np.apply_along_axis(self.f, self.axis, self.values) # TODO: mixed type case From 14d0400925eea4a2c1c5b5139ae82be4e0c77049 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 18:27:13 -0800 Subject: [PATCH 03/12] CLN: no need to pass name --- pandas/_libs/reduction.pyx | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index fa9c12777eb5b..057e1953e5a82 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -195,10 +195,10 @@ cdef class _BaseGrouper: return values, index cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp, - Slider islider, Slider vslider, object name): + Slider islider, Slider vslider): if cached_typ is None: cached_ityp = self.ityp(islider.buf) - cached_typ = self.typ(vslider.buf, index=cached_ityp, name=name) + cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference @@ -207,7 +207,7 @@ cdef class _BaseGrouper: cached_ityp._engine.clear_mapping() object.__setattr__(cached_typ._data._block, 'values', vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) - object.__setattr__(cached_typ, 'name', name) + object.__setattr__(cached_typ, 'name', self.name) return cached_typ, cached_ityp @@ -238,7 +238,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = getattr(series, 'name', None) + self.name = series.name self.dummy_arr, self.dummy_index = self._check_dummy(dummy) @@ -256,7 +256,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object name, cached_typ = None, cached_ityp = None + object cached_typ = None, cached_ityp = None counts = np.zeros(self.ngroups, dtype=np.int64) @@ -270,7 +270,6 @@ cdef class SeriesBinGrouper(_BaseGrouper): group_size = 0 n = len(self.arr) - name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -283,7 +282,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider, name) + cached_typ, cached_ityp, islider, vslider) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) @@ -343,7 +342,7 @@ cdef class SeriesGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = getattr(series, 'name', None) + self.name = series.name self.dummy_arr, self.dummy_index = self._check_dummy(dummy) self.ngroups = ngroups @@ -357,13 +356,12 @@ cdef class SeriesGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object name, cached_typ = None, cached_ityp = None + object cached_typ = None, cached_ityp = None labels = self.labels counts = np.zeros(self.ngroups, dtype=np.int64) group_size = 0 n = len(self.arr) - name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -385,7 +383,7 @@ cdef class SeriesGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider, name) + cached_typ, cached_ityp, islider, vslider) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) From 5f91e31b1e02875bfc9a3277bee37409d18eaf12 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 20:15:29 -0800 Subject: [PATCH 04/12] annotations --- pandas/_libs/internals.pyx | 6 +++--- pandas/_libs/sparse.pyx | 12 ++++++------ pandas/_libs/tslibs/c_timestamp.pyx | 10 +++++----- pandas/_libs/tslibs/nattype.pyx | 20 ++++++++++---------- pandas/_libs/tslibs/period.pyx | 15 ++++++++------- pandas/_libs/tslibs/resolution.pyx | 6 +++--- pandas/_libs/tslibs/timedeltas.pyx | 16 ++++++++-------- pandas/_libs/tslibs/timestamps.pyx | 26 +++++++++++++------------- pandas/_libs/writers.pyx | 7 ++----- pandas/io/pytables.py | 4 ++-- 10 files changed, 60 insertions(+), 62 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 08decb44a8a53..8e61a772912af 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -85,7 +85,7 @@ cdef class BlockPlacement: return iter(self._as_array) @property - def as_slice(self): + def as_slice(self) -> slice: cdef: slice s = self._ensure_has_slice() if s is None: @@ -118,7 +118,7 @@ cdef class BlockPlacement: return self._as_array @property - def is_slice_like(self): + def is_slice_like(self) -> bool: cdef: slice s = self._ensure_has_slice() return s is not None @@ -441,7 +441,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): yield blkno, result -def get_blkno_placements(blknos, group=True): +def get_blkno_placements(blknos, group: bool = True): """ Parameters diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 578995a3eb3b6..ee83901040b36 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -57,7 +57,7 @@ cdef class IntIndex(SparseIndex): return output @property - def nbytes(self): + def nbytes(self) -> int: return self.indices.nbytes def check_integrity(self): @@ -91,7 +91,7 @@ cdef class IntIndex(SparseIndex): if not monotonic: raise ValueError("Indices must be strictly increasing") - def equals(self, other): + def equals(self, other) -> bool: if not isinstance(other, IntIndex): return False @@ -103,7 +103,7 @@ cdef class IntIndex(SparseIndex): return same_length and same_indices @property - def ngaps(self): + def ngaps(self) -> int: return self.length - self.npoints def to_int_index(self): @@ -348,11 +348,11 @@ cdef class BlockIndex(SparseIndex): return output @property - def nbytes(self): + def nbytes(self) -> int: return self.blocs.nbytes + self.blengths.nbytes @property - def ngaps(self): + def ngaps(self) -> int: return self.length - self.npoints cpdef check_integrity(self): @@ -388,7 +388,7 @@ cdef class BlockIndex(SparseIndex): if blengths[i] == 0: raise ValueError(f'Zero-length block {i}') - def equals(self, other): + def equals(self, other) -> bool: if not isinstance(other, BlockIndex): return False diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index d661b161b5005..8512b34b9e78c 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -369,18 +369,18 @@ cdef class _Timestamp(datetime): return out[0] @property - def _repr_base(self): + def _repr_base(self) -> str: return '{date} {time}'.format(date=self._date_repr, time=self._time_repr) @property - def _date_repr(self): + def _date_repr(self) -> str: # Ideal here would be self.strftime("%Y-%m-%d"), but # the datetime strftime() methods require year >= 1900 return '%d-%.2d-%.2d' % (self.year, self.month, self.day) @property - def _time_repr(self): + def _time_repr(self) -> str: result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second) if self.nanosecond != 0: @@ -391,7 +391,7 @@ cdef class _Timestamp(datetime): return result @property - def _short_repr(self): + def _short_repr(self) -> str: # format a Timestamp with only _date_repr if possible # otherwise _repr_base if (self.hour == 0 and @@ -403,7 +403,7 @@ cdef class _Timestamp(datetime): return self._repr_base @property - def asm8(self): + def asm8(self) -> np.datetime64: """ Return numpy datetime64 format in nanoseconds. """ diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 48885e7142549..fdf60afc5110c 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -230,10 +230,10 @@ cdef class _NaT(datetime): return NotImplemented @property - def asm8(self): + def asm8(self) -> np.datetime64: return np.datetime64(NPY_NAT, 'ns') - def to_datetime64(self): + def to_datetime64(self) -> np.datetime64: """ Return a numpy.datetime64 object with 'ns' precision. """ @@ -265,7 +265,7 @@ cdef class _NaT(datetime): def __str__(self) -> str: return 'NaT' - def isoformat(self, sep='T'): + def isoformat(self, sep='T') -> str: # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. return 'NaT' @@ -286,31 +286,31 @@ cdef class _NaT(datetime): return np.nan @property - def is_leap_year(self): + def is_leap_year(self) -> bool: return False @property - def is_month_start(self): + def is_month_start(self) -> bool: return False @property - def is_quarter_start(self): + def is_quarter_start(self) -> bool: return False @property - def is_year_start(self): + def is_year_start(self) -> bool: return False @property - def is_month_end(self): + def is_month_end(self) -> bool: return False @property - def is_quarter_end(self): + def is_quarter_end(self) -> bool: return False @property - def is_year_end(self): + def is_year_end(self) -> bool: return False diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 2512fdb891e3e..969710dbcb587 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1738,7 +1738,7 @@ cdef class _Period: return Period(ordinal=ordinal, freq=freq) @property - def start_time(self): + def start_time(self) -> Timestamp: """ Get the Timestamp for the start of the period. @@ -1768,13 +1768,13 @@ cdef class _Period: return self.to_timestamp(how='S') @property - def end_time(self): + def end_time(self) -> Timestamp: # freq.n can't be negative or 0 # ordinal = (self + self.freq.n).start_time.value - 1 ordinal = (self + self.freq).start_time.value - 1 return Timestamp(ordinal) - def to_timestamp(self, freq=None, how='start', tz=None): + def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp: """ Return the Timestamp representation of the Period. @@ -2204,7 +2204,7 @@ cdef class _Period: return self.days_in_month @property - def is_leap_year(self): + def is_leap_year(self) -> bool: return bool(is_leapyear(self.year)) @classmethod @@ -2237,8 +2237,8 @@ cdef class _Period: object_state = None, self.freq, self.ordinal return (Period, object_state) - def strftime(self, fmt): - """ + def strftime(self, fmt) -> str: + r""" Returns the string representation of the :class:`Period`, depending on the selected ``fmt``. ``fmt`` must be a string containing one or several directives. The method recognizes the same @@ -2416,7 +2416,8 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self + cdef: + _Period self if freq is not None: freq = cls._maybe_convert_freq(freq) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index c0b20c14e9920..1ac5b4aeb0274 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -106,7 +106,7 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY -def get_freq_group(freq): +def get_freq_group(freq) -> int: """ Return frequency code group of given frequency str or offset. @@ -189,7 +189,7 @@ class Resolution: _freq_reso_map = {v: k for k, v in _reso_freq_map.items()} @classmethod - def get_str(cls, reso): + def get_str(cls, reso) -> str: """ Return resolution str against resolution code. @@ -228,7 +228,7 @@ class Resolution: return get_freq_group(cls.get_freq(resostr)) @classmethod - def get_freq(cls, resostr): + def get_freq(cls, resostr: str) -> str: """ Return frequency str against resolution str. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index af849e9eae34b..4002271982694 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -841,7 +841,7 @@ cdef class _Timedelta(timedelta): """ return timedelta(microseconds=int(self.value) / 1000) - def to_timedelta64(self): + def to_timedelta64(self) -> np.timedelta64: """ Return a numpy.timedelta64 object with 'ns' precision. """ @@ -920,7 +920,7 @@ cdef class _Timedelta(timedelta): return self.value @property - def asm8(self): + def asm8(self) -> np.timedelta64: """ Return a numpy timedelta64 array scalar view. @@ -955,7 +955,7 @@ cdef class _Timedelta(timedelta): return np.int64(self.value).view('m8[ns]') @property - def resolution_string(self): + def resolution_string(self) -> str: """ Return a string representing the lowest timedelta resolution. @@ -1013,7 +1013,7 @@ cdef class _Timedelta(timedelta): return "D" @property - def resolution(self): + def resolution(self) -> str: """ Return a string representing the lowest timedelta resolution. @@ -1063,7 +1063,7 @@ cdef class _Timedelta(timedelta): return self.resolution_string @property - def nanoseconds(self): + def nanoseconds(self) -> int: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. @@ -1095,7 +1095,7 @@ cdef class _Timedelta(timedelta): self._ensure_components() return self._ns - def _repr_base(self, format=None): + def _repr_base(self, format=None) -> str: """ Parameters @@ -1148,10 +1148,10 @@ cdef class _Timedelta(timedelta): def __str__(self) -> str: return self._repr_base(format='long') - def __bool__(self): + def __bool__(self) -> bool: return self.value != 0 - def isoformat(self): + def isoformat(self) -> str: """ Format Timedelta as ISO 8601 Duration like ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 317dc769636fb..e39fef0375d92 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -325,7 +325,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implmented." + raise NotImplementedError("Timestamp.strptime() is not implemented." "Use to_datetime() to parse date strings.") @classmethod @@ -605,7 +605,7 @@ timedelta}, default 'raise' """ return self.weekday() - def day_name(self, locale=None): + def day_name(self, locale=None) -> str: """ Return the day name of the Timestamp with specified locale. @@ -622,7 +622,7 @@ timedelta}, default 'raise' """ return self._get_date_name_field('day_name', locale) - def month_name(self, locale=None): + def month_name(self, locale=None) -> str: """ Return the month name of the Timestamp with specified locale. @@ -640,7 +640,7 @@ timedelta}, default 'raise' return self._get_date_name_field('month_name', locale) @property - def weekday_name(self): + def weekday_name(self) -> str: """ .. deprecated:: 0.23.0 Use ``Timestamp.day_name()`` instead @@ -690,7 +690,7 @@ timedelta}, default 'raise' return getattr(self.freq, 'freqstr', self.freq) @property - def is_month_start(self): + def is_month_start(self) -> bool: """ Return True if date is first day of month. """ @@ -700,7 +700,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_month_start') @property - def is_month_end(self): + def is_month_end(self) -> bool: """ Return True if date is last day of month. """ @@ -710,7 +710,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_month_end') @property - def is_quarter_start(self): + def is_quarter_start(self) -> bool: """ Return True if date is first day of the quarter. """ @@ -720,7 +720,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_quarter_start') @property - def is_quarter_end(self): + def is_quarter_end(self) -> bool: """ Return True if date is last day of the quarter. """ @@ -730,7 +730,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_quarter_end') @property - def is_year_start(self): + def is_year_start(self) -> bool: """ Return True if date is first day of the year. """ @@ -740,7 +740,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_year_start') @property - def is_year_end(self): + def is_year_end(self) -> bool: """ Return True if date is last day of the year. """ @@ -750,7 +750,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_year_end') @property - def is_leap_year(self): + def is_leap_year(self) -> bool: """ Return True if year is a leap year. """ @@ -994,7 +994,7 @@ default 'raise' return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) - def isoformat(self, sep='T'): + def isoformat(self, sep='T') -> str: base = super(_Timestamp, self).isoformat(sep=sep) if self.nanosecond == 0: return base @@ -1011,7 +1011,7 @@ default 'raise' return base1 + base2 - def _has_time_component(self): + def _has_time_component(self) -> bool: """ Returns if the Timestamp has a time component in addition to the date part diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 1775893b9f2bf..1120c1501b31e 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -70,7 +70,7 @@ def write_csv_rows(list data, ndarray data_index, @cython.boundscheck(False) @cython.wraparound(False) -def convert_json_to_lines(object arr): +def convert_json_to_lines(arr: object) -> str: """ replace comma separated json with line feeds, paying special attention to quotes & brackets @@ -149,7 +149,7 @@ cpdef inline Py_ssize_t word_len(object val): @cython.wraparound(False) def string_array_replace_from_nan_rep( ndarray[object, ndim=1] arr, object nan_rep, - object replace=None): + object replace=np.nan): """ Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. @@ -157,9 +157,6 @@ def string_array_replace_from_nan_rep( cdef: Py_ssize_t length = len(arr), i = 0 - if replace is None: - replace = np.nan - for i in range(length): if arr[i] == nan_rep: arr[i] = replace diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ee08e2abb2289..9e72a0aa2c867 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4842,7 +4842,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, errors="strict"): return data.reshape(shape) -def _maybe_convert(values, val_kind, encoding, errors): +def _maybe_convert(values: np.ndarray, val_kind, encoding, errors): if _need_convert(val_kind): conv = _get_converter(val_kind, encoding, errors) # conv = np.frompyfunc(conv, 1, 1) @@ -4862,7 +4862,7 @@ def _get_converter(kind, encoding, errors): raise ValueError("invalid kind {kind}".format(kind=kind)) -def _need_convert(kind): +def _need_convert(kind) -> bool: kind = _ensure_decoded(kind) if kind in ("datetime", "datetime64", "string"): return True From d382c3679b86153fb4be99720de4f1088fd0c05f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 13:00:27 -0800 Subject: [PATCH 05/12] revert --- pandas/_libs/reduction.pyx | 20 +++++++++++--------- pandas/_libs/tslibs/c_timestamp.pyx | 12 ++++++------ pandas/_libs/tslibs/nattype.pyx | 22 +++++++++++----------- pandas/_libs/tslibs/period.pyx | 15 +++++++-------- pandas/_libs/tslibs/resolution.pyx | 6 +++--- pandas/_libs/tslibs/timedeltas.pyx | 20 ++++++++++---------- pandas/_libs/tslibs/timestamps.pyx | 26 +++++++++++++------------- 7 files changed, 61 insertions(+), 60 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7fd888c28eef3..11dc2d04bb74e 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -193,10 +193,10 @@ cdef class _BaseGrouper: return values, index cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp, - Slider islider, Slider vslider): + Slider islider, Slider vslider, object name): if cached_typ is None: cached_ityp = self.ityp(islider.buf) - cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) + cached_typ = self.typ(vslider.buf, index=cached_ityp, name=name) else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference @@ -205,7 +205,7 @@ cdef class _BaseGrouper: cached_ityp._engine.clear_mapping() object.__setattr__(cached_typ._data._block, 'values', vslider.buf) object.__setattr__(cached_typ, '_index', cached_ityp) - object.__setattr__(cached_typ, 'name', self.name) + object.__setattr__(cached_typ, 'name', name) return cached_typ, cached_ityp @@ -236,7 +236,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = series.name + self.name = getattr(series, 'name', None) self.dummy_arr, self.dummy_index = self._check_dummy(dummy) @@ -254,7 +254,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object cached_typ = None, cached_ityp = None + object name, cached_typ = None, cached_ityp = None counts = np.zeros(self.ngroups, dtype=np.int64) @@ -268,6 +268,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): group_size = 0 n = len(self.arr) + name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -282,7 +283,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider) + cached_typ, cached_ityp, islider, vslider, name) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) @@ -341,7 +342,7 @@ cdef class SeriesGrouper(_BaseGrouper): self.typ = series._constructor self.ityp = series.index._constructor self.index = series.index.values - self.name = series.name + self.name = getattr(series, 'name', None) self.dummy_arr, self.dummy_index = self._check_dummy(dummy) self.ngroups = ngroups @@ -355,12 +356,13 @@ cdef class SeriesGrouper(_BaseGrouper): object res bint initialized = 0 Slider vslider, islider - object cached_typ = None, cached_ityp = None + object name, cached_typ = None, cached_ityp = None labels = self.labels counts = np.zeros(self.ngroups, dtype=np.int64) group_size = 0 n = len(self.arr) + name = self.name vslider = Slider(self.arr, self.dummy_arr) islider = Slider(self.index, self.dummy_index) @@ -384,7 +386,7 @@ cdef class SeriesGrouper(_BaseGrouper): vslider.set_length(group_size) cached_typ, cached_ityp = self._update_cached_objs( - cached_typ, cached_ityp, islider, vslider) + cached_typ, cached_ityp, islider, vslider, name) cached_ityp._engine.clear_mapping() res = self.f(cached_typ) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 8512b34b9e78c..8e4143a053ba3 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -201,7 +201,7 @@ cdef class _Timestamp(datetime): """ return np.datetime64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + def to_numpy(self, dtype=None, copy=False): """ Convert the Timestamp to a NumPy datetime64. @@ -369,18 +369,18 @@ cdef class _Timestamp(datetime): return out[0] @property - def _repr_base(self) -> str: + def _repr_base(self): return '{date} {time}'.format(date=self._date_repr, time=self._time_repr) @property - def _date_repr(self) -> str: + def _date_repr(self): # Ideal here would be self.strftime("%Y-%m-%d"), but # the datetime strftime() methods require year >= 1900 return '%d-%.2d-%.2d' % (self.year, self.month, self.day) @property - def _time_repr(self) -> str: + def _time_repr(self): result = '%.2d:%.2d:%.2d' % (self.hour, self.minute, self.second) if self.nanosecond != 0: @@ -391,7 +391,7 @@ cdef class _Timestamp(datetime): return result @property - def _short_repr(self) -> str: + def _short_repr(self): # format a Timestamp with only _date_repr if possible # otherwise _repr_base if (self.hour == 0 and @@ -403,7 +403,7 @@ cdef class _Timestamp(datetime): return self._repr_base @property - def asm8(self) -> np.datetime64: + def asm8(self): """ Return numpy datetime64 format in nanoseconds. """ diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index fdf60afc5110c..e491d6111a919 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -230,16 +230,16 @@ cdef class _NaT(datetime): return NotImplemented @property - def asm8(self) -> np.datetime64: + def asm8(self): return np.datetime64(NPY_NAT, 'ns') - def to_datetime64(self) -> np.datetime64: + def to_datetime64(self): """ Return a numpy.datetime64 object with 'ns' precision. """ return np.datetime64('NaT', 'ns') - def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + def to_numpy(self, dtype=None, copy=False): """ Convert the Timestamp to a NumPy datetime64. @@ -265,7 +265,7 @@ cdef class _NaT(datetime): def __str__(self) -> str: return 'NaT' - def isoformat(self, sep='T') -> str: + def isoformat(self, sep='T'): # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. return 'NaT' @@ -286,31 +286,31 @@ cdef class _NaT(datetime): return np.nan @property - def is_leap_year(self) -> bool: + def is_leap_year(self): return False @property - def is_month_start(self) -> bool: + def is_month_start(self): return False @property - def is_quarter_start(self) -> bool: + def is_quarter_start(self): return False @property - def is_year_start(self) -> bool: + def is_year_start(self): return False @property - def is_month_end(self) -> bool: + def is_month_end(self): return False @property - def is_quarter_end(self) -> bool: + def is_quarter_end(self): return False @property - def is_year_end(self) -> bool: + def is_year_end(self): return False diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 969710dbcb587..2512fdb891e3e 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1738,7 +1738,7 @@ cdef class _Period: return Period(ordinal=ordinal, freq=freq) @property - def start_time(self) -> Timestamp: + def start_time(self): """ Get the Timestamp for the start of the period. @@ -1768,13 +1768,13 @@ cdef class _Period: return self.to_timestamp(how='S') @property - def end_time(self) -> Timestamp: + def end_time(self): # freq.n can't be negative or 0 # ordinal = (self + self.freq.n).start_time.value - 1 ordinal = (self + self.freq).start_time.value - 1 return Timestamp(ordinal) - def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp: + def to_timestamp(self, freq=None, how='start', tz=None): """ Return the Timestamp representation of the Period. @@ -2204,7 +2204,7 @@ cdef class _Period: return self.days_in_month @property - def is_leap_year(self) -> bool: + def is_leap_year(self): return bool(is_leapyear(self.year)) @classmethod @@ -2237,8 +2237,8 @@ cdef class _Period: object_state = None, self.freq, self.ordinal return (Period, object_state) - def strftime(self, fmt) -> str: - r""" + def strftime(self, fmt): + """ Returns the string representation of the :class:`Period`, depending on the selected ``fmt``. ``fmt`` must be a string containing one or several directives. The method recognizes the same @@ -2416,8 +2416,7 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef: - _Period self + cdef _Period self if freq is not None: freq = cls._maybe_convert_freq(freq) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 1ac5b4aeb0274..c0b20c14e9920 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -106,7 +106,7 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY -def get_freq_group(freq) -> int: +def get_freq_group(freq): """ Return frequency code group of given frequency str or offset. @@ -189,7 +189,7 @@ class Resolution: _freq_reso_map = {v: k for k, v in _reso_freq_map.items()} @classmethod - def get_str(cls, reso) -> str: + def get_str(cls, reso): """ Return resolution str against resolution code. @@ -228,7 +228,7 @@ class Resolution: return get_freq_group(cls.get_freq(resostr)) @classmethod - def get_freq(cls, resostr: str) -> str: + def get_freq(cls, resostr): """ Return frequency str against resolution str. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4002271982694..9d8ed62388655 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -841,15 +841,15 @@ cdef class _Timedelta(timedelta): """ return timedelta(microseconds=int(self.value) / 1000) - def to_timedelta64(self) -> np.timedelta64: + def to_timedelta64(self): """ Return a numpy.timedelta64 object with 'ns' precision. """ return np.timedelta64(self.value, 'ns') - def to_numpy(self, dtype=None, copy=False) -> np.timedelta64: + def to_numpy(self, dtype=None, copy=False): """ - Convert the Timedelta to a NumPy timedelta64. + Convert the Timestamp to a NumPy timedelta64. .. versionadded:: 0.25.0 @@ -920,7 +920,7 @@ cdef class _Timedelta(timedelta): return self.value @property - def asm8(self) -> np.timedelta64: + def asm8(self): """ Return a numpy timedelta64 array scalar view. @@ -955,7 +955,7 @@ cdef class _Timedelta(timedelta): return np.int64(self.value).view('m8[ns]') @property - def resolution_string(self) -> str: + def resolution_string(self): """ Return a string representing the lowest timedelta resolution. @@ -1013,7 +1013,7 @@ cdef class _Timedelta(timedelta): return "D" @property - def resolution(self) -> str: + def resolution(self): """ Return a string representing the lowest timedelta resolution. @@ -1063,7 +1063,7 @@ cdef class _Timedelta(timedelta): return self.resolution_string @property - def nanoseconds(self) -> int: + def nanoseconds(self): """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. @@ -1095,7 +1095,7 @@ cdef class _Timedelta(timedelta): self._ensure_components() return self._ns - def _repr_base(self, format=None) -> str: + def _repr_base(self, format=None): """ Parameters @@ -1148,10 +1148,10 @@ cdef class _Timedelta(timedelta): def __str__(self) -> str: return self._repr_base(format='long') - def __bool__(self) -> bool: + def __bool__(self): return self.value != 0 - def isoformat(self) -> str: + def isoformat(self): """ Format Timedelta as ISO 8601 Duration like ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 91357588048b6..03ed26337d539 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -324,7 +324,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implemented." + raise NotImplementedError("Timestamp.strptime() is not implmented." "Use to_datetime() to parse date strings.") @classmethod @@ -604,7 +604,7 @@ timedelta}, default 'raise' """ return self.weekday() - def day_name(self, locale=None) -> str: + def day_name(self, locale=None): """ Return the day name of the Timestamp with specified locale. @@ -621,7 +621,7 @@ timedelta}, default 'raise' """ return self._get_date_name_field('day_name', locale) - def month_name(self, locale=None) -> str: + def month_name(self, locale=None): """ Return the month name of the Timestamp with specified locale. @@ -639,7 +639,7 @@ timedelta}, default 'raise' return self._get_date_name_field('month_name', locale) @property - def weekday_name(self) -> str: + def weekday_name(self): """ .. deprecated:: 0.23.0 Use ``Timestamp.day_name()`` instead @@ -689,7 +689,7 @@ timedelta}, default 'raise' return getattr(self.freq, 'freqstr', self.freq) @property - def is_month_start(self) -> bool: + def is_month_start(self): """ Return True if date is first day of month. """ @@ -699,7 +699,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_month_start') @property - def is_month_end(self) -> bool: + def is_month_end(self): """ Return True if date is last day of month. """ @@ -709,7 +709,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_month_end') @property - def is_quarter_start(self) -> bool: + def is_quarter_start(self): """ Return True if date is first day of the quarter. """ @@ -719,7 +719,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_quarter_start') @property - def is_quarter_end(self) -> bool: + def is_quarter_end(self): """ Return True if date is last day of the quarter. """ @@ -729,7 +729,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_quarter_end') @property - def is_year_start(self) -> bool: + def is_year_start(self): """ Return True if date is first day of the year. """ @@ -739,7 +739,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_year_start') @property - def is_year_end(self) -> bool: + def is_year_end(self): """ Return True if date is last day of the year. """ @@ -749,7 +749,7 @@ timedelta}, default 'raise' return self._get_start_end_field('is_year_end') @property - def is_leap_year(self) -> bool: + def is_leap_year(self): """ Return True if year is a leap year. """ @@ -992,7 +992,7 @@ default 'raise' return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) - def isoformat(self, sep='T') -> str: + def isoformat(self, sep='T'): base = super(_Timestamp, self).isoformat(sep=sep) if self.nanosecond == 0: return base @@ -1009,7 +1009,7 @@ default 'raise' return base1 + base2 - def _has_time_component(self) -> bool: + def _has_time_component(self): """ Returns if the Timestamp has a time component in addition to the date part From 0ac7b6063fb4974aedd5f905e5c2adb04e184dca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 13:02:15 -0800 Subject: [PATCH 06/12] revert --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9e72a0aa2c867..ee08e2abb2289 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4842,7 +4842,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, errors="strict"): return data.reshape(shape) -def _maybe_convert(values: np.ndarray, val_kind, encoding, errors): +def _maybe_convert(values, val_kind, encoding, errors): if _need_convert(val_kind): conv = _get_converter(val_kind, encoding, errors) # conv = np.frompyfunc(conv, 1, 1) @@ -4862,7 +4862,7 @@ def _get_converter(kind, encoding, errors): raise ValueError("invalid kind {kind}".format(kind=kind)) -def _need_convert(kind) -> bool: +def _need_convert(kind): kind = _ensure_decoded(kind) if kind in ("datetime", "datetime64", "string"): return True From f0f1ded9daa6bec76cfdfb924c7172de1c05cd6a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Nov 2019 13:04:01 -0800 Subject: [PATCH 07/12] revert --- pandas/core/apply.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 037c9c604161c..9c5806a3fe945 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -218,8 +218,6 @@ def apply_raw(self): if "Function does not reduce" not in str(err): # catch only ValueError raised intentionally in libreduction raise - # We expect np.apply_along_axis to give a two-dimensional result, or - # also raise. result = np.apply_along_axis(self.f, self.axis, self.values) # TODO: mixed type case From 1961bbebf386c31a40e97071a0e41227303589e2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Nov 2019 09:38:46 -0800 Subject: [PATCH 08/12] types --- pandas/core/computation/scope.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 81c7b04bf3284..ee82664f6cb21 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -29,7 +29,7 @@ def _ensure_scope( ) -def _replacer(x): +def _replacer(x) -> str: """Replace a number with its hexadecimal representation. Used to tag temporary variables with their calling scope's id. """ @@ -44,11 +44,11 @@ def _replacer(x): return hex(hexin) -def _raw_hex_id(obj): +def _raw_hex_id(obj) -> str: """Return the padded hexadecimal id of ``obj``.""" # interpret as a pointer since that's what really what id returns packed = struct.pack("@P", id(obj)) - return "".join(map(_replacer, packed)) + return "".join(_replacer(x) for x in packed) _DEFAULT_GLOBALS = { @@ -63,7 +63,7 @@ def _raw_hex_id(obj): } -def _get_pretty_string(obj): +def _get_pretty_string(obj) -> str: """ Return a prettier version of obj. @@ -74,7 +74,7 @@ def _get_pretty_string(obj): Returns ------- - s : str + str Pretty print object repr """ sio = StringIO() @@ -148,8 +148,9 @@ def __repr__(self) -> str: ) @property - def has_resolvers(self): - """Return whether we have any extra scope. + def has_resolvers(self) -> bool: + """ + Return whether we have any extra scope. For example, DataFrames pass Their columns as resolvers during calls to ``DataFrame.eval()`` and ``DataFrame.query()``. @@ -250,13 +251,13 @@ def _get_vars(self, stack, scopes): # scope after the loop del frame - def update(self, level): + def update(self, level: int): """ Update the current scope by going back `level` levels. Parameters ---------- - level : int or None, optional, default None + level : int """ sl = level + 1 @@ -270,7 +271,7 @@ def update(self, level): finally: del stack[:], stack - def add_tmp(self, value): + def add_tmp(self, value) -> str: """ Add a temporary variable to the scope. @@ -281,7 +282,7 @@ def add_tmp(self, value): Returns ------- - name : basestring + str The name of the temporary variable created. """ name = "{name}_{num}_{hex_id}".format( @@ -297,7 +298,7 @@ def add_tmp(self, value): return name @property - def ntemps(self): + def ntemps(self) -> int: """The number of temporary variables in this scope""" return len(self.temps) From d87f4e37d6b71d44247215717f7054d4caaac613 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Nov 2019 14:46:08 -0800 Subject: [PATCH 09/12] types --- pandas/core/computation/align.py | 2 +- pandas/core/computation/engines.py | 9 +++++---- pandas/core/computation/ops.py | 14 +++++++------- pandas/core/computation/pytables.py | 13 +++++++------ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 3e1e5ed89d877..dfb858d797f41 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -33,7 +33,7 @@ def _zip_axes_from_type(typ, new_axes): return axes -def _any_pandas_objects(terms): +def _any_pandas_objects(terms) -> bool: """Check a sequence of terms for instances of PandasObject.""" return any(isinstance(term.value, PandasObject) for term in terms) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index dc6378e83d229..513eb0fd7f2a6 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -46,8 +46,9 @@ def __init__(self, expr): self.aligned_axes = None self.result_type = None - def convert(self): - """Convert an expression for evaluation. + def convert(self) -> str: + """ + Convert an expression for evaluation. Defaults to return the expression as a string. """ @@ -75,7 +76,7 @@ def evaluate(self): ) @property - def _is_aligned(self): + def _is_aligned(self) -> bool: return self.aligned_axes is not None and self.result_type is not None @abc.abstractmethod @@ -104,7 +105,7 @@ class NumExprEngine(AbstractEngine): def __init__(self, expr): super().__init__(expr) - def convert(self): + def convert(self) -> str: return str(super().convert()) def _evaluate(self): diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 8fab5bd87d4fe..f0d46f762a809 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -69,7 +69,7 @@ def __new__(cls, name, env, side=None, encoding=None): supr_new = super(Term, klass).__new__ return supr_new(klass) - def __init__(self, name, env, side=None, encoding=None): + def __init__(self, name: str, env, side=None, encoding=None): self._name = name self.env = env self.side = side @@ -79,7 +79,7 @@ def __init__(self, name, env, side=None, encoding=None): self.encoding = encoding @property - def local_name(self): + def local_name(self) -> str: return self.name.replace(_LOCAL_TAG, "") def __repr__(self) -> str: @@ -339,7 +339,7 @@ def _cast_inplace(terms, acceptable_dtypes, dtype): term.update(new_value) -def is_term(obj): +def is_term(obj) -> bool: return isinstance(obj, Term) @@ -396,7 +396,7 @@ def __call__(self, env): return self.func(left, right) - def evaluate(self, env, engine, parser, term_type, eval_in_python): + def evaluate(self, env, engine: str, parser, term_type, eval_in_python): """ Evaluate a binary operation *before* being passed to the engine. @@ -488,7 +488,7 @@ def _disallow_scalar_only_bool_ops(self): raise NotImplementedError("cannot evaluate scalar only bool ops") -def isnumeric(dtype): +def isnumeric(dtype) -> bool: return issubclass(np.dtype(dtype).type, np.number) @@ -561,7 +561,7 @@ def __repr__(self) -> str: return pprint_thing("{0}({1})".format(self.op, self.operand)) @property - def return_type(self): + def return_type(self) -> np.dtype: operand = self.operand if operand.return_type == np.dtype("bool"): return np.dtype("bool") @@ -588,7 +588,7 @@ def __repr__(self) -> str: class FuncNode: - def __init__(self, name): + def __init__(self, name: str): from pandas.core.computation.check import _NUMEXPR_INSTALLED, _NUMEXPR_VERSION if name not in _mathops or ( diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 3a2ea30cbc8b9..66995dcb55de5 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -129,12 +129,12 @@ def conform(self, rhs): return rhs @property - def is_valid(self): + def is_valid(self) -> bool: """ return True if this is a valid field """ return self.lhs in self.queryables @property - def is_in_table(self): + def is_in_table(self) -> bool: """ return True if this is a valid column name for generation (e.g. an actual column in the table) """ return self.queryables.get(self.lhs) is not None @@ -154,12 +154,12 @@ def metadata(self): """ the metadata of my field """ return getattr(self.queryables.get(self.lhs), "metadata", None) - def generate(self, v): + def generate(self, v) -> str: """ create and return the op string for this TermValue """ val = v.tostring(self.encoding) return "({lhs} {op} {val})".format(lhs=self.lhs, op=self.op, val=val) - def convert_value(self, v): + def convert_value(self, v) -> "TermValue": """ convert the expression that is in the term to something that is accepted by pytables """ @@ -574,10 +574,11 @@ def evaluate(self): class TermValue: """ hold a term value the we use to construct a condition/filter """ - def __init__(self, value, converted, kind): + def __init__(self, value, converted, kind: str): self.value = value self.converted = converted self.kind = kind + assert isinstance(kind, str), kind def tostring(self, encoding): """ quote the string if not encoded @@ -593,7 +594,7 @@ def tostring(self, encoding): return self.converted -def maybe_expression(s): +def maybe_expression(s) -> bool: """ loose checking if s is a pytables-acceptable expression """ if not isinstance(s, str): return False From c364ec3cb7f0561945fea420a57418c9cb36b6c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Nov 2019 15:42:51 -0800 Subject: [PATCH 10/12] typing --- pandas/core/computation/ops.py | 21 +++++++++++---------- pandas/core/computation/pytables.py | 18 +++++++++++------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index f0d46f762a809..b3e53ffe080f8 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -69,7 +69,8 @@ def __new__(cls, name, env, side=None, encoding=None): supr_new = super(Term, klass).__new__ return supr_new(klass) - def __init__(self, name: str, env, side=None, encoding=None): + def __init__(self, name, env, side=None, encoding=None): + # name is a str for Term, but may be something else for subclasses self._name = name self.env = env self.side = side @@ -120,7 +121,7 @@ def update(self, value): self.value = value @property - def is_scalar(self): + def is_scalar(self) -> bool: return is_scalar(self._value) @property @@ -139,14 +140,14 @@ def type(self): return_type = type @property - def raw(self): + def raw(self) -> str: return pprint_thing( "{0}(name={1!r}, type={2})" "".format(self.__class__.__name__, self.name, self.type) ) @property - def is_datetime(self): + def is_datetime(self) -> bool: try: t = self.type.type except AttributeError: @@ -220,7 +221,7 @@ def return_type(self): return _result_type_many(*(term.type for term in com.flatten(self))) @property - def has_invalid_return_type(self): + def has_invalid_return_type(self) -> bool: types = self.operand_types obj_dtype_set = frozenset([np.dtype("object")]) return self.return_type == object and types - obj_dtype_set @@ -230,11 +231,11 @@ def operand_types(self): return frozenset(term.type for term in com.flatten(self)) @property - def is_scalar(self): + def is_scalar(self) -> bool: return all(operand.is_scalar for operand in self.operands) @property - def is_datetime(self): + def is_datetime(self) -> bool: try: t = self.return_type.type except AttributeError: @@ -354,7 +355,7 @@ class BinOp(Op): right : Term or Op """ - def __init__(self, op, lhs, rhs, **kwargs): + def __init__(self, op: str, lhs, rhs, **kwargs): super().__init__(op, (lhs, rhs)) self.lhs = lhs self.rhs = rhs @@ -505,7 +506,7 @@ class Div(BinOp): regardless of the value of ``truediv``. """ - def __init__(self, lhs, rhs, truediv, *args, **kwargs): + def __init__(self, lhs, rhs, truediv: bool, *args, **kwargs): super().__init__("/", lhs, rhs, *args, **kwargs) if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type): @@ -541,7 +542,7 @@ class UnaryOp(Op): * If no function associated with the passed operator token is found. """ - def __init__(self, op, operand): + def __init__(self, op: str, operand): super().__init__(op, (operand,)) self.operand = operand diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 66995dcb55de5..0c464231eadd4 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -2,6 +2,7 @@ import ast from functools import partial +from typing import Optional import numpy as np @@ -279,7 +280,7 @@ def evaluate(self): return self - def generate_filter_op(self, invert=False): + def generate_filter_op(self, invert: bool = False): if (self.op == "!=" and not invert) or (self.op == "==" and invert): return lambda axis, vals: ~axis.isin(vals) else: @@ -505,7 +506,7 @@ class Expr(expr.Expr): "major_axis>=20130101" """ - def __init__(self, where, queryables=None, encoding=None, scope_level=0): + def __init__(self, where, queryables=None, encoding=None, scope_level: int = 0): where = _validate_where(where) @@ -520,18 +521,21 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): if isinstance(where, Expr): local_dict = where.env.scope - where = where.expr + _where = where.expr elif isinstance(where, (list, tuple)): + where = list(where) for idx, w in enumerate(where): if isinstance(w, Expr): local_dict = w.env.scope else: w = _validate_where(w) where[idx] = w - where = " & ".join(map("({})".format, com.flatten(where))) # noqa + _where = " & ".join(map("({})".format, com.flatten(where))) + else: + _where = where - self.expr = where + self.expr = _where self.env = Scope(scope_level + 1, local_dict=local_dict) if queryables is not None and isinstance(self.expr, str): @@ -574,11 +578,11 @@ def evaluate(self): class TermValue: """ hold a term value the we use to construct a condition/filter """ - def __init__(self, value, converted, kind: str): + def __init__(self, value, converted, kind: Optional[str]): self.value = value self.converted = converted self.kind = kind - assert isinstance(kind, str), kind + assert kind is None or isinstance(kind, str), kind def tostring(self, encoding): """ quote the string if not encoded From d637bf7ad73c69972ca66a9432febf68d6d94fdf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Nov 2019 15:47:14 -0800 Subject: [PATCH 11/12] revert --- pandas/_libs/writers.pyx | 5 ++++- pandas/core/computation/pytables.py | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 1120c1501b31e..73201e75c3c88 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -149,7 +149,7 @@ cpdef inline Py_ssize_t word_len(object val): @cython.wraparound(False) def string_array_replace_from_nan_rep( ndarray[object, ndim=1] arr, object nan_rep, - object replace=np.nan): + object replace=None): """ Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. @@ -157,6 +157,9 @@ def string_array_replace_from_nan_rep( cdef: Py_ssize_t length = len(arr), i = 0 + if replace is None: + replace = np.nan + for i in range(length): if arr[i] == nan_rep: arr[i] = replace diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 0c464231eadd4..13a4814068d6a 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -582,7 +582,6 @@ def __init__(self, value, converted, kind: Optional[str]): self.value = value self.converted = converted self.kind = kind - assert kind is None or isinstance(kind, str), kind def tostring(self, encoding): """ quote the string if not encoded From 75e20f09c137fd5aa04e4c8322dabc1502bceb13 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 16 Nov 2019 11:35:25 -0800 Subject: [PATCH 12/12] mypy fixup --- pandas/core/computation/ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index b3e53ffe080f8..0fdbdda30ad35 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -506,8 +506,8 @@ class Div(BinOp): regardless of the value of ``truediv``. """ - def __init__(self, lhs, rhs, truediv: bool, *args, **kwargs): - super().__init__("/", lhs, rhs, *args, **kwargs) + def __init__(self, lhs, rhs, truediv: bool, **kwargs): + super().__init__("/", lhs, rhs, **kwargs) if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type): raise TypeError(