From cfc3d6aa190576ec51b076a1803c3e0cc072c769 Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:21:02 -0800
Subject: [PATCH 1/6] standardize import

---
 pandas/core/sparse/series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
index 4b649927f8f72..257b0791e4841 100644
--- a/pandas/core/sparse/series.py
+++ b/pandas/core/sparse/series.py
@@ -19,7 +19,7 @@
 from pandas.core import generic
 import pandas.core.common as com
 import pandas.core.ops as ops
-import pandas._libs.index as _index
+import pandas._libs.index as libindex
 from pandas.util._decorators import Appender

 from pandas.core.sparse.array import (
@@ -560,7 +560,7 @@ def _set_values(self, key, value):
             key = key.values

         values = self.values.to_dense()
-        values[key] = _index.convert_scalar(values, value)
+        values[key] = libindex.convert_scalar(values, value)
         values = SparseArray(values, fill_value=self.fill_value,
                              kind=self.kind)
         self._data = SingleBlockManager(values, self.index)

From 7e6f48e08e940a90788c301b9e864d9974e3901b Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:21:17 -0800
Subject: [PATCH 2/6] remove unused

---
 pandas/_libs/index.pyx  | 16 ----------------
 pandas/core/indexing.py | 17 -----------------
 2 files changed, 33 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index f8371d4855803..0fd37c6a6af73 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -73,10 +73,6 @@ cpdef object get_value_box(ndarray arr, object loc):
     return util.get_value_1d(arr, i)


-def set_value_at(ndarray arr, object loc, object val):
-    return util.set_value_at(arr, loc, val)
-
-
 # Don't populate hash tables in monotonic indexes larger than this
 _SIZE_CUTOFF = 1000000

@@ -404,18 +400,6 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
         else:
             return mid + 1

-_pad_functions = {
-    'object': algos.pad_object,
-    'int64': algos.pad_int64,
-    'float64': algos.pad_float64
-}
-
-_backfill_functions = {
-    'object': algos.backfill_object,
-    'int64': algos.backfill_int64,
-    'float64': algos.backfill_float64
-}
-


 cdef class DatetimeEngine(Int64Engine):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 3ca150cda83c7..9463512ac11de 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1936,10 +1936,6 @@ def _convert_key(self, key, is_setter=False):
         return key


-# 32-bit floating point machine epsilon
-_eps = 1.1920929e-07
-
-
 def length_of_indexer(indexer, target=None):
     """return the length of a single non-tuple indexer which could be a slice
     """
@@ -1992,19 +1988,6 @@ def convert_to_index_sliceable(obj, key):
     return None


-def is_index_slice(obj):
-    def _is_valid_index(x):
-        return (is_integer(x) or is_float(x) and
-                np.allclose(x, int(x), rtol=_eps, atol=0))
-
-    def _crit(v):
-        return v is None or _is_valid_index(v)
-
-    both_none = obj.start is None and obj.stop is None
-
-    return not both_none and (_crit(obj.start) and _crit(obj.stop))
-
-
 def check_bool_indexer(ax, key):
     # boolean indexing, need to check that the data are aligned, otherwise
     # disallowed
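Note (illustrative, not part of the patch series): patch 1 only renames the
import alias from `_index` to `libindex`; the call site still relies on
`convert_scalar` turning a datetime-like scalar into something that can be
stored in the dense values (patch 6 below shows its body returning
`Timestamp(value).value` for datetime64 data). A rough pure-NumPy sketch of
that kind of coercion, with made-up sample data:

    import numpy as np
    import pandas as pd

    # Sketch only -- NumPy does the work here, not pandas._libs.index.
    dense = np.zeros(3, dtype='datetime64[ns]')
    mask = np.array([True, False, False])
    i8 = pd.Timestamp('2018-01-22').value       # nanoseconds since the epoch
    dense[mask] = np.datetime64(i8, 'ns')
    print(dense)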
From cddf84ba58d0dc7f1d713110f91dfaeef3cc7a79 Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:21:32 -0800
Subject: [PATCH 3/6] remove camelCase, add typing

---
 pandas/_libs/tslibs/offsets.pyx | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 700ba5b6e48f7..a0ac6389c0646 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -306,8 +306,8 @@ class _BaseOffset(object):
     def __call__(self, other):
         return self.apply(other)

-    def __mul__(self, someInt):
-        return self.__class__(n=someInt * self.n, normalize=self.normalize,
+    def __mul__(self, other):
+        return self.__class__(n=other * self.n, normalize=self.normalize,
                               **self.kwds)

     def __neg__(self):
@@ -374,8 +374,8 @@ class _BaseOffset(object):
 class BaseOffset(_BaseOffset):
     # Here we add __rfoo__ methods that don't play well with cdef classes

-    def __rmul__(self, someInt):
-        return self.__mul__(someInt)
+    def __rmul__(self, other):
+        return self.__mul__(other)

     def __radd__(self, other):
         return self.__add__(other)
@@ -840,6 +840,8 @@ cpdef int roll_qtrday(datetime other, int n, int month, object day_opt,
     -------
     n : int number of periods to increment
     """
+    cdef:
+        int months_since
     # TODO: Merge this with roll_yearday by setting modby=12 there?
     # code de-duplication versus perf hit?
     # TODO: with small adjustments this could be used in shift_quarters

From f7ff5f7f7d9357f5ce30b201d48d8fef027da02b Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:27:34 -0800
Subject: [PATCH 4/6] cleanup unused cimports

---
 pandas/_libs/lib.pyx           | 7 -------
 pandas/_libs/src/inference.pyx | 9 ++++-----
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 1632f5d016439..f7ab5cb448dc5 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -17,8 +17,6 @@ from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM,
 np.import_array()
 np.import_ufunc()

-from libc.stdlib cimport malloc, free
-
 from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
                       PyList_Check, PyFloat_Check,
                       PyString_Check,
@@ -37,17 +35,12 @@
 except ImportError:
     cimport cpython

-isnan = np.isnan
-cdef double NaN = np.NaN
-cdef double nan = NaN

 from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
                                PyTime_Check, PyDelta_Check,
                                PyDateTime_IMPORT)
 PyDateTime_IMPORT

-from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value
-
 from tslib import NaT, Timestamp, Timedelta, array_to_datetime
 from interval import Interval
 from missing cimport checknull
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index e15f276b39bf8..52ae32023e2b4 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -10,10 +10,9 @@ from datetime import datetime, timedelta
 iNaT = util.get_nat()

 cdef bint PY2 = sys.version_info[0] == 2
+cdef double nan = np.NaN

-from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
-                   INT8_MIN, INT8_MAX, INT16_MIN, INT16_MAX,
-                   INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN)
+from util cimport UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN

 # core.common import for fast inference checks

@@ -331,7 +330,7 @@ def infer_dtype(object value, bint skipna=False):
         bint seen_pdnat = False
         bint seen_val = False

-    if isinstance(value, np.ndarray):
+    if util.is_array(value):
         values = value

     elif hasattr(value, 'dtype'):
@@ -349,7 +348,7 @@ def infer_dtype(object value, bint skipna=False):
             raise ValueError("cannot infer type for {0}".format(type(value)))

     else:
-        if not isinstance(value, list):
+        if not PyList_Check(value):
             value = list(value)
         from pandas.core.dtypes.cast import (
             construct_1d_object_array_from_listlike)
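Note (illustrative, not part of the patch series): the `someInt` -> `other`
rename in patch 3 is purely cosmetic; `__mul__` still scales the offset's
`n`, and the `__rmul__` defined on `BaseOffset` is what makes the reflected
form work. A quick check with a concrete offset class (the behaviour is the
same with or without these patches):

    import pandas as pd
    from pandas.tseries.offsets import BusinessDay

    bd = BusinessDay(n=2)
    assert (bd * 3).n == 6      # _BaseOffset.__mul__ scales n
    assert (3 * bd).n == 6      # BaseOffset.__rmul__ delegates to __mul__
    print(pd.Timestamp('2018-01-22') + bd * 3)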
From c7da0bec3d13725315b0fce5269b867c06f4855b Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:29:43 -0800
Subject: [PATCH 5/6] remove unused cimport

---
 pandas/_libs/lib.pyx | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index f7ab5cb448dc5..e337c2b25b887 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -25,8 +25,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
                       PyTuple_New,
                       PyObject_RichCompareBool,
                       PyBytes_GET_SIZE,
-                      PyUnicode_GET_SIZE,
-                      PyObject)
+                      PyUnicode_GET_SIZE)

 try:
     from cpython cimport PyString_GET_SIZE
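Note (illustrative, not part of the patch series): removals like the
`PyObject` cimport above are only safe if the name is no longer referenced
anywhere in the module. A hypothetical helper for that kind of check (the
function and its usage are made up for illustration, not part of pandas):

    import pathlib
    import re

    def count_uses(path, name):
        """Count whole-word occurrences of `name` in the file at `path`."""
        text = pathlib.Path(path).read_text()
        return len(re.findall(r'\b{}\b'.format(re.escape(name)), text))

    # After this patch, pandas/_libs/lib.pyx should report zero uses of
    # PyObject (names like PyObject_RichCompareBool do not match the
    # whole-word pattern).
    print(count_uses('pandas/_libs/lib.pyx', 'PyObject'))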
From 77bbcf42bda687777ae87a50b7fc7eea63f14f30 Mon Sep 17 00:00:00 2001
From: Brock Mendel
Date: Mon, 22 Jan 2018 20:37:57 -0800
Subject: [PATCH 6/6] use c versions for isinstance

---
 pandas/_libs/index.pyx     |  4 ++--
 pandas/_libs/internals.pyx |  7 ++++---
 pandas/_libs/reduction.pyx | 13 ++++++-------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 0fd37c6a6af73..15aef867ba413 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -550,7 +550,7 @@ cpdef convert_scalar(ndarray arr, object value):
     # we don't turn bools into int/float/complex

     if arr.descr.type_num == NPY_DATETIME:
-        if isinstance(value, np.ndarray):
+        if util.is_array(value):
             pass
         elif isinstance(value, (datetime, np.datetime64, date)):
             return Timestamp(value).value
@@ -561,7 +561,7 @@ cpdef convert_scalar(ndarray arr, object value):
             raise ValueError("cannot set a Timestamp with a non-timestamp")

     elif arr.descr.type_num == NPY_TIMEDELTA:
-        if isinstance(value, np.ndarray):
+        if util.is_array(value):
             pass
         elif isinstance(value, timedelta):
             return Timedelta(value).value
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 93a45335efc9c..a5abe324254ce 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -4,6 +4,7 @@ cimport cython
 from cython cimport Py_ssize_t

 from cpython cimport PyObject
+from cpython.slice cimport PySlice_Check

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX
@@ -32,7 +33,7 @@ cdef class BlockPlacement:
         self._has_slice = False
         self._has_array = False

-        if isinstance(val, slice):
+        if PySlice_Check(val):
             slc = slice_canonize(val)

             if slc.start != slc.stop:
@@ -118,7 +119,7 @@ cdef class BlockPlacement:
             else:
                 val = self._as_array[loc]

-            if not isinstance(val, slice) and val.ndim == 0:
+            if not PySlice_Check(val) and val.ndim == 0:
                 return val

             return BlockPlacement(val)
@@ -288,7 +289,7 @@ def slice_getitem(slice slc not None, ind):

     s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

-    if isinstance(ind, slice):
+    if PySlice_Check(ind):
         ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
                                                                       s_len)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index d51583c7aa473..4ca87a777e497 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -24,9 +24,9 @@ is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'

 cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):

-    if isinstance(obj, np.ndarray) \
-            or isinstance(obj, list) and len(obj) == cnt \
-            or getattr(obj, 'shape', None) == (cnt,):
+    if (util.is_array(obj) or
+            isinstance(obj, list) and len(obj) == cnt or
+            getattr(obj, 'shape', None) == (cnt,)):
         raise ValueError('function does not reduce')

     return np.empty(size, dtype='O')
@@ -150,8 +150,7 @@ cdef class Reducer:
             else:
                 res = self.f(chunk)

-            if hasattr(res, 'values') and isinstance(
-                    res.values, np.ndarray):
+            if hasattr(res, 'values') and util.is_array(res.values):
                 res = res.values
             if i == 0:
                 result = _get_result_array(res,
@@ -433,10 +432,10 @@ cdef class SeriesGrouper:
 cdef inline _extract_result(object res):
     """ extract the result object, it might be a 0-dim ndarray
         or a len-1 0-dim, or a scalar """
-    if hasattr(res, 'values') and isinstance(res.values, np.ndarray):
+    if hasattr(res, 'values') and util.is_array(res.values):
         res = res.values
     if not np.isscalar(res):
-        if isinstance(res, np.ndarray):
+        if util.is_array(res):
             if res.ndim == 0:
                 res = res.item()
             elif res.ndim == 1 and len(res) == 1:
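Note (illustrative, not part of the patch series): `util.is_array` and
`PySlice_Check` here, like the `PyList_Check` used in patch 4, are C-level
type checks, so the hot Cython paths skip Python-level `isinstance` dispatch
without changing which objects are accepted. The Python-level equivalences
being relied on are simply:

    import numpy as np

    # Each line mirrors one of the C checks used above.
    assert isinstance(np.arange(3), np.ndarray)    # util.is_array(obj)
    assert isinstance(slice(1, 5), slice)          # PySlice_Check(obj)
    assert isinstance([1, 2, 3], list)             # PyList_Check(obj)
    assert not isinstance((1, 2, 3), list)         # tuples still fall through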