From 83183852cbd7b7b948c2ea10eb6cfb3da7e8ad1e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 20:41:50 -0800 Subject: [PATCH 1/2] PERF: avoid double-verify of take indices --- pandas/core/generic.py | 1 + pandas/core/indexers.py | 11 +++++++---- pandas/core/internals/array_manager.py | 9 ++------- pandas/core/internals/managers.py | 20 +++++++++++++------- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d2b63c42d777b..61f1549930945 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3655,6 +3655,7 @@ class max_speed self._consolidate_inplace() + indices = extract_array(indices, extract_numpy=True) new_data = self._mgr.take( indices, axis=self._get_block_manager_axis(axis), verify=True ) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 86d6b772fe2e4..d0a53ec80ce1a 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -235,7 +235,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: # Indexer Conversion -def maybe_convert_indices(indices, n: int): +def maybe_convert_indices(indices, n: int, verify: bool = True): """ Attempt to convert indices into valid, positive indices. @@ -248,6 +248,8 @@ def maybe_convert_indices(indices, n: int): Array of indices that we are to convert. n : int Number of elements in the array that we are indexing. + verify : bool, default True + Check that all entries are between 0 and n - 1, inclusive. Returns ------- @@ -273,9 +275,10 @@ def maybe_convert_indices(indices, n: int): indices = indices.copy() indices[mask] += n - mask = (indices >= n) | (indices < 0) - if mask.any(): - raise IndexError("indices are out-of-bounds") + if verify: + mask = (indices >= n) | (indices < 0) + if mask.any(): + raise IndexError("indices are out-of-bounds") return indices diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 6134325d249c2..432357b2e1aa6 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1023,7 +1023,7 @@ def _reindex_indexer( return type(self)(new_arrays, new_axes, verify_integrity=False) - def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True): + def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: """ Take items along any axis. """ @@ -1036,12 +1036,7 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True ) n = self.shape_proper[axis] - if convert: - indexer = maybe_convert_indices(indexer, n) - - if verify: - if ((indexer == -1) | (indexer >= n)).any(): - raise Exception("Indices must be nonzero and less than the axis length") + indexer = maybe_convert_indices(indexer, n, verify=verify) new_labels = self._axes[axis].take(indexer) return self._reindex_indexer( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6bd3e37ae101e..9c21fcf957ecd 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1491,10 +1491,21 @@ def _make_na_block(self, placement, fill_value=None): block_values.fill(fill_value) return new_block(block_values, placement=placement, ndim=block_values.ndim) - def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True): + def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: """ Take items along any axis. + + indexer : np.ndarray or slice + axis : int, default 1 + verify : bool, default True + Check that all entries are between 0 and len(self) - 1, inclusive. + Pass verify=False if this check has been done by the caller. + + Returns + ------- + BlockManager """ + # We have 6 tests that get here with a slice indexer = ( np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") if isinstance(indexer, slice) @@ -1502,12 +1513,7 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True ) n = self.shape[axis] - if convert: - indexer = maybe_convert_indices(indexer, n) - - if verify: - if ((indexer == -1) | (indexer >= n)).any(): - raise Exception("Indices must be nonzero and less than the axis length") + indexer = maybe_convert_indices(indexer, n, verify=verify) new_labels = self.axes[axis].take(indexer) return self.reindex_indexer( From a3ab44a0ab55aa8c82c103ba4eaf9ba76dc6536e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 07:45:03 -0800 Subject: [PATCH 2/2] revert extract_array --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 32127c2a38e82..67533259ae0c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3655,7 +3655,6 @@ class max_speed self._consolidate_inplace() - indices = extract_array(indices, extract_numpy=True) new_data = self._mgr.take( indices, axis=self._get_block_manager_axis(axis), verify=True )