From 7fc8c8544aed01ba94c6952eb50a17fe76c1d3a6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 12 Apr 2020 12:21:35 -0700 Subject: [PATCH] CLN: use _values_for_argsort in fewer places --- pandas/core/algorithms.py | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/indexes/base.py | 16 ++++------------ pandas/core/indexes/extension.py | 3 +++ 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 62a3808d36ba2..9db9805e09b50 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -616,7 +616,7 @@ def factorize( values = _ensure_arraylike(values) original = values - if is_extension_array_dtype(values): + if is_extension_array_dtype(values.dtype): values = extract_array(values) codes, uniques = values.factorize(na_sentinel=na_sentinel) dtype = original.dtype diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c9b8db28e0cf6..f5ed31182e690 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -370,7 +370,7 @@ def __init__( # we're inferring from values dtype = CategoricalDtype(categories, dtype.ordered) - elif is_categorical_dtype(values): + elif is_categorical_dtype(values.dtype): old_codes = ( values._values.codes if isinstance(values, ABCSeries) else values.codes ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 530aaee24c7fb..97c1ea962ed0c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3597,12 +3597,8 @@ def _join_non_unique(self, other, how="left", return_indexers=False): # We only get here if dtypes match assert self.dtype == other.dtype - if is_extension_array_dtype(self.dtype): - lvalues = self._data._values_for_argsort() - rvalues = other._data._values_for_argsort() - else: - lvalues = self._values - rvalues = other._values + lvalues = self._get_engine_target() + rvalues = other._get_engine_target() left_idx, right_idx = _get_join_indexers( [lvalues], [rvalues], how=how, sort=True @@ -3774,12 +3770,8 @@ def _join_monotonic(self, other, how="left", return_indexers=False): else: return ret_index - if is_extension_array_dtype(self.dtype): - sv = self._data._values_for_argsort() - ov = other._data._values_for_argsort() - else: - sv = self._values - ov = other._values + sv = self._get_engine_target() + ov = other._get_engine_target() if self.is_unique and other.is_unique: # We can perform much better than the general case diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index c752990531b34..6e965ecea7cd8 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -232,6 +232,9 @@ def __array__(self, dtype=None) -> np.ndarray: return np.asarray(self._data, dtype=dtype) def _get_engine_target(self) -> np.ndarray: + # NB: _values_for_argsort happens to match the desired engine targets + # for all of our existing EA-backed indexes, but in general + # cannot be relied upon to exist. return self._data._values_for_argsort() @doc(Index.dropna)