From 73da53473da6495a0b9c88358dac95b685c12c2f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 12 Feb 2023 01:06:58 +0100 Subject: [PATCH 1/4] PERF: Fix performance regression in get_loc of IntervalIndex --- pandas/core/arrays/interval.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 6805d32049d34..9dbe1f20c4a20 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -662,7 +662,9 @@ def _validate(cls, left, right, dtype: IntervalDtype) -> None: msg = "left side of interval must be <= right side" raise ValueError(msg) - def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: + def _shallow_copy( + self: IntervalArrayT, left, right, verify_integrity: bool = True + ) -> IntervalArrayT: """ Return a new IntervalArray with the replacement attributes @@ -675,7 +677,8 @@ def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: """ dtype = IntervalDtype(left.dtype, closed=self.closed) left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype) - self._validate(left, right, dtype=dtype) + if verify_integrity: + self._validate(left, right, dtype=dtype) return self._simple_new(left, right, dtype=dtype) @@ -727,7 +730,7 @@ def __getitem__( if np.ndim(left) > 1: # GH#30588 multi-dimensional indexer disallowed raise ValueError("multi-dimensional indexing not allowed") - return self._shallow_copy(left, right) + return self._shallow_copy(left, right, verify_integrity=False) def __setitem__(self, key, value) -> None: value_left, value_right = self._validate_setitem_value(value) From 3d27f23de2c20c6aab56df428935ed0a8ebde407 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 12 Feb 2023 16:48:30 +0100 Subject: [PATCH 2/4] Switch to simple new --- pandas/core/arrays/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 9dbe1f20c4a20..8aa0cc38eced5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -730,7 +730,7 @@ def __getitem__( if np.ndim(left) > 1: # GH#30588 multi-dimensional indexer disallowed raise ValueError("multi-dimensional indexing not allowed") - return self._shallow_copy(left, right, verify_integrity=False) + return self._simple_new(left, right, dtype=self.dtype) def __setitem__(self, key, value) -> None: value_left, value_right = self._validate_setitem_value(value) From 11ef578d7817aea8a947101096ac411096fcde82 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 12 Feb 2023 16:52:13 +0100 Subject: [PATCH 3/4] Remove --- pandas/core/arrays/interval.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 8aa0cc38eced5..9d881edbc3705 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -662,9 +662,7 @@ def _validate(cls, left, right, dtype: IntervalDtype) -> None: msg = "left side of interval must be <= right side" raise ValueError(msg) - def _shallow_copy( - self: IntervalArrayT, left, right, verify_integrity: bool = True - ) -> IntervalArrayT: + def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: """ Return a new IntervalArray with the replacement attributes @@ -677,8 +675,6 @@ def _shallow_copy( """ dtype = IntervalDtype(left.dtype, closed=self.closed) left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype) - if verify_integrity: - self._validate(left, right, dtype=dtype) return self._simple_new(left, right, dtype=dtype) From ae8cf38b1875254ed4b106904ec890c41f5c45ad Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 13 Feb 2023 20:55:42 +0100 Subject: [PATCH 4/4] Fix mypy --- pandas/core/arrays/interval.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 9d881edbc3705..482909d195bd0 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -726,7 +726,11 @@ def __getitem__( if np.ndim(left) > 1: # GH#30588 multi-dimensional indexer disallowed raise ValueError("multi-dimensional indexing not allowed") - return self._simple_new(left, right, dtype=self.dtype) + # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type + # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray, + # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray], + # ndarray[Any, Any]]" + return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] def __setitem__(self, key, value) -> None: value_left, value_right = self._validate_setitem_value(value)