From e14f3e56e053eb271eff1f0878287ab04b30f36f Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:07:07 -0700 Subject: [PATCH 1/9] Fix is_unique for slices of Indexes --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/_libs/index.pyx | 12 ++++++++---- pandas/tests/indexes/test_base.py | 7 +++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 96f210ce6b7b9..92e67395ea51f 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) +- :meth:`Index.is_unique` could incorrectly return false if the ``Index`` was created from a slice of another ``Index``. (:issue:`57911`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index a700074d46ba8..4935d6d94d351 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -253,8 +253,10 @@ cdef class IndexEngine: return self.sizeof() cpdef _update_from_sliced(self, IndexEngine other, reverse: bool): - self.unique = other.unique - self.need_unique_check = other.need_unique_check + if other.unique: + self.unique = 1 + self.need_unique_check = 0 + if not other.need_monotonic_check and ( other.is_monotonic_increasing or other.is_monotonic_decreasing): self.need_monotonic_check = other.need_monotonic_check @@ -854,8 +856,10 @@ cdef class SharedEngine: pass cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool): - self.unique = other.unique - self.need_unique_check = other.need_unique_check + if other.unique: + self.unique = 1 + self.need_unique_check = 0 + if not other.need_monotonic_check and ( other.is_monotonic_increasing or other.is_monotonic_decreasing): self.need_monotonic_check = other.need_monotonic_check diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index beee14197bfb8..5a17c3565ec7d 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -963,6 +963,13 @@ def test_slice_keep_name(self): index = Index(["a", "b"], name="asdf") assert index.name == index[1:].name + def test_slice_is_unique(self): + # GH 57911 + index = Index([1, 1, 2, 3, 4]) + assert not index.is_unique + filtered_index = index[2:].copy() + assert filtered_index.is_unique + @pytest.mark.parametrize( "index", [ From 360aa3b220487e149078723cf454b7d94c0a4788 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sun, 24 Mar 2024 15:50:35 -0700 Subject: [PATCH 2/9] Update doc/source/whatsnew/v2.2.2.rst Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> --- doc/source/whatsnew/v2.2.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index c6ca6345ba405..741d1c1e533cd 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,7 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- :meth:`Index.is_unique` could incorrectly return false if the ``Index`` was created from a slice of another ``Index``. (:issue:`57911`) +- :meth:`Index.is_unique` could incorrectly return ``False`` if the ``Index`` was created from a slice of a non-unique ``Index``. (:issue:`57911`) - .. --------------------------------------------------------------------------- From 4ab184bf4b1e5986242e1dd84b7990a136b77a18 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sun, 24 Mar 2024 20:21:25 -0700 Subject: [PATCH 3/9] Handle monotonic on slices --- doc/source/whatsnew/v2.2.2.rst | 2 +- pandas/_libs/index.pyx | 32 +++++++++++++++++-------------- pandas/core/indexes/base.py | 3 +-- pandas/tests/indexes/test_base.py | 8 ++++++++ 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 741d1c1e533cd..c099f6e72ace1 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -15,7 +15,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- :meth:`Index.is_unique` could incorrectly return ``False`` if the ``Index`` was created from a slice of a non-unique ``Index``. (:issue:`57911`) +- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 4935d6d94d351..e0454a625f58e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -252,17 +252,19 @@ cdef class IndexEngine: def __sizeof__(self) -> int: return self.sizeof() - cpdef _update_from_sliced(self, IndexEngine other, reverse: bool): + cpdef _update_from_sliced(self, IndexEngine other): if other.unique: self.unique = 1 self.need_unique_check = 0 - if not other.need_monotonic_check and ( - other.is_monotonic_increasing or other.is_monotonic_decreasing): - self.need_monotonic_check = other.need_monotonic_check - # reverse=True means the index has been reversed - self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc - self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec + if ( + not other.need_monotonic_check and + other.is_monotonic_increasing and + other.is_monotonic_decreasing + ): + self.need_monotonic_check = 0 + self.monotonic_inc = 1 + self.monotonic_dec = 1 @property def is_unique(self) -> bool: @@ -855,17 +857,19 @@ cdef class SharedEngine: # for compat with IndexEngine pass - cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool): + cpdef _update_from_sliced(self, ExtensionEngine other): if other.unique: self.unique = 1 self.need_unique_check = 0 - if not other.need_monotonic_check and ( - other.is_monotonic_increasing or other.is_monotonic_decreasing): - self.need_monotonic_check = other.need_monotonic_check - # reverse=True means the index has been reversed - self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc - self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec + if ( + not other.need_monotonic_check and + other.is_monotonic_increasing and + other.is_monotonic_decreasing + ): + self.need_monotonic_check = 0 + self.monotonic_inc = 1 + self.monotonic_dec = 1 @property def is_unique(self) -> bool: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9a537c71f3cd0..a3df43a5bd0d3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5043,8 +5043,7 @@ def _getitem_slice(self, slobj: slice) -> Self: res = self._data[slobj] result = type(self)._simple_new(res, name=self._name, refs=self._references) if "_engine" in self._cache: - reverse = slobj.step is not None and slobj.step < 0 - result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr] + result._engine._update_from_sliced(self._engine) # type: ignore[union-attr] return result diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5a17c3565ec7d..10b7ec761d63c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -970,6 +970,14 @@ def test_slice_is_unique(self): filtered_index = index[2:].copy() assert filtered_index.is_unique + def test_slice_is_montonic(self): + """Test that is_monotonic resets on slices.""" + index = Index([1, 2, 3, 3]) + assert not index.is_monotonic_decreasing + filtered_index = index[2:].copy() + assert filtered_index.is_monotonic_decreasing + assert filtered_index.is_monotonic_increasing + @pytest.mark.parametrize( "index", [ From 3fb2b4fe26bc6eaff4a16c5ef83fba61b109bba5 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Mon, 25 Mar 2024 21:22:58 -0700 Subject: [PATCH 4/9] Restore and fix monotonic code --- pandas/_libs/index.pyx | 44 ++++++++++++++++++++----------- pandas/core/indexes/base.py | 3 ++- pandas/tests/indexes/test_base.py | 5 ++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e0454a625f58e..9383ffa795d6e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -252,19 +252,25 @@ cdef class IndexEngine: def __sizeof__(self) -> int: return self.sizeof() - cpdef _update_from_sliced(self, IndexEngine other): + cpdef _update_from_sliced(self, IndexEngine other, reverse: bool): if other.unique: self.unique = 1 self.need_unique_check = 0 - if ( - not other.need_monotonic_check and - other.is_monotonic_increasing and - other.is_monotonic_decreasing - ): + if not other.need_monotonic_check and ( + other.is_monotonic_increasing or other.is_monotonic_decreasing): self.need_monotonic_check = 0 - self.monotonic_inc = 1 - self.monotonic_dec = 1 + if len(self.values) > 0 and self.values[0] != self.values[-1]: + # reverse=True means the index has been reversed + if reverse: + self.monotonic_inc = other.monotonic_dec + self.monotonic_dec = other.monotonic_inc + else: + self.monotonic_inc = other.monotonic_inc + self.monotonic_dec = other.monotonic_dec + else: + self.monotonic_inc = 1 + self.monotonic_dec = 1 @property def is_unique(self) -> bool: @@ -857,19 +863,25 @@ cdef class SharedEngine: # for compat with IndexEngine pass - cpdef _update_from_sliced(self, ExtensionEngine other): + cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool): if other.unique: self.unique = 1 self.need_unique_check = 0 - if ( - not other.need_monotonic_check and - other.is_monotonic_increasing and - other.is_monotonic_decreasing - ): + if not other.need_monotonic_check and ( + other.is_monotonic_increasing or other.is_monotonic_decreasing): self.need_monotonic_check = 0 - self.monotonic_inc = 1 - self.monotonic_dec = 1 + if len(self.values) > 0 and self.values[0] != self.values[-1]: + # reverse=True means the index has been reversed + if reverse: + self.monotonic_inc = other.monotonic_dec + self.monotonic_dec = other.monotonic_inc + else: + self.monotonic_inc = other.monotonic_inc + self.monotonic_dec = other.monotonic_dec + else: + self.monotonic_inc = 1 + self.monotonic_dec = 1 @property def is_unique(self) -> bool: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a3df43a5bd0d3..9a537c71f3cd0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5043,7 +5043,8 @@ def _getitem_slice(self, slobj: slice) -> Self: res = self._data[slobj] result = type(self)._simple_new(res, name=self._name, refs=self._references) if "_engine" in self._cache: - result._engine._update_from_sliced(self._engine) # type: ignore[union-attr] + reverse = slobj.step is not None and slobj.step < 0 + result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr] return result diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 10b7ec761d63c..3ed04d7524caa 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -974,10 +974,15 @@ def test_slice_is_montonic(self): """Test that is_monotonic resets on slices.""" index = Index([1, 2, 3, 3]) assert not index.is_monotonic_decreasing + filtered_index = index[2:].copy() assert filtered_index.is_monotonic_decreasing assert filtered_index.is_monotonic_increasing + filtered_index = index[1:].copy() + assert not filtered_index.is_monotonic_decreasing + assert filtered_index.is_monotonic_increasing + @pytest.mark.parametrize( "index", [ From 512ba5cb34c71fa3fffc318400907f55d9c80c42 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sat, 30 Mar 2024 13:29:18 -0700 Subject: [PATCH 5/9] Update docstring and comment for test --- pandas/tests/indexes/test_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b2e7c84faafe9..100a9ff8911af 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -969,7 +969,8 @@ def test_slice_is_unique(self): assert filtered_index.is_unique def test_slice_is_montonic(self): - """Test that is_monotonic resets on slices.""" + """Test that is_monotonic_decreasing is correct on slices.""" + # GH 57911 index = Index([1, 2, 3, 3]) assert not index.is_monotonic_decreasing From 8aff2368d03c2b65c05074386b07f2d2d7d902f2 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Mon, 27 May 2024 14:05:42 -0700 Subject: [PATCH 6/9] Update whatsnew for v3.0.0 --- doc/source/whatsnew/v2.2.2.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 124a58d96db1f..72a2f84c4aaee 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -30,7 +30,6 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pandas nullable on with missing values (:issue:`56702`) - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the a column's type was a pyarrow nullable on with missing values (:issue:`57664`) -- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`) - Avoid issuing a spurious ``DeprecationWarning`` when a custom :class:`DataFrame` or :class:`Series` subclass method is called (:issue:`57553`) - Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6a6abcf2d48fe..e8a0aaead705e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -424,6 +424,7 @@ Strings Interval ^^^^^^^^ +- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`) - Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`) - From 1d68e8e2d1605133c6851e74013e963e3ffc645b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:32:30 -0700 Subject: [PATCH 7/9] Update pandas/_libs/index.pyx Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> --- pandas/_libs/index.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index f51e219551e25..72f67d810d027 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -253,8 +253,8 @@ cdef class IndexEngine: cpdef _update_from_sliced(self, IndexEngine other, reverse: bool): if other.unique: - self.unique = 1 - self.need_unique_check = 0 + self.unique = other.unique + self.need_unique_check = other.need_unique_check if not other.need_monotonic_check and ( other.is_monotonic_increasing or other.is_monotonic_decreasing): From 3f5bdca99ce3cae53872b95ffaa6dcf2f069c05a Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sun, 4 Aug 2024 14:18:25 -0700 Subject: [PATCH 8/9] Update pandas/_libs/index.pyx Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/_libs/index.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 72f67d810d027..1506a76aa94a6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -893,8 +893,8 @@ cdef class SharedEngine: cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool): if other.unique: - self.unique = 1 - self.need_unique_check = 0 + self.unique = other.unique + self.need_unique_check = other.need_unique_check if not other.need_monotonic_check and ( other.is_monotonic_increasing or other.is_monotonic_decreasing): From 8a2fd84cf738d48b0a53fad8c0201a14dc516548 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Sun, 4 Aug 2024 14:22:44 -0700 Subject: [PATCH 9/9] Add test for a null slice --- pandas/tests/indexes/test_base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index e8c7be8370329..c13473cd2e746 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -985,6 +985,10 @@ def test_slice_is_montonic(self): assert not filtered_index.is_monotonic_decreasing assert filtered_index.is_monotonic_increasing + filtered_index = index[:].copy() + assert not filtered_index.is_monotonic_decreasing + assert filtered_index.is_monotonic_increasing + @pytest.mark.parametrize( "index", [