From be5e4e7a5e9f94e7f3110efd6ad4ea49b3b411ed Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 15 Dec 2020 23:22:00 +0100 Subject: [PATCH 1/3] BUG: MultiIndex.equals returning incorrectly True when Indexes contains NaN --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexes/multi.py | 4 +++- pandas/tests/indexes/multi/test_equivalence.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 57dd1d05a274e..af96269019ca4 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -232,7 +232,7 @@ MultiIndex ^^^^^^^^^^ - Bug in :meth:`DataFrame.drop` raising ``TypeError`` when :class:`MultiIndex` is non-unique and no level is provided (:issue:`36293`) -- +- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when comparing :class:`MultiIndex` objects containing ``NaN`` even when they are differently ordered (:issue:`38439`) I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1edd98e980a2d..18b1ec8f0dd59 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3454,12 +3454,14 @@ def equals(self, other: object) -> bool: for i in range(self.nlevels): self_codes = self.codes[i] + other_codes = other.codes[i] + if not np.array_equal(self_codes == -1, other_codes == -1): + return False self_codes = self_codes[self_codes != -1] self_values = algos.take_nd( np.asarray(self.levels[i]._values), self_codes, allow_fill=False ) - other_codes = other.codes[i] other_codes = other_codes[other_codes != -1] other_values = algos.take_nd( np.asarray(other.levels[i]._values), other_codes, allow_fill=False diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index c31c2416ff722..bb34760e28d96 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -209,6 +209,16 @@ def test_equals_missing_values(): 
assert not result +def test_equals_missing_values_differently_sorted(): + # GH#38439 + mi1 = pd.MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + mi2 = pd.MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]) + assert not mi1.equals(mi2) + + mi2 = pd.MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) + assert mi1.equals(mi2) + + def test_is_(): mi = MultiIndex.from_tuples(zip(range(10), range(10))) assert mi.is_(mi) From 62d4be4d6f8ef59ede350d5711b6dddd0b8a89e1 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 16 Dec 2020 23:55:37 +0100 Subject: [PATCH 2/3] Refactor --- pandas/core/indexes/multi.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 18b1ec8f0dd59..12f1416da47f9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3455,14 +3455,16 @@ def equals(self, other: object) -> bool: for i in range(self.nlevels): self_codes = self.codes[i] other_codes = other.codes[i] - if not np.array_equal(self_codes == -1, other_codes == -1): + self_mask = self_codes == -1 + other_mask = other_codes == -1 + if not np.array_equal(self_mask, other_mask): return False - self_codes = self_codes[self_codes != -1] + self_codes = self_codes[self_mask] self_values = algos.take_nd( np.asarray(self.levels[i]._values), self_codes, allow_fill=False ) - other_codes = other_codes[other_codes != -1] + other_codes = other_codes[other_mask] other_values = algos.take_nd( np.asarray(other.levels[i]._values), other_codes, allow_fill=False ) From 95f077f6a5123af7fdee5b0fb5f624c77fad2e32 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 16 Dec 2020 23:57:12 +0100 Subject: [PATCH 3/3] Refactor --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 12f1416da47f9..78e7a8516178a 100644 --- a/pandas/core/indexes/multi.py +++ 
b/pandas/core/indexes/multi.py @@ -3459,12 +3459,12 @@ def equals(self, other: object) -> bool: other_mask = other_codes == -1 if not np.array_equal(self_mask, other_mask): return False - self_codes = self_codes[self_mask] + self_codes = self_codes[~self_mask] self_values = algos.take_nd( np.asarray(self.levels[i]._values), self_codes, allow_fill=False ) - other_codes = other_codes[other_mask] + other_codes = other_codes[~other_mask] other_values = algos.take_nd( np.asarray(other.levels[i]._values), other_codes, allow_fill=False )