From d32222a64edc4fde6615071d632a09525ebecfff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Mon, 28 Oct 2019 23:36:42 +0100 Subject: [PATCH 01/10] add test for multi join recursion error --- pandas/tests/indexes/multi/test_join.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 42d8cf761842e..06c1b91386103 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -87,3 +87,13 @@ def test_join_self_unique(idx, join_type): if idx.is_unique: joined = idx.join(idx, how=join_type) assert (idx == joined).all() + + +def test_join_multi_wrong_order(): + # GH 25760 + # GH 28956 + + midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=['a', 'b']) + midx2 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=['b', 'a']) + + midx1.join(midx2) From 02e26457149363d0b4fa2c7ab408d4342a9aebe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Mon, 28 Oct 2019 23:37:00 +0100 Subject: [PATCH 02/10] fix multi join recursion error --- pandas/core/indexes/base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4c15e4b26ed46..4e0dbe2c64d60 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3549,8 +3549,12 @@ def _join_multi(self, other, how, return_indexers=True): ldrop_names = list(self_names - overlap) rdrop_names = list(other_names - overlap) - self_jnlevels = self.droplevel(ldrop_names) - other_jnlevels = other.droplevel(rdrop_names) + if len(ldrop_names + rdrop_names) == 0: # if only the order differs + self_jnlevels = self + other_jnlevels = other.reorder_levels(self.names) + else: + self_jnlevels = self.droplevel(ldrop_names) + other_jnlevels = other.droplevel(rdrop_names) # Join left and right # Join on same leveled multi-index frames is supported From 
8bafd6b4278fac0ffe6171b8ee2ef87b9973255f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Mon, 28 Oct 2019 23:41:39 +0100 Subject: [PATCH 03/10] black formatting --- pandas/tests/indexes/multi/test_join.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 06c1b91386103..c07e0248a197c 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -93,7 +93,7 @@ def test_join_multi_wrong_order(): # GH 25760 # GH 28956 - midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=['a', 'b']) - midx2 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=['b', 'a']) + midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) midx1.join(midx2) From 99aa8bd12e4722204b7b8af621ac2f744e307aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Thu, 31 Oct 2019 17:29:35 +0100 Subject: [PATCH 04/10] add assertion to midx join test --- pandas/tests/indexes/multi/test_join.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index c07e0248a197c..53755dca81852 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -96,4 +96,7 @@ def test_join_multi_wrong_order(): midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) midx2 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) - midx1.join(midx2) + join_idx, lidx, ridx = midx1.join(midx2, return_indexers=False) + + assert midx1.equals(join_idx) + assert midx2.equals(join_idx) From a00e80f2982f325a6853281aee43e50bba6985a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Thu, 31 Oct 2019 17:31:56 +0100 Subject: [PATCH 05/10] style corrections --- 
pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4e0dbe2c64d60..16457b21fda3d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3549,7 +3549,8 @@ def _join_multi(self, other, how, return_indexers=True): ldrop_names = list(self_names - overlap) rdrop_names = list(other_names - overlap) - if len(ldrop_names + rdrop_names) == 0: # if only the order differs + # if only the order differs + if not len(ldrop_names + rdrop_names): self_jnlevels = self other_jnlevels = other.reorder_levels(self.names) else: From 505d81407c6597762f539fd43849981d9105363d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Thu, 31 Oct 2019 17:34:36 +0100 Subject: [PATCH 06/10] add assertion to midx join test --- pandas/tests/indexes/multi/test_join.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 53755dca81852..ddd9616abbc58 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -98,5 +98,9 @@ def test_join_multi_wrong_order(): join_idx, lidx, ridx = midx1.join(midx2, return_indexers=False) + exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) + assert midx1.equals(join_idx) assert midx2.equals(join_idx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) From 5f09a0134cd22c31cc77676674e42651b63be56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Thu, 31 Oct 2019 17:41:52 +0100 Subject: [PATCH 07/10] whatsnew entry for join --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4007ecd5a9412..399b1ff33a985 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -421,6 +421,7 @@ Reshaping - :func:`qcut` and :func:`cut` now 
handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched multiindex name orders (:issue:`25760`, :issue:`28956`) Sparse ^^^^^^ From f22a665cf59cb51641d302f97bfce248106a3fa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Fri, 1 Nov 2019 00:39:02 +0100 Subject: [PATCH 08/10] expand multiindex test --- pandas/tests/indexes/multi/test_join.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index ddd9616abbc58..badc88a2a9628 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -100,7 +100,19 @@ def test_join_multi_wrong_order(): exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) - assert midx1.equals(join_idx) - assert midx2.equals(join_idx) + tm.assert_index_equal(midx1, join_idx) assert lidx is None tm.assert_numpy_array_equal(ridx, exp_ridx) + + midx3 = pd.MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"]) + + df1 = pd.DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) + df2 = pd.DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) + + df_joined = df1.join(df2) + + expected = pd.DataFrame( + index=midx1, data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]} + ) + + tm.assert_equal(df_joined, expected) From 66fb17da25c836b9b6e7b9d14281a94cb3a07f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20M=C3=A1rk=20Borza?= Date: Fri, 1 Nov 2019 00:43:56 +0100 Subject: [PATCH 09/10] whatsnew bugfix multiindex reshaping expand --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 399b1ff33a985..426f5bc75ace1 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -421,7 +421,7 @@ Reshaping - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). - Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) -- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched multiindex name orders (:issue:`25760`, :issue:`28956`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched MultiIndex name orders. Now, when names match in a join, but the order differs, the names are reordered to match the order in the index the join is called on (:issue:`25760`, :issue:`28956`) Sparse ^^^^^^ From 733b1c60e3db5b18ab7507ffd295fbb8c56f605e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 1 Jan 2020 13:14:57 -0500 Subject: [PATCH 10/10] clean tests --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/indexes/multi/test_join.py | 13 ------------- pandas/tests/reshape/merge/test_multi.py | 19 +++++++++++++++++++ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 47c8abda19f2c..6804cc08a0638 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -946,7 +946,7 @@ Reshaping - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). 
- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) -- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched MultiIndex name orders. Now, when names match in a join, but the order differs, the names are reordered to match the order in the index the join is called on (:issue:`25760`, :issue:`28956`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`) - Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`) - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index badc88a2a9628..31ab521958342 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -103,16 +103,3 @@ def test_join_multi_wrong_order(): tm.assert_index_equal(midx1, join_idx) assert lidx is None tm.assert_numpy_array_equal(ridx, exp_ridx) - - midx3 = pd.MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"]) - - df1 = pd.DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) - df2 = pd.DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) - - df_joined = df1.join(df2) - - expected = pd.DataFrame( - index=midx1, data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]} - ) - - tm.assert_equal(df_joined, expected) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index bce62571d55ec..d269196401554 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -828,3 +828,22 @@ def test_single_common_level(self): ).set_index(["key", "X", 
"Y"]) tm.assert_frame_equal(result, expected) + + def test_join_multi_wrong_order(self): + # GH 25760 + # GH 28956 + + midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx3 = pd.MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"]) + + left = pd.DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) + right = pd.DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) + + result = left.join(right) + + expected = pd.DataFrame( + index=midx1, + data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]}, + ) + + tm.assert_frame_equal(result, expected)