From 34c95ea62118c9afb5668304643103a789eb3db4 Mon Sep 17 00:00:00 2001 From: Tim Tran Date: Mon, 6 Sep 2021 18:13:49 +0000 Subject: [PATCH 1/3] TST: Test for MultiIndex merge with CategoricalIndex (#36973) --- pandas/tests/reshape/merge/test_merge.py | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 71134dcaf9ccc..08352654e9cab 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2542,3 +2542,32 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) with pytest.raises(MergeError, match="Can only pass argument"): merge(df_1, df_2, on=["C"], left_index=True) + + +def test_multiindex_merge_with_unordered_categoricalindex(): + # GH 36973 + pcat = CategoricalDtype(categories=["P2", "P1"], ordered=False) + df1 = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + } + ).set_index(["id", "p"]) + df2 = DataFrame( + { + "id": ["A", "C", "C"], + "p": Categorical(["P2", "P2", "P1"], dtype=pcat), + "d1": [10, 11, 12], + } + ).set_index(["id", "p"]) + result = merge(df1, df2, how="left", left_index=True, right_index=True) + expected = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + "d1": [11.0, 12.0, np.nan], + } + ).set_index(["id", "p"]) + tm.assert_frame_equal(result, expected) From 62fb8edd9669cf2b7a136dd98880299bd271e2cc Mon Sep 17 00:00:00 2001 From: Tim Tran Date: Tue, 7 Sep 2021 01:21:56 +0000 Subject: [PATCH 2/3] parametrize over ordered=True/False --- pandas/tests/reshape/merge/test_merge.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 08352654e9cab..323fc3d0e7eea 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2544,9 +2544,10 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): merge(df_1, df_2, on=["C"], left_index=True) -def test_multiindex_merge_with_unordered_categoricalindex(): +@pytest.mark.parametrize("ordered", [True, False]) +def test_multiindex_merge_with_unordered_categoricalindex(ordered): # GH 36973 - pcat = CategoricalDtype(categories=["P2", "P1"], ordered=False) + pcat = CategoricalDtype(categories=["P2", "P1"], ordered=ordered) df1 = DataFrame( { "id": ["C", "C", "D"], From 103452261b5b835b2ddf739aae946d932db5b762 Mon Sep 17 00:00:00 2001 From: Tim Tran Date: Tue, 7 Sep 2021 01:53:59 +0000 Subject: [PATCH 3/3] Move test into TestMergeCategorical class --- pandas/tests/reshape/merge/test_merge.py | 59 ++++++++++++------------ 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 323fc3d0e7eea..5c07a9662359e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1813,6 +1813,35 @@ def tests_merge_categorical_unordered_equal(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("ordered", [True, False]) + def test_multiindex_merge_with_unordered_categoricalindex(self, ordered): + # GH 36973 + pcat = CategoricalDtype(categories=["P2", "P1"], ordered=ordered) + df1 = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + } + ).set_index(["id", "p"]) + df2 = DataFrame( + { + "id": ["A", "C", "C"], + "p": Categorical(["P2", "P2", "P1"], dtype=pcat), + "d1": [10, 11, 12], + } + ).set_index(["id", "p"]) + result = merge(df1, df2, how="left", left_index=True, right_index=True) + expected = DataFrame( + { + "id": ["C", "C", "D"], + "p": Categorical(["P2", "P1", "P2"], dtype=pcat), + "a": [0, 1, 2], + "d1": [11.0, 12.0, np.nan], + } + ).set_index(["id", "p"]) + tm.assert_frame_equal(result, expected) + def test_other_columns(self, left, right): # non-merge columns should preserve if possible right = right.assign(Z=right.Z.astype("category")) @@ -2542,33 +2571,3 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) with pytest.raises(MergeError, match="Can only pass argument"): merge(df_1, df_2, on=["C"], left_index=True) - - -@pytest.mark.parametrize("ordered", [True, False]) -def test_multiindex_merge_with_unordered_categoricalindex(ordered): - # GH 36973 - pcat = CategoricalDtype(categories=["P2", "P1"], ordered=ordered) - df1 = DataFrame( - { - "id": ["C", "C", "D"], - "p": Categorical(["P2", "P1", "P2"], dtype=pcat), - "a": [0, 1, 2], - } - ).set_index(["id", "p"]) - df2 = DataFrame( - { - "id": ["A", "C", "C"], - "p": Categorical(["P2", "P2", "P1"], dtype=pcat), - "d1": [10, 11, 12], - } - ).set_index(["id", "p"]) - result = merge(df1, df2, how="left", left_index=True, right_index=True) - expected = DataFrame( - { - "id": ["C", "C", "D"], - "p": Categorical(["P2", "P1", "P2"], dtype=pcat), - "a": [0, 1, 2], - "d1": [11.0, 12.0, np.nan], - } - ).set_index(["id", "p"]) - tm.assert_frame_equal(result, expected)