From 9ea76898726202c1ab7db0cdef4835dc78beabb8 Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Sat, 18 Dec 2021 08:24:39 +0300 Subject: [PATCH 1/6] new test --- pandas/tests/reshape/concat/test_index.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index a4d6a41c7eb50..205ca02d79f68 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -257,3 +257,11 @@ def test_concat_multiindex_dfs_with_deepcopy(self): tm.assert_frame_equal(result_copy, expected) result_no_copy = concat(example_dict, names=["testname"]) tm.assert_frame_equal(result_no_copy, expected) + + def test_concat_multiindex_unique(self): + # GH#44786 + df1 = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) + df2 = concat([df1], keys=["X"]) + result = np.array([df2.index.is_unique]) + expected = np.array([False]) + tm.assert_numpy_array_equal(result, expected) From 07d896f1c16e42f12b5dd1c6405b3d2d496e3586 Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Sat, 18 Dec 2021 19:07:09 +0300 Subject: [PATCH 2/6] refactor --- pandas/tests/reshape/concat/test_index.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 205ca02d79f68..ad55e5732f9bc 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -258,10 +258,9 @@ def test_concat_multiindex_dfs_with_deepcopy(self): result_no_copy = concat(example_dict, names=["testname"]) tm.assert_frame_equal(result_no_copy, expected) - def test_concat_multiindex_unique(self): + def test_concat_multiindex_unique_and_duplicate(self): # GH#44786 df1 = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) - df2 = concat([df1], keys=["X"]) - result = np.array([df2.index.is_unique]) - expected = np.array([False]) - tm.assert_numpy_array_equal(result, expected) + result = concat([df1], keys=["X"]) + assert result.index.is_unique is False + assert result.index.has_duplicates is True From aada6dd66fe1cb20751865a277251108455763dc Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Sat, 18 Dec 2021 19:10:49 +0300 Subject: [PATCH 3/6] change name of dataframe --- pandas/tests/reshape/concat/test_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index ad55e5732f9bc..f859c41347bde 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -260,7 +260,7 @@ def test_concat_multiindex_dfs_with_deepcopy(self): def test_concat_multiindex_unique_and_duplicate(self): # GH#44786 - df1 = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) - result = concat([df1], keys=["X"]) + df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) + result = concat([df], keys=["X"]) assert result.index.is_unique is False assert result.index.has_duplicates is True From f5b9b01f7f4183d53b59bc9f70c91b00b444ef8e Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Sat, 18 Dec 2021 21:55:18 +0300 Subject: [PATCH 4/6] assertion update --- pandas/tests/reshape/concat/test_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index f859c41347bde..9c31e27fc47b8 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -262,5 +262,5 @@ def test_concat_multiindex_unique_and_duplicate(self): # GH#44786 df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) result = concat([df], keys=["X"]) - assert result.index.is_unique is False - assert result.index.has_duplicates is True + assert not result.index.is_unique + assert result.index.has_duplicates From c5ad49369c75c049cc4c7869f40d419b28fb4bca Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Mon, 27 Dec 2021 15:09:40 +0300 Subject: [PATCH 5/6] assertion update --- pandas/tests/reshape/concat/test_index.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 9c31e27fc47b8..2f1bfc1da26fd 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -262,5 +262,7 @@ def test_concat_multiindex_unique_and_duplicate(self): # GH#44786 df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"]) result = concat([df], keys=["X"]) + expected_index = MultiIndex.from_arrays([["X", "X", "X"], ["1", "2", "2"]]) + tm.assert_frame_equal(result.index.to_frame(), expected_index.to_frame()) assert not result.index.is_unique assert result.index.has_duplicates From fb517a454e153b38697a1b4c23d981b9a37d8a06 Mon Sep 17 00:00:00 2001 From: Sukriti Bhardwaj Date: Mon, 27 Dec 2021 15:43:07 +0300 Subject: [PATCH 6/6] uptodate with master --- pandas/tests/reshape/concat/test_index.py | 49 +++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 2f1bfc1da26fd..37cd9050f2703 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -258,6 +258,55 @@ def test_concat_multiindex_dfs_with_deepcopy(self): result_no_copy = concat(example_dict, names=["testname"]) tm.assert_frame_equal(result_no_copy, expected) + @pytest.mark.parametrize( + "mi1_list", + [ + [["a"], range(2)], + [["b"], np.arange(2.0, 4.0)], + [["c"], ["A", "B"]], + [["d"], pd.date_range(start="2017", end="2018", periods=2)], + ], + ) + @pytest.mark.parametrize( + "mi2_list", + [ + [["a"], range(2)], + [["b"], np.arange(2.0, 4.0)], + [["c"], ["A", "B"]], + [["d"], pd.date_range(start="2017", end="2018", periods=2)], + ], + ) + def test_concat_with_various_multiindex_dtypes( + self, mi1_list: list, mi2_list: list + ): + # GitHub #23478 + mi1 = MultiIndex.from_product(mi1_list) + mi2 = MultiIndex.from_product(mi2_list) + + df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1) + df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2) + + if mi1_list[0] == mi2_list[0]: + expected_mi = MultiIndex( + levels=[mi1_list[0], list(mi1_list[1])], + codes=[[0, 0, 0, 0], [0, 1, 0, 1]], + ) + else: + expected_mi = MultiIndex( + levels=[ + mi1_list[0] + mi2_list[0], + list(mi1_list[1]) + list(mi2_list[1]), + ], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + ) + + expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi) + + with tm.assert_produces_warning(None): + result_df = concat((df1, df2), axis=1) + + tm.assert_frame_equal(expected_df, result_df) + def test_concat_multiindex_unique_and_duplicate(self): # GH#44786 df = DataFrame({"col": ["a", "b", "c"]}, index=["1", "2", "2"])