From 9694f238308d129b00ec0d1acd4dd59417f2c5a2 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Tue, 24 Nov 2020 11:05:50 +0800 Subject: [PATCH 01/19] fix remove_unused_levels --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 3 +++ pandas/tests/indexes/multi/test_sorting.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8aae870d50716..827a848f0b2f8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -697,6 +697,7 @@ Indexing - Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`) - Bug in :meth:`Series.loc` and :meth:`Series.iloc` raising ``ValueError`` when inserting a listlike ``np.array``, ``list`` or ``tuple`` in an ``object`` Series of equal length (:issue:`37748`, :issue:`37486`) - Bug in :meth:`Series.loc` and :meth:`Series.iloc` setting all the values of an ``object`` Series with those of a listlike ``ExtensionArray`` instead of inserting it (:issue:`38271`) +- Bug in :meth:`MultiIndex.remove_unused_levels` drops NaN when level contains NaN (:issue:`37510`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5312dfe84cfd8..dec029456bd8e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1979,6 +1979,9 @@ def remove_unused_levels(self): has_na = int(len(uniques) and (uniques[0] == -1)) if len(uniques) != len(lev) + has_na: + + if None in lev and len(uniques) == len(lev): + break # We have unused levels changed = True diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index e5d178581136b..a1295054774e1 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -271,3 +271,21 @@ def test_argsort(idx): result = idx.argsort() expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected) + + +def test_not_remove_nan(): + # GH 37510 + df1 = DataFrame({"id1": [1, 2, 3, 4], + "id2": [3, 4, 1, 2], + "id3": [1, 1, 1, 1], + "x": [1, 2, 3, 4]}) + df1.set_index(["id1", "id2", "id3"], inplace=True) + + new_levels = ["n1", "n2", "n3", None] + df1.index = df1.index.set_levels(levels=new_levels, level="id1", inplace=True) + df1.index = df1.index.set_levels(levels=new_levels, level="id2", inplace=True) + + result = df1.unstack("id3")[("x", 1)].sort_index().index + expected = df1.droplevel(2, 0).sort_index().index + + assert result.equals(expected) From 565f0a4ecd49793822e8ede25066fad5d3fc3913 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Tue, 24 Nov 2020 11:27:20 +0800 Subject: [PATCH 02/19] update --- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dec029456bd8e..3803cc5c4e841 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1980,7 +1980,7 @@ def remove_unused_levels(self): if len(uniques) != len(lev) + has_na: - if None in lev and len(uniques) == len(lev): + if -1 in level_codes and len(uniques) == len(lev): break # We have unused levels changed = True diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a1295054774e1..0ab94c1e89d23 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -275,15 +275,18 @@ def test_argsort(idx): def test_not_remove_nan(): # GH 37510 - df1 = DataFrame({"id1": [1, 2, 3, 4], - "id2": [3, 4, 1, 2], - "id3": [1, 1, 1, 1], - "x": [1, 2, 3, 4]}) - df1.set_index(["id1", "id2", "id3"], inplace=True) - + df1 = DataFrame( + { + "id1": [1, 2, 3, 4], + "id2": [3, 4, 1, 2], + "id3": [1, 1, 1, 1], + "x": [1, 2, 3, 4], + } + ) + df1 = df1.set_index(["id1", "id2", "id3"]) new_levels = ["n1", "n2", "n3", None] - df1.index = df1.index.set_levels(levels=new_levels, level="id1", inplace=True) - df1.index = df1.index.set_levels(levels=new_levels, level="id2", inplace=True) + df1.index = df1.index.set_levels(levels=new_levels, level="id1") + df1.index = df1.index.set_levels(levels=new_levels, level="id2") result = df1.unstack("id3")[("x", 1)].sort_index().index expected = df1.droplevel(2, 0).sort_index().index From 9ddec1e1e2592cc4500a4599cade2773e2b52b28 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Tue, 24 Nov 2020 14:32:38 +0800 Subject: [PATCH 03/19] update --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 18 +++++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 827a848f0b2f8..d2bb26de21fca 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -698,6 +698,7 @@ Indexing - Bug in :meth:`Series.loc` and :meth:`Series.iloc` raising ``ValueError`` when inserting a listlike ``np.array``, ``list`` or ``tuple`` in an ``object`` Series of equal length (:issue:`37748`, :issue:`37486`) - Bug in :meth:`Series.loc` and :meth:`Series.iloc` setting all the values of an ``object`` Series with those of a listlike ``ExtensionArray`` instead of inserting it (:issue:`38271`) - Bug in :meth:`MultiIndex.remove_unused_levels` drops NaN when level contains NaN (:issue:`37510`) +- Bug in :meth:`MultiIndex.remove_unused_levels` was dropping missing values when levels contain ``NaN`` which set by ``set_levels`` (:issue:`37510`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3803cc5c4e841..bb8f344d4f0f8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1980,7 +1980,7 @@ def remove_unused_levels(self): if len(uniques) != len(lev) + has_na: - if -1 in level_codes and len(uniques) == len(lev): + if lev.isna().any() and len(uniques) == len(lev): break # We have unused levels changed = True diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 0ab94c1e89d23..f0c51aa592c75 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -273,22 +273,22 @@ def test_argsort(idx): tm.assert_numpy_array_equal(result, expected) -def test_not_remove_nan(): +def test_remove_unused_levels_with_missing(): # GH 37510 df1 = DataFrame( { - "id1": [1, 2, 3, 4], - "id2": [3, 4, 1, 2], - "id3": [1, 1, 1, 1], + "L1": [1, 2, 3, 4], + "L2": [3, 4, 1, 2], + "L3": [1, 1, 1, 1], "x": [1, 2, 3, 4], } ) - df1 = df1.set_index(["id1", "id2", "id3"]) + df1 = df1.set_index(["L1", "L2", "L3"]) new_levels = ["n1", "n2", "n3", None] - df1.index = df1.index.set_levels(levels=new_levels, level="id1") - df1.index = df1.index.set_levels(levels=new_levels, level="id2") + df1.index = df1.index.set_levels(levels=new_levels, level="L1") + df1.index = df1.index.set_levels(levels=new_levels, level="L2") - result = df1.unstack("id3")[("x", 1)].sort_index().index + result = df1.unstack("L3")[("x", 1)].sort_index().index expected = df1.droplevel(2, 0).sort_index().index - assert result.equals(expected) + assert tm.assert_index_equal(result, expected) From a16be32361b247516218ade9e378454ba4b1079c Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Tue, 24 Nov 2020 15:14:45 +0800 Subject: [PATCH 04/19] drop assert --- pandas/tests/indexes/multi/test_sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index f0c51aa592c75..5816b72f2d501 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -291,4 +291,4 @@ def test_remove_unused_levels_with_missing(): result = df1.unstack("L3")[("x", 1)].sort_index().index expected = df1.droplevel(2, 0).sort_index().index - assert tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) From 82b816d19f71fa1785c207d28d60060a9883e6fc Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 26 Nov 2020 08:36:51 +0800 Subject: [PATCH 05/19] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d2bb26de21fca..d6f5bb071b77c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -689,6 +689,7 @@ Indexing - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) +- Bug in :meth:`MultiIndex.remove_unused_levels` was dropping missing values when levels contain ``NaN`` set by ``set_levels`` (:issue:`37510`) - Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) - Bug in :meth:`Series.at` returning :class:`Series` with one element instead of scalar when index is a :class:`MultiIndex` with one level (:issue:`38053`) - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) From 427797bfd64e7a56aeb77fb5ca26334af760c94f Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 09:16:07 +0800 Subject: [PATCH 06/19] hardcode fix and whatsnew improve --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/indexes/multi/test_sorting.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d6f5bb071b77c..36119bfaea063 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -689,7 +689,6 @@ Indexing - Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) - Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) -- Bug in :meth:`MultiIndex.remove_unused_levels` was dropping missing values when levels contain ``NaN`` set by ``set_levels`` (:issue:`37510`) - Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) - Bug in :meth:`Series.at` returning :class:`Series` with one element instead of scalar when index is a :class:`MultiIndex` with one level (:issue:`38053`) - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) @@ -830,6 +829,7 @@ Reshaping - Bug in :func:`merge_ordered` returned wrong join result when length of ``left_by`` or ``right_by`` equals to the rows of ``left`` or ``right`` (:issue:`38166`) - Bug in :func:`merge_ordered` didn't raise when elements in ``left_by`` or ``right_by`` not exist in ``left`` columns or ``right`` columns (:issue:`38167`) - Bug in :func:`DataFrame.drop_duplicates` not validating bool dtype for ``ignore_index`` keyword (:issue:`38274`) +- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) Sparse ^^^^^^ diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 5816b72f2d501..0b192b5fad135 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -289,6 +289,11 @@ def test_remove_unused_levels_with_missing(): df1.index = df1.index.set_levels(levels=new_levels, level="L2") result = df1.unstack("L3")[("x", 1)].sort_index().index - expected = df1.droplevel(2, 0).sort_index().index + expected_index = Index( + [ + ("n1", "n3"), ("n2", np.nan), ("n3", "n1"), ( np.nan, "n2") + ], + names=["L1", "L2"] + ) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.index, expected_index) From df03bb43762e83787d4c5dbf9574b15a6ddca59c Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 10:09:53 +0800 Subject: [PATCH 07/19] fix-pre --- pandas/tests/indexes/multi/test_sorting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 0b192b5fad135..10e65c2f7edf7 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -290,10 +290,7 @@ def test_remove_unused_levels_with_missing(): result = df1.unstack("L3")[("x", 1)].sort_index().index expected_index = Index( - [ - ("n1", "n3"), ("n2", np.nan), ("n3", "n1"), ( np.nan, "n2") - ], - names=["L1", "L2"] + [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], names=["L1", "L2"] ) tm.assert_index_equal(result.index, expected_index) From a92832fd1ca2c555db52ec4824bb524a3773c7f7 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 11:04:02 +0800 Subject: [PATCH 08/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 10e65c2f7edf7..a310d92597248 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -288,7 +288,7 @@ def test_remove_unused_levels_with_missing(): df1.index = df1.index.set_levels(levels=new_levels, level="L1") df1.index = df1.index.set_levels(levels=new_levels, level="L2") - result = df1.unstack("L3")[("x", 1)].sort_index().index + result = df1.unstack("L3")[("x", 1)].sort_index() expected_index = Index( [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], names=["L1", "L2"] ) From 9a210acd5c4bbe0e9d80b6ab6c082321e31254ed Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 11:45:42 +0800 Subject: [PATCH 09/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a310d92597248..0674fc0180b20 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -291,6 +291,10 @@ def test_remove_unused_levels_with_missing(): result = df1.unstack("L3")[("x", 1)].sort_index() expected_index = Index( [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], names=["L1", "L2"] + ).set_levels( + levels=new_levels, level="L1" + ).set_levels( + levels=new_levels, level="L2" ) tm.assert_index_equal(result.index, expected_index) From 8c6b1b179d70741196dc05b5788e594fd1b8bba3 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Fri, 27 Nov 2020 12:04:15 +0800 Subject: [PATCH 10/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 0674fc0180b20..a604ae43e46cf 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -289,12 +289,13 @@ def test_remove_unused_levels_with_missing(): df1.index = df1.index.set_levels(levels=new_levels, level="L2") result = df1.unstack("L3")[("x", 1)].sort_index() - expected_index = Index( - [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], names=["L1", "L2"] - ).set_levels( - levels=new_levels, level="L1" - ).set_levels( - levels=new_levels, level="L2" + expected_index = ( + Index( + [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], + names=["L1", "L2"], + ) + .set_levels(levels=new_levels, level="L1") + .set_levels(levels=new_levels, level="L2") ) tm.assert_index_equal(result.index, expected_index) From 00295baed3d8769b52d49fa6ff8da6fdabfc869a Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sat, 28 Nov 2020 20:00:36 +0800 Subject: [PATCH 11/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a604ae43e46cf..9eed22e33dce0 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -288,14 +288,11 @@ def test_remove_unused_levels_with_missing(): df1.index = df1.index.set_levels(levels=new_levels, level="L1") df1.index = df1.index.set_levels(levels=new_levels, level="L2") - result = df1.unstack("L3")[("x", 1)].sort_index() - expected_index = ( - Index( - [("n1", "n3"), ("n2", np.nan), ("n3", "n1"), (np.nan, "n2")], - names=["L1", "L2"], - ) - .set_levels(levels=new_levels, level="L1") - .set_levels(levels=new_levels, level="L2") + result = df1.unstack("L3")[("x", 1)].sort_index().index + expected = MultiIndex( + levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], + codes=[[0, 1, 2, 3], [2, 3, 0, 1]], + names=["L1", "L2"] ) - tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.index, expected) From 8eef22c6f86eaf9b6e5433d2e099cfc4ad169b83 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sat, 28 Nov 2020 20:09:10 +0800 Subject: [PATCH 12/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 9eed22e33dce0..75acd6b24dc04 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -292,7 +292,7 @@ def test_remove_unused_levels_with_missing(): expected = MultiIndex( levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], codes=[[0, 1, 2, 3], [2, 3, 0, 1]], - names=["L1", "L2"] + names=["L1", "L2"], ) tm.assert_index_equal(result.index, expected) From 0dc308a82c15c0177ceb5cc492a9468b1bc937eb Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Sat, 28 Nov 2020 21:06:48 +0800 Subject: [PATCH 13/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 75acd6b24dc04..d68cf91a4137b 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -295,4 +295,4 @@ def test_remove_unused_levels_with_missing(): names=["L1", "L2"], ) - tm.assert_index_equal(result.index, expected) + tm.assert_index_equal(result, expected) From 565a229f7335b945fc7f7b6947c59fbbd3bf3013 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 10:55:44 +0800 Subject: [PATCH 14/19] Update v1.3.0.rst --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 53f254aee2e0e..811b31edf301a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -267,7 +267,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Sparse From d14b0185eb7313af7972e08e1e337cc182886cf3 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 10:56:53 +0800 Subject: [PATCH 15/19] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 36119bfaea063..d2bb26de21fca 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -829,7 +829,6 @@ Reshaping - Bug in :func:`merge_ordered` returned wrong join result when length of ``left_by`` or ``right_by`` equals to the rows of ``left`` or ``right`` (:issue:`38166`) - Bug in :func:`merge_ordered` didn't raise when elements in ``left_by`` or ``right_by`` not exist in ``left`` columns or ``right`` columns (:issue:`38167`) - Bug in :func:`DataFrame.drop_duplicates` not validating bool dtype for ``ignore_index`` keyword (:issue:`38274`) -- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) Sparse ^^^^^^ From 3a6774f8de4219516cc203194657883e0d4e5f14 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 10:58:34 +0800 Subject: [PATCH 16/19] Update v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d2bb26de21fca..8aae870d50716 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -697,8 +697,6 @@ Indexing - Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`) - Bug in :meth:`Series.loc` and :meth:`Series.iloc` raising ``ValueError`` when inserting a listlike ``np.array``, ``list`` or ``tuple`` in an ``object`` Series of equal length (:issue:`37748`, :issue:`37486`) - Bug in :meth:`Series.loc` and :meth:`Series.iloc` setting all the values of an ``object`` Series with those of a listlike ``ExtensionArray`` instead of inserting it (:issue:`38271`) -- Bug in :meth:`MultiIndex.remove_unused_levels` drops NaN when level contains NaN (:issue:`37510`) -- Bug in :meth:`MultiIndex.remove_unused_levels` was dropping missing values when levels contain ``NaN`` which set by ``set_levels`` (:issue:`37510`) Missing ^^^^^^^ From 272778ecf18df087562347deff84091263d3c744 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 12:27:44 +0800 Subject: [PATCH 17/19] add tests --- pandas/tests/frame/test_stack_unstack.py | 24 +++++++++++++++++ pandas/tests/indexes/multi/test_sorting.py | 30 ++++++---------------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 9825bcb0b5d57..e8ae9f6584ad6 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1907,3 +1907,27 @@ def test_unstack_with_missing_int_cast_to_float(self): ), ) tm.assert_frame_equal(result, expected) + + def test_unstack_with_level_has_nan(self): + # GH 37510 + df1 = DataFrame( + { + "L1": [1, 2, 3, 4], + "L2": [3, 4, 1, 2], + "L3": [1, 1, 1, 1], + "x": [1, 2, 3, 4], + } + ) + df1 = df1.set_index(["L1", "L2", "L3"]) + new_levels = ["n1", "n2", "n3", None] + df1.index = df1.index.set_levels(levels=new_levels, level="L1") + df1.index = df1.index.set_levels(levels=new_levels, level="L2") + + result = df1.unstack("L3")[("x", 1)].sort_index().index + expected = MultiIndex( + levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], + codes=[[0, 1, 2, 3], [2, 3, 0, 1]], + names=["L1", "L2"], + ) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index d68cf91a4137b..a2d945426f7e8 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -5,6 +5,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError +from pandas.core.indexes.frozen import FrozenList from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex import pandas._testing as tm @@ -273,26 +274,11 @@ def test_argsort(idx): tm.assert_numpy_array_equal(result, expected) -def test_remove_unused_levels_with_missing(): +def test_remove_unused_levels_with_nan(): # GH 37510 - df1 = DataFrame( - { - "L1": [1, 2, 3, 4], - "L2": [3, 4, 1, 2], - "L3": [1, 1, 1, 1], - "x": [1, 2, 3, 4], - } - ) - df1 = df1.set_index(["L1", "L2", "L3"]) - new_levels = ["n1", "n2", "n3", None] - df1.index = df1.index.set_levels(levels=new_levels, level="L1") - df1.index = df1.index.set_levels(levels=new_levels, level="L2") - - result = df1.unstack("L3")[("x", 1)].sort_index().index - expected = MultiIndex( - levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], - codes=[[0, 1, 2, 3], [2, 3, 0, 1]], - names=["L1", "L2"], - ) - - tm.assert_index_equal(result, expected) + idx = Index([(1, np.nan), (3, 4)], names=["id1", "id2"]) + idx = idx.set_levels(["a", np.nan], level="id1") + idx = idx.remove_unused_levels() + result = idx.levels + expected = FrozenList([['a', np.nan], [4]]) + assert str(result) == str(expected) From a11a112ef97fd45639b7c725d7a493adb23745be Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 12:35:55 +0800 Subject: [PATCH 18/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index a2d945426f7e8..69b7097fe7915 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -5,9 +5,9 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.indexes.frozen import FrozenList from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList def test_sortlevel(idx): @@ -280,5 +280,5 @@ def test_remove_unused_levels_with_nan(): idx = idx.set_levels(["a", np.nan], level="id1") idx = idx.remove_unused_levels() result = idx.levels - expected = FrozenList([['a', np.nan], [4]]) + expected = FrozenList([["a", np.nan], [4]]) assert str(result) == str(expected) From 3586e5639f18a0dd6a7b3ebf38eb4de80a8192ab Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 31 Dec 2020 15:13:46 +0800 Subject: [PATCH 19/19] Update test_sorting.py --- pandas/tests/indexes/multi/test_sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 69b7097fe7915..3de78c5e982d3 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -276,7 +276,7 @@ def test_argsort(idx): def test_remove_unused_levels_with_nan(): # GH 37510 - idx = Index([(1, np.nan), (3, 4)], names=["id1", "id2"]) + idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) idx = idx.set_levels(["a", np.nan], level="id1") idx = idx.remove_unused_levels() result = idx.levels