From 34392ce1e928f30e9797c8de07e02d5fe4698897 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 14:44:31 -0600 Subject: [PATCH 1/9] add test to reproduce bug --- pandas/tests/frame/test_join.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index adace5e4784ae..2251dfed55715 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -24,6 +24,14 @@ def right(): return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) +@pytest.fixture(params=[True, False, None]) +def sort_kw(request): + """Boolean sort keyword for join. + Includes the default of None. + """ + return request.param + + @pytest.mark.parametrize( "how, sort, expected", [ @@ -193,3 +201,28 @@ def test_join_left_sequence_non_unique_index(): ) tm.assert_frame_equal(joined, expected) + + +def test_suppress_future_warning_with_sort_kw(sort_kw): + a = DataFrame( + {'col1': [1, 2, 3, 4, 5], + 'col2': [6, 7, 8, 9, 10]}, + index=['a', 'c', 'e', 'f', 'i']) + + b = DataFrame( + {'col4': [1, 2, 3, 4, 5], + 'col3': [1, 2, 3, 4, 5]}, + index=['a', 'b', 'c', 'd', 'e']) + + c = DataFrame( + {'col5': [1, 2, 3, 4, 5]}, + index=['f', 'g', 'h', 'i', 'j']) + + if sort_kw is None: + # only warn if not explicitly specified + ctx = tm.assert_produces_warning(FutureWarning, check_stacklevel=False) + else: + ctx = tm.assert_produces_warning(None, check_stacklevel=False) + + with ctx: + a.join([b, c], sort=sort_kw) \ No newline at end of file From beb8c9cb515e5c4c0e5504f83cf29c927a1f869e Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 14:50:17 -0600 Subject: [PATCH 2/9] add sort kw that was missing --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5980e3d133374..30480a80910ab 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7217,10 +7217,10 @@ def _join_compat( # join indexes only using concat if can_concat: if how == "left": - res = concat(frames, axis=1, join="outer", verify_integrity=True) + res = concat(frames, axis=1, join="outer", verify_integrity=True, sort=sort) return res.reindex(self.index, copy=False) else: - return concat(frames, axis=1, join=how, verify_integrity=True) + return concat(frames, axis=1, join=how, verify_integrity=True, sort=sort) joined = frames[0] From 716a7e7ed3b4e2c2891691c268ddb75b13f4e4ed Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 15:05:30 -0600 Subject: [PATCH 3/9] correct the formatting with black --- pandas/core/frame.py | 8 ++++++-- pandas/tests/frame/test_join.py | 18 ++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 30480a80910ab..2a93cd5937221 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7217,10 +7217,14 @@ def _join_compat( # join indexes only using concat if can_concat: if how == "left": - res = concat(frames, axis=1, join="outer", verify_integrity=True, sort=sort) + res = concat( + frames, axis=1, join="outer", verify_integrity=True, sort=sort + ) return res.reindex(self.index, copy=False) else: - return concat(frames, axis=1, join=how, verify_integrity=True, sort=sort) + return concat( + frames, axis=1, join=how, verify_integrity=True, sort=sort + ) joined = frames[0] diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 2251dfed55715..4d037e1f3d330 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -205,18 +205,16 @@ def test_join_left_sequence_non_unique_index(): def test_suppress_future_warning_with_sort_kw(sort_kw): a = DataFrame( - {'col1': [1, 2, 3, 4, 5], - 'col2': [6, 7, 8, 9, 10]}, - index=['a', 'c', 'e', 'f', 'i']) + {"col1": [1, 2, 3, 4, 5], "col2": [6, 7, 8, 9, 10]}, + index=["a", "c", "e", "f", "i"], + ) b = DataFrame( - {'col4': [1, 2, 3, 4, 5], - 'col3': [1, 2, 3, 4, 5]}, - index=['a', 'b', 'c', 'd', 'e']) + {"col4": [1, 2, 3, 4, 5], "col3": [1, 2, 3, 4, 5]}, + index=["a", "b", "c", "d", "e"], + ) - c = DataFrame( - {'col5': [1, 2, 3, 4, 5]}, - index=['f', 'g', 'h', 'i', 'j']) + c = DataFrame({"col5": [1, 2, 3, 4, 5]}, index=["f", "g", "h", "i", "j"]) if sort_kw is None: # only warn if not explicitly specified @@ -225,4 +223,4 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): ctx = tm.assert_produces_warning(None, check_stacklevel=False) with ctx: - a.join([b, c], sort=sort_kw) \ No newline at end of file + a.join([b, c], sort=sort_kw) From 60de57adea894be0b50f47c7dc69b6c5d9963683 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 16:48:54 -0600 Subject: [PATCH 4/9] add release note to v0.25.1 --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index c80195af413f7..167a783a19e72 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -125,7 +125,7 @@ Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) -- +- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) - Sparse From 463f55739b4cb5a6c9334be34fa43e908d1dbec7 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 16:51:18 -0600 Subject: [PATCH 5/9] change the tests to be more brief and undo the fix such that tests fail. add assertion on test result --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_join.py | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2a93cd5937221..0fb4ac3db2676 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7218,12 +7218,12 @@ def _join_compat( if can_concat: if how == "left": res = concat( - frames, axis=1, join="outer", verify_integrity=True, sort=sort + frames, axis=1, join="outer", verify_integrity=True, #sort=sort ) return res.reindex(self.index, copy=False) else: return concat( - frames, axis=1, join=how, verify_integrity=True, sort=sort + frames, axis=1, join=how, verify_integrity=True,# sort=sort ) joined = frames[0] diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 4d037e1f3d330..c7b5502e50c54 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -205,16 +205,25 @@ def test_join_left_sequence_non_unique_index(): def test_suppress_future_warning_with_sort_kw(sort_kw): a = DataFrame( - {"col1": [1, 2, 3, 4, 5], "col2": [6, 7, 8, 9, 10]}, - index=["a", "c", "e", "f", "i"], + {'col1': [1, 2]}, + index=['c', 'a'] ) b = DataFrame( - {"col4": [1, 2, 3, 4, 5], "col3": [1, 2, 3, 4, 5]}, - index=["a", "b", "c", "d", "e"], + {'col2': [4, 5]}, + index=['b', 'a'] ) - c = DataFrame({"col5": [1, 2, 3, 4, 5]}, index=["f", "g", "h", "i", "j"]) + c = DataFrame( + {'col3': [7, 8]}, + index=['a', 'b'] + ) + + + expected = DataFrame( {'col1': {'a': 2.0, 'b': float('nan'), 'c': 1.0}, 'col2': {'a': 5.0, 'b': 4.0, 'c': float('nan')}, 'col3': {'a': 7.0, 'b': 8.0, 'c': float('nan')}} ) + if sort_kw is False: + expected = expected.reindex(index=['c', 'a', 'b']) + if sort_kw is None: # only warn if not explicitly specified @@ -223,4 +232,5 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): ctx = tm.assert_produces_warning(None, check_stacklevel=False) with ctx: - a.join([b, c], sort=sort_kw) + result = a.join([b, c], how='outer', sort=sort_kw) + tm.assert_frame_equal(result, expected) From 9c4a3d613421f00701577408bfe6ec837d5498a1 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 16:52:37 -0600 Subject: [PATCH 6/9] uncomment the fix --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0fb4ac3db2676..2a93cd5937221 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7218,12 +7218,12 @@ def _join_compat( if can_concat: if how == "left": res = concat( - frames, axis=1, join="outer", verify_integrity=True, #sort=sort + frames, axis=1, join="outer", verify_integrity=True, sort=sort ) return res.reindex(self.index, copy=False) else: return concat( - frames, axis=1, join=how, verify_integrity=True,# sort=sort + frames, axis=1, join=how, verify_integrity=True, sort=sort ) joined = frames[0] From 0198ffef20eac5833539ed78131b2192a8edef97 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 16:53:33 -0600 Subject: [PATCH 7/9] reformat code with black --- pandas/tests/frame/test_join.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index c7b5502e50c54..2d0b8e4438fa4 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -204,26 +204,21 @@ def test_join_left_sequence_non_unique_index(): def test_suppress_future_warning_with_sort_kw(sort_kw): - a = DataFrame( - {'col1': [1, 2]}, - index=['c', 'a'] - ) - - b = DataFrame( - {'col2': [4, 5]}, - index=['b', 'a'] - ) + a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) - c = DataFrame( - {'col3': [7, 8]}, - index=['a', 'b'] - ) + b = DataFrame({"col2": [4, 5]}, index=["b", "a"]) + c = DataFrame({"col3": [7, 8]}, index=["a", "b"]) - expected = DataFrame( {'col1': {'a': 2.0, 'b': float('nan'), 'c': 1.0}, 'col2': {'a': 5.0, 'b': 4.0, 'c': float('nan')}, 'col3': {'a': 7.0, 'b': 8.0, 'c': float('nan')}} ) + expected = DataFrame( + { + "col1": {"a": 2.0, "b": float("nan"), "c": 1.0}, + "col2": {"a": 5.0, "b": 4.0, "c": float("nan")}, + "col3": {"a": 7.0, "b": 8.0, "c": float("nan")}, + } + ) if sort_kw is False: - expected = expected.reindex(index=['c', 'a', 'b']) - + expected = expected.reindex(index=["c", "a", "b"]) if sort_kw is None: # only warn if not explicitly specified @@ -232,5 +227,5 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): ctx = tm.assert_produces_warning(None, check_stacklevel=False) with ctx: - result = a.join([b, c], how='outer', sort=sort_kw) + result = a.join([b, c], how="outer", sort=sort_kw) tm.assert_frame_equal(result, expected) From 340326370a7107bf61959ba858d5b6f915b45dae Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Thu, 1 Aug 2019 17:02:53 -0600 Subject: [PATCH 8/9] move fixture to only apply to new test function --- pandas/tests/frame/test_join.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 2d0b8e4438fa4..220968d4b3d29 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -24,14 +24,6 @@ def right(): return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) -@pytest.fixture(params=[True, False, None]) -def sort_kw(request): - """Boolean sort keyword for join. - Includes the default of None. - """ - return request.param - - @pytest.mark.parametrize( "how, sort, expected", [ @@ -203,6 +195,7 @@ def test_join_left_sequence_non_unique_index(): tm.assert_frame_equal(joined, expected) +@pytest.mark.parametrize("sort_kw", [True, False, None]) def test_suppress_future_warning_with_sort_kw(sort_kw): a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) From e6952762b685c6785e24428683059076b01d1ec2 Mon Sep 17 00:00:00 2001 From: Nico Cernek Date: Fri, 2 Aug 2019 10:38:00 -0600 Subject: [PATCH 9/9] remove trailing whitespace Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 167a783a19e72..01e4046e8b743 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -125,7 +125,7 @@ Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) -- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) +- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) - Sparse