From 0259cf5317c645d0d903df20c8913be0be1eca75 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Mon, 21 Oct 2019 17:06:10 -0400 Subject: [PATCH 01/10] BUG: Fix NDFrame.interpolate non-existent variable #29132 NDFrame.interpolate fails if axis is specified by name and not integer --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 48c1173a372a7..7f31a273c8798 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -249,6 +249,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) +- Bug in :func:`pandas.core.generic.NDFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29132`) Categorical ^^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a300748ee5bc8..0e3e435c83dc6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7023,6 +7023,8 @@ def interpolate( """ inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + if axis == 0: ax = self._info_axis_name _maybe_transposed_self = self @@ -7030,6 +7032,7 @@ def interpolate( _maybe_transposed_self = self.T ax = 1 else: + ax = axis _maybe_transposed_self = self ax = _maybe_transposed_self._get_axis_number(ax) From b63920b7ba8ab875a385960a724f9d2d3fff9cda Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 00:27:02 -0400 Subject: [PATCH 02/10] Removed unreachable else statement in NDFrame.interpolate --- pandas/core/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e3e435c83dc6..a66b5359950ba 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7031,9 +7031,7 @@ def interpolate( elif axis == 1: _maybe_transposed_self = self.T ax = 1 - else: - ax = axis - _maybe_transposed_self = self + ax = _maybe_transposed_self._get_axis_number(ax) if _maybe_transposed_self.ndim == 2: From e1191b65a820735d082a6aa922367baa15e4eb42 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 13:08:37 -0400 Subject: [PATCH 03/10] Added a test for issue #29142 Added test_interpolate_axis_argument() to TestNDFrame --- pandas/tests/generic/test_generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 7b9e50ebbf342..39ae2f1e8c2bc 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -948,3 +948,7 @@ def test_deprecated_get_dtype_counts(self): df = DataFrame([1]) with tm.assert_produces_warning(FutureWarning): df.get_dtype_counts() + + @pytest.mark.parametrize("axis", [0, 1, "index", "columns", "rows"]) + def test_interpolate_axis_argument(self, axis): + DataFrame([0]).interpolate(axis=axis) From f8feeb9ca8c4269652a3569403c26c106cbdf409 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 13:13:20 -0400 Subject: [PATCH 04/10] Edited test for issue #29142 --- pandas/tests/generic/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 39ae2f1e8c2bc..233e6ba6d0b10 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -951,4 +951,4 @@ def test_deprecated_get_dtype_counts(self): @pytest.mark.parametrize("axis", [0, 1, "index", "columns", "rows"]) def test_interpolate_axis_argument(self, axis): - DataFrame([0]).interpolate(axis=axis) + assert DataFrame([0]).interpolate(axis=axis) From 7331d2d8e8c0efa78eb0b8a3d5538a4b4139d761 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 17:22:12 -0400 Subject: [PATCH 05/10] Adding issue number to test --- pandas/tests/generic/test_generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 233e6ba6d0b10..683945d2b39e3 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -951,4 +951,5 @@ def test_deprecated_get_dtype_counts(self): @pytest.mark.parametrize("axis", [0, 1, "index", "columns", "rows"]) def test_interpolate_axis_argument(self, axis): + # GH 29142 assert DataFrame([0]).interpolate(axis=axis) From 0a4067bf744acbb5d1b28079c88d36887a00848f Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 17:38:46 -0400 Subject: [PATCH 06/10] Edit whatsnew file to reference a user facing method instead of NDFrame --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ce5293ab874c2..88d91c1b45866 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -249,7 +249,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) -- Bug in :func:`pandas.core.generic.NDFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29132`) +- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29132`) Categorical ^^^^^^^^^^^ From 82315596e6f830462046b8f17703e05d0d0c639d Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Tue, 22 Oct 2019 21:13:20 -0400 Subject: [PATCH 07/10] Added assert_frame_equal to check object returned by interpolate --- pandas/tests/generic/test_generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 683945d2b39e3..8d860b10e2361 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -952,4 +952,6 @@ def test_deprecated_get_dtype_counts(self): @pytest.mark.parametrize("axis", [0, 1, "index", "columns", "rows"]) def test_interpolate_axis_argument(self, axis): # GH 29142 - assert DataFrame([0]).interpolate(axis=axis) + df = pd.DataFrame([0]) + result = df.interpolate(axis=axis) + tm.assert_frame_equal(result, df) From 499147ab1eca973b8d35a6425d4bedcf1508dec8 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Wed, 23 Oct 2019 17:42:37 -0400 Subject: [PATCH 08/10] Moving GH #29142 to same file as other DataFrame.interpolate tests --- pandas/tests/frame/test_missing.py | 13 +++++++++++++ pandas/tests/generic/test_generic.py | 7 ------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 94667ecfa837d..dc202008254ca 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -398,6 +398,7 @@ def test_fillna_categorical_nan(self): result = df.cats.fillna(np.nan) tm.assert_series_equal(result, df.cats) + result = df.vals.fillna(np.nan) tm.assert_series_equal(result, df.vals) @@ -872,10 +873,22 @@ def test_interp_rowwise(self): result = df.interpolate(axis=1, method="values") assert_frame_equal(result, expected) + # GH 29142: test axis names + result = df.interpolate(axis="columns", method="values") + assert_frame_equal(result, expected) + result = df.interpolate(axis=0) expected = df.interpolate() assert_frame_equal(result, expected) + # GH 29142: test axis names + result = df.interpolate(axis="rows", method="values") + assert_frame_equal(result, expected) + + # GH 29142: test axis names + result = df.interpolate(axis="index", method="values") + assert_frame_equal(result, expected) + def test_rowwise_alt(self): df = DataFrame( { diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 8d860b10e2361..7b9e50ebbf342 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -948,10 +948,3 @@ def test_deprecated_get_dtype_counts(self): df = DataFrame([1]) with tm.assert_produces_warning(FutureWarning): df.get_dtype_counts() - - @pytest.mark.parametrize("axis", [0, 1, "index", "columns", "rows"]) - def test_interpolate_axis_argument(self, axis): - # GH 29142 - df = pd.DataFrame([0]) - result = df.interpolate(axis=axis) - tm.assert_frame_equal(result, df) From d2bb8a0c6aeb9629b8235ba0789a165e9ec98042 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Thu, 24 Oct 2019 20:14:43 -0400 Subject: [PATCH 09/10] Moved interpolate axis name tests to a new parametrized function Also moved whatsnew comments --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- pandas/tests/frame/test_missing.py | 23 ++++++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 88d91c1b45866..1bba01d7a2ecb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -248,8 +248,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) -- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29132`) Categorical ^^^^^^^^^^^ @@ -297,6 +295,7 @@ Numeric - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) - Conversion @@ -357,6 +356,7 @@ I/O - Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) - Bug in :meth:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`) +- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) Plotting ^^^^^^^^ diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index dc202008254ca..88c1611f52369 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -873,20 +873,25 @@ def test_interp_rowwise(self): result = df.interpolate(axis=1, method="values") assert_frame_equal(result, expected) - # GH 29142: test axis names - result = df.interpolate(axis="columns", method="values") - assert_frame_equal(result, expected) - result = df.interpolate(axis=0) expected = df.interpolate() assert_frame_equal(result, expected) - # GH 29142: test axis names - result = df.interpolate(axis="rows", method="values") - assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} - # GH 29142: test axis names - result = df.interpolate(axis="index", method="values") + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") assert_frame_equal(result, expected) def test_rowwise_alt(self): From aae361dfcb56bef211581a920e86cc3aac5bae86 Mon Sep 17 00:00:00 2001 From: Elle Hanson Date: Thu, 24 Oct 2019 20:24:17 -0400 Subject: [PATCH 10/10] Added RuntimeWarning assertion to test_fillna_categorical_nan RuntimeWarning occurs due to a call to df.median (np.nanmedian) --- pandas/tests/frame/test_missing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 88c1611f52369..1f4bbcb358378 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -391,7 +391,8 @@ def test_fillna_categorical_nan(self): cat = Categorical([np.nan, 2, np.nan]) val = Categorical([np.nan, np.nan, np.nan]) df = DataFrame({"cats": cat, "vals": val}) - res = df.fillna(df.median()) + with tm.assert_produces_warning(RuntimeWarning): + res = df.fillna(df.median()) v_exp = [np.nan, np.nan, np.nan] df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") tm.assert_frame_equal(res, df_exp)