From 18508e5eb12a4c4f6999ed6ef1047a3d32e1925d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 30 Jan 2020 19:55:48 -0600 Subject: [PATCH 1/9] Add test --- pandas/tests/groupby/test_groupby.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index eb9552fbbebc1..39f7316e014de 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2037,3 +2037,24 @@ def test_groupby_list_level(): expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) result = expected.groupby(level=[0]).mean() tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + + df = pd.DataFrame( + { + "a": groups, + "b": periods, + } + ) + + result = getattr(df.groupby("a")["b"], func)() + + idx = pd.Int64Index([1, 2], name="a") + expected = pd.Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) \ No newline at end of file From a8bc3a82bd7b33b6f99e8943492adef22f3ef6b1 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 30 Jan 2020 19:56:34 -0600 Subject: [PATCH 2/9] Check for period dtype --- pandas/core/groupby/ops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 2e95daa392976..20774b921131c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -31,6 +31,7 @@ is_extension_array_dtype, is_integer_dtype, is_numeric_dtype, + is_period_dtype, is_sparse, is_timedelta64_dtype, needs_i8_conversion, @@ -567,7 +568,9 @@ def _cython_operation( if swapped: result = result.swapaxes(0, axis) - if is_datetime64tz_dtype(orig_values.dtype): + if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype( + orig_values.dtype + ): result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) elif is_datetimelike and kind == "aggregate": result = result.astype(orig_values.dtype) From 23b7ba680b0a9826bbb9f0cb4eb82d7e82f152b2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 30 Jan 2020 20:00:31 -0600 Subject: [PATCH 3/9] Release note --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 920919755dc23..e353cdfc10e80 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -179,6 +179,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) +- Bug in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. (:issue:`31471`) Reshaping ^^^^^^^^^ From 46bb8dc8971cda085e7fd38c4d652bed4d50d6ba Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 30 Jan 2020 20:05:21 -0600 Subject: [PATCH 4/9] Blacken --- pandas/tests/groupby/test_groupby.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 39f7316e014de..ef351c97b6700 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2045,16 +2045,11 @@ def test_groupby_aggregate_period(func): groups = [1, 2] periods = pd.period_range("2020", periods=2, freq="Y") - df = pd.DataFrame( - { - "a": groups, - "b": periods, - } - ) + df = pd.DataFrame({"a": groups, "b": periods,}) result = getattr(df.groupby("a")["b"], func)() idx = pd.Int64Index([1, 2], name="a") expected = pd.Series(periods, index=idx, name="b") - tm.assert_series_equal(result, expected) \ No newline at end of file + tm.assert_series_equal(result, expected) From fd0e79b4221e2f20e30d5a01190d5a64c4923152 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 30 Jan 2020 20:08:17 -0600 Subject: [PATCH 5/9] Get rid of comma --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ef351c97b6700..4cd0af40754ab 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2045,7 +2045,7 @@ def test_groupby_aggregate_period(func): groups = [1, 2] periods = pd.period_range("2020", periods=2, freq="Y") - df = pd.DataFrame({"a": groups, "b": periods,}) + df = pd.DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a")["b"], func)() From 9f063b3bdff7d7d6bd11ff5591402eba522a451c Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 31 Jan 2020 07:45:51 -0600 Subject: [PATCH 6/9] Move release note --- doc/source/whatsnew/v1.0.1.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index b84448e3bf896..0bdffbdc391d2 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -98,6 +98,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. (:issue:`31471`) - - diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e353cdfc10e80..920919755dc23 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -179,7 +179,6 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) -- Bug in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. (:issue:`31471`) Reshaping ^^^^^^^^^ From f9730373a2d54c1aca764deae9d1c15d786b48f9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 31 Jan 2020 07:48:34 -0600 Subject: [PATCH 7/9] Move test --- pandas/tests/groupby/aggregate/test_aggregate.py | 16 ++++++++++++++++ pandas/tests/groupby/test_groupby.py | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2d31996a8a964..43b56a00dce5a 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -684,6 +684,22 @@ def aggfunc(x): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + + df = pd.DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a")["b"], func)() + + idx = pd.Int64Index([1, 2], name="a") + expected = pd.Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) + + class TestLambdaMangling: def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4cd0af40754ab..eb9552fbbebc1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2037,19 +2037,3 @@ def test_groupby_list_level(): expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) result = expected.groupby(level=[0]).mean() tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("func", ["min", "max"]) -def test_groupby_aggregate_period(func): - # GH 31471 - groups = [1, 2] - periods = pd.period_range("2020", periods=2, freq="Y") - - df = pd.DataFrame({"a": groups, "b": periods}) - - result = getattr(df.groupby("a")["b"], func)() - - idx = pd.Int64Index([1, 2], name="a") - expected = pd.Series(periods, index=idx, name="b") - - tm.assert_series_equal(result, expected) From fc301d6b8ae4b4e1c82e34928eb59a1f82b3d014 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 31 Jan 2020 07:55:36 -0600 Subject: [PATCH 8/9] Add frame test --- .../tests/groupby/aggregate/test_aggregate.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 43b56a00dce5a..71af6533db764 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -685,21 +685,33 @@ def aggfunc(x): @pytest.mark.parametrize("func", ["min", "max"]) -def test_groupby_aggregate_period(func): +def test_groupby_aggregate_period_column(func): # GH 31471 groups = [1, 2] periods = pd.period_range("2020", periods=2, freq="Y") - df = pd.DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a")["b"], func)() - idx = pd.Int64Index([1, 2], name="a") expected = pd.Series(periods, index=idx, name="b") tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_frame(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = pd.DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a"), func)() + idx = pd.Int64Index([1, 2], name="a") + expected = pd.DataFrame({"b": periods}, index=idx) + + tm.assert_frame_equal(result, expected) + + class TestLambdaMangling: def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) From a7b60ef525bbc8aa102605d96f9517f4b61e5ca6 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 1 Feb 2020 10:25:24 -0600 Subject: [PATCH 9/9] Add comment --- pandas/core/groupby/ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 03a6dca4b013f..761353ca5a6ca 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -571,6 +571,9 @@ def _cython_operation( if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype( orig_values.dtype ): + # We need to use the constructors directly for these dtypes + # since numpy won't recognize them + # https://github.com/pandas-dev/pandas/issues/31471 result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) elif is_datetimelike and kind == "aggregate": result = result.astype(orig_values.dtype)