diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 95fab6a18ffe1..478e986a26231 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Bug fixes ~~~~~~~~~ - +- Bug in :meth:`GroupBy.apply` was raising ``TypeError`` if called with a function that returned a non-pandas, non-scalar object (e.g. a list) (:issue:`31441`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 8571761f77265..89164c527002a 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -501,9 +501,9 @@ def apply_frame_axis0(object frame, object f, object names, if not is_scalar(piece): # Need to copy data to avoid appending references - if hasattr(piece, "copy"): + try: piece = piece.copy(deep="all") - else: + except (TypeError, AttributeError): piece = copy(piece) results.append(piece) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 9c3a832121c7f..4a879e50144e4 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -785,3 +785,27 @@ def test_apply_index_has_complex_internals(index): df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) result = df.groupby("group").apply(lambda x: x) tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize( + "function, expected_values", + [ + (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), + (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), + ( + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], + ), + ( + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + ), + ], +) +def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): + # GH 31441 + df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) + result = 
df.groupby("groups").apply(function) + expected = pd.Series(expected_values, index=pd.Index(["A", "B"], name="groups")) + tm.assert_series_equal(result, expected)