From f235902310b797b4f4084f20bc4e035170931766 Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Mon, 3 Aug 2015 15:58:39 -0400 Subject: [PATCH 1/2] ENH: Fixed DF.apply for functions returning a dict (closes #8735) --- doc/source/whatsnew/v0.17.0.txt | 2 ++ pandas/core/frame.py | 7 +++++-- pandas/src/reduce.pyx | 2 +- pandas/tests/test_frame.py | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 9049d8de550d0..b6b55a92e774f 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -218,6 +218,8 @@ Other enhancements - Support pickling of ``Period`` objects (:issue:`10439`) +- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`). + .. _whatsnew_0170.api: .. _whatsnew_0170.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3e908bf9d579b..498a4bb7f8a98 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3922,10 +3922,13 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): if reduce: try: - # the is the fast-path values = self.values - dummy = Series(NA, index=self._get_axis(axis), + # Create a dummy Series from an empty array + # Unlike filling with NA, this works for any dtype + index = self._get_axis(axis) + empty_arr = np.empty(len(index), dtype=values.dtype) + dummy = Series(empty_arr, index=self._get_axis(axis), dtype=values.dtype) labels = self._get_agg_axis(axis) diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index 09f8e0ab42924..eb736e4569009 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -133,7 +133,7 @@ cdef class Reducer: else: res = self.f(chunk) - if hasattr(res,'values'): + if hasattr(res,'values') and isinstance(res.values, np.ndarray): res = res.values if i == 0: result = _get_result_array(res, diff --git a/pandas/tests/test_frame.py 
b/pandas/tests/test_frame.py index 16143fa612c48..6d65ab470be1f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11255,6 +11255,25 @@ def test_apply_multi_index(self): res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) tm.assertIsInstance(res.index, MultiIndex) + def test_apply_dict(self): + + # GH 8735 + A = DataFrame([['foo', 'bar'], ['spam', 'eggs']]) + A_dicts = pd.Series([dict([(0, 'foo'), (1, 'spam')]), + dict([(0, 'bar'), (1, 'eggs')])]) + B = DataFrame([[0, 1], [2, 3]]) + B_dicts = pd.Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) + fn = lambda x: x.to_dict() + + for df, dicts in [(A, A_dicts), (B, B_dicts)]: + reduce_true = df.apply(fn, reduce=True) + reduce_false = df.apply(fn, reduce=False) + reduce_none = df.apply(fn, reduce=None) + + assert_series_equal(reduce_true, dicts) + assert_frame_equal(reduce_false, df) + assert_series_equal(reduce_none, dicts) + def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) assert_frame_equal(applied, self.frame * 2) From 3871a6b20eab1dd8d579fa7361b2b4c1129b18cd Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Thu, 6 Aug 2015 08:02:23 -0400 Subject: [PATCH 2/2] Avoid catching exceptions unnecessarily in DF.apply --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 498a4bb7f8a98..29d8b036f322f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3921,16 +3921,16 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True): # e.g. 
if we want to apply to a SparseFrame, then can't directly reduce if reduce: - try: - # the is the fast-path - values = self.values - # Create a dummy Series from an empty array - # Unlike filling with NA, this works for any dtype - index = self._get_axis(axis) - empty_arr = np.empty(len(index), dtype=values.dtype) - dummy = Series(empty_arr, index=self._get_axis(axis), - dtype=values.dtype) + # this is the fast-path + values = self.values + # Create a dummy Series from an empty array + # Unlike filling with NA, this works for any dtype + index = self._get_axis(axis) + empty_arr = np.empty(len(index), dtype=values.dtype) + dummy = Series(empty_arr, index=self._get_axis(axis), + dtype=values.dtype) + try: labels = self._get_agg_axis(axis) result = lib.reduce(values, func, axis=axis, dummy=dummy, labels=labels)