diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 16033dd75204c..5824e5824e8b5 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -389,9 +389,8 @@ Bug Fixes - Bug in ``GroupBy.transform()`` where int groups with a transform that didn't preserve the index were incorrectly truncated (:issue:`7972`). - - - +- Bug in ``groupby`` where callable objects without name attributes would take the wrong path, + and produce a ``DataFrame`` instead of a ``Series`` (:issue:`7929`) - Bug in ``read_html`` where the ``infer_types`` argument forced coercion of diff --git a/pandas/core/common.py b/pandas/core/common.py index d8314977742a4..bc4c95ed3323e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -9,6 +9,7 @@ import csv import types from datetime import datetime, timedelta +from functools import partial from numpy.lib.format import read_array, write_array import numpy as np @@ -2432,7 +2433,22 @@ def _is_sequence(x): except (TypeError, AttributeError): return False - +def _get_callable_name(obj): + # typical case has name + if hasattr(obj, '__name__'): + return getattr(obj, '__name__') + # some objects don't; could recurse + if isinstance(obj, partial): + return _get_callable_name(obj.func) + # fall back to class name + if hasattr(obj, '__call__'): + return obj.__class__.__name__ + # everything failed (probably because the argument + # wasn't actually callable); we return None + # instead of the empty string in this case to allow + # distinguishing between no name and a name of '' + return None + _string_dtypes = frozenset(map(_get_dtype_from_object, (compat.binary_type, compat.text_type))) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f26a7269772a3..1f89bfe4cec9a 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1221,7 +1221,8 @@ def apply(self, f, data, axis=0): group_keys = self._get_group_keys() # oh boy - if (f.__name__ not in _plotting_methods and + f_name = com._get_callable_name(f) + if (f_name not in _plotting_methods and hasattr(splitter, 'fast_apply') and axis == 0): try: values, mutated = splitter.fast_apply(f, group_keys) @@ -2185,11 +2186,11 @@ def _aggregate_multiple_funcs(self, arg): if isinstance(f, compat.string_types): columns.append(f) else: - columns.append(f.__name__) + # protect against callables without names + columns.append(com._get_callable_name(f)) arg = lzip(columns, arg) results = {} - for name, func in arg: if name in results: raise SpecificationError('Function names must be unique, ' diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index a52be0ee6a82e..5e91adbe1a2fa 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -38,6 +38,26 @@ def __getitem__(self): assert(not is_seq(A())) +def test_get_callable_name(): + from functools import partial + getname = com._get_callable_name + + def fn(x): + return x + lambda_ = lambda x: x + part1 = partial(fn) + part2 = partial(part1) + class somecall(object): + def __call__(self): + return x + + assert getname(fn) == 'fn' + assert getname(lambda_) + assert getname(part1) == 'fn' + assert getname(part2) == 'fn' + assert getname(somecall()) == 'somecall' + assert getname(1) is None + def test_notnull(): assert notnull(1.) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f621b0fb94eaf..3a744129f0685 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -25,6 +25,7 @@ from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict +from functools import partial import pandas.core.common as com import numpy as np @@ -2910,6 +2911,24 @@ def test_multi_function_flexible_mix(self): assert_frame_equal(result, expected) assert_frame_equal(result2, expected) + def test_agg_callables(self): + # GH 7929 + df = DataFrame({'foo' : [1,2], 'bar' :[3,4]}).astype(np.int64) + + class fn_class(object): + def __call__(self, x): + return sum(x) + + equiv_callables = [sum, np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), fn_class()] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby('foo').agg(ecall) + assert_frame_equal(result, expected) + def test_set_group_name(self): def f(group): assert group.name is not None @@ -4530,6 +4549,8 @@ def test_transform_doesnt_clobber_ints(self): tm.assert_frame_equal(result, expected) + + def assert_fp_equal(a, b): assert (np.abs(a - b) < 1e-12).all() diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index ff8b6945a23be..f4a96f5defab0 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1,6 +1,7 @@ # pylint: disable=E1101 from datetime import datetime, timedelta +from functools import partial from pandas.compat import range, lrange, zip, product import numpy as np @@ -140,6 +141,30 @@ def _ohlc(group): exc.args += ('how=%s' % arg,) raise + def test_resample_how_callables(self): + # GH 7929 + data = np.arange(5, dtype=np.int64) + ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d') + df = pd.DataFrame({"A": data, "B": data}, index=ind) + + def fn(x, a=1): + return str(type(x)) + + class fn_class: + def __call__(self, x): + return str(type(x)) + + df_standard = df.resample("M", how=fn) + df_lambda = df.resample("M", how=lambda x: str(type(x))) + df_partial = df.resample("M", how=partial(fn)) + df_partial2 = df.resample("M", how=partial(fn, a=2)) + df_class = df.resample("M", how=fn_class()) + + assert_frame_equal(df_standard, df_lambda) + assert_frame_equal(df_standard, df_partial) + assert_frame_equal(df_standard, df_partial2) + assert_frame_equal(df_standard, df_class) + def test_resample_basic_from_daily(self): # from daily dti = DatetimeIndex( @@ -765,6 +790,7 @@ def test_resample_timegrouper(self): assert_frame_equal(result, expected) + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) return Series(np.random.randn(len(rng)), index=rng)