Skip to content

BUG: Allow __name__less callables as groupby hows (GH7929) #7974

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -389,9 +389,8 @@ Bug Fixes
- Bug in ``GroupBy.transform()`` where int groups with a transform that
didn't preserve the index were incorrectly truncated (:issue:`7972`).




- Bug in ``groupby`` where callable objects without a ``__name__`` attribute would take the wrong path
and produce a ``DataFrame`` instead of a ``Series`` (:issue:`7929`).


- Bug in ``read_html`` where the ``infer_types`` argument forced coercion of
Expand Down
18 changes: 17 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import csv
import types
from datetime import datetime, timedelta
from functools import partial

from numpy.lib.format import read_array, write_array
import numpy as np
Expand Down Expand Up @@ -2432,7 +2433,22 @@ def _is_sequence(x):
except (TypeError, AttributeError):
return False


def _get_callable_name(obj):
    """
    Return a display name for a callable, or None if `obj` is not callable.

    The name is looked up in this order:

    1. ``obj.__name__`` when the attribute exists (plain functions,
       lambdas, classes, builtins, ...).
    2. For ``functools.partial`` objects, the name of the wrapped
       ``func``, resolved recursively so nested partials also work.
    3. The name of the object's class, for any other callable instance.

    Parameters
    ----------
    obj : object
        Usually a callable; non-callables are accepted and yield None.

    Returns
    -------
    name : str or None
        None (rather than the empty string) when no name could be
        determined, so that callers can distinguish "no name" from a
        name of ''.
    """
    # typical case has name
    if hasattr(obj, '__name__'):
        return obj.__name__
    # partial objects carry no __name__ of their own; use the wrapped one
    if isinstance(obj, partial):
        return _get_callable_name(obj.func)
    # fall back to class name.  callable() inspects the type, so an
    # instance attribute that merely happens to be named ``__call__``
    # is not mistaken for real callability (hasattr would be fooled).
    if callable(obj):
        return obj.__class__.__name__
    # everything failed (probably because the argument wasn't actually
    # callable)
    return None

# Immutable set holding the result of ``_get_dtype_from_object`` applied to
# ``compat.binary_type`` and ``compat.text_type``.
_string_dtypes = frozenset(map(_get_dtype_from_object, (compat.binary_type,
compat.text_type)))

Expand Down
7 changes: 4 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,8 @@ def apply(self, f, data, axis=0):
group_keys = self._get_group_keys()

# oh boy
if (f.__name__ not in _plotting_methods and
f_name = com._get_callable_name(f)
if (f_name not in _plotting_methods and
hasattr(splitter, 'fast_apply') and axis == 0):
try:
values, mutated = splitter.fast_apply(f, group_keys)
Expand Down Expand Up @@ -2185,11 +2186,11 @@ def _aggregate_multiple_funcs(self, arg):
if isinstance(f, compat.string_types):
columns.append(f)
else:
columns.append(f.__name__)
# protect against callables without names
columns.append(com._get_callable_name(f))
arg = lzip(columns, arg)

results = {}

for name, func in arg:
if name in results:
raise SpecificationError('Function names must be unique, '
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,26 @@ def __getitem__(self):

assert(not is_seq(A()))

def test_get_callable_name():
    """Check ``com._get_callable_name`` for each supported kind of callable."""
    from functools import partial
    getname = com._get_callable_name

    def fn(x):
        return x
    lambda_ = lambda x: x
    part1 = partial(fn)
    part2 = partial(part1)

    class somecall(object):
        # take ``x`` explicitly so the body does not refer to an
        # undefined name if the instance is ever actually called
        def __call__(self, x):
            return x

    assert getname(fn) == 'fn'
    # pin the exact value: a bare truthiness check would accept any
    # non-empty name
    assert getname(lambda_) == '<lambda>'
    # partials (including nested ones) report the wrapped function's name
    assert getname(part1) == 'fn'
    assert getname(part2) == 'fn'
    # callable instances fall back to their class name
    assert getname(somecall()) == 'somecall'
    # non-callables have no name at all
    assert getname(1) is None


def test_notnull():
assert notnull(1.)
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pandas.core.panel import Panel
from pandas.tools.merge import concat
from collections import defaultdict
from functools import partial
import pandas.core.common as com
import numpy as np

Expand Down Expand Up @@ -2910,6 +2911,24 @@ def test_multi_function_flexible_mix(self):
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)

def test_agg_callables(self):
    """GH 7929: ``agg`` must give the same frame for every flavour of
    callable that computes a sum, whether or not it has a ``__name__``."""
    frame = DataFrame({'foo' : [1,2], 'bar' :[3,4]}).astype(np.int64)

    class fn_class(object):
        def __call__(self, x):
            return sum(x)

    # plain functions, lambdas, a partial and a callable instance
    equiv_callables = (
        sum,
        np.sum,
        lambda x: sum(x),
        lambda x: x.sum(),
        partial(sum),
        fn_class(),
    )

    # reference result: aggregate with the builtin ``sum``
    expected = frame.groupby("foo").agg(sum)

    for candidate in equiv_callables:
        assert_frame_equal(frame.groupby('foo').agg(candidate), expected)

def test_set_group_name(self):
def f(group):
assert group.name is not None
Expand Down Expand Up @@ -4530,6 +4549,8 @@ def test_transform_doesnt_clobber_ints(self):
tm.assert_frame_equal(result, expected)




def assert_fp_equal(a, b, tol=1e-12):
    """
    Assert that `a` and `b` agree element-wise to within `tol`.

    Parameters
    ----------
    a, b : array-like
        Operands supporting ``a - b``; the absolute difference is
        compared element-wise against `tol`.
    tol : float, default 1e-12
        Strict upper bound on the allowed absolute difference.

    Raises
    ------
    AssertionError
        If any element of ``abs(a - b)`` is not below `tol`.
    """
    diff = np.abs(a - b)
    assert (diff < tol).all(), (
        'values differ by at least the tolerance %r' % (tol,))

Expand Down
26 changes: 26 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=E1101

from datetime import datetime, timedelta
from functools import partial

from pandas.compat import range, lrange, zip, product
import numpy as np
Expand Down Expand Up @@ -140,6 +141,30 @@ def _ohlc(group):
exc.args += ('how=%s' % arg,)
raise

def test_resample_how_callables(self):
    """GH 7929: ``resample`` must give the same frame for a plain
    function, a lambda, partials and a callable instance used as ``how``."""
    data = np.arange(5, dtype=np.int64)
    ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d')
    df = pd.DataFrame({"A": data, "B": data}, index=ind)

    def fn(x, a=1):
        return str(type(x))

    class fn_class:
        def __call__(self, x):
            return str(type(x))

    # reference result from the ordinary named function
    baseline = df.resample("M", how=fn)

    # equivalent callables, most of which lack a ``__name__``
    alternatives = [
        lambda x: str(type(x)),
        partial(fn),
        partial(fn, a=2),
        fn_class(),
    ]
    resampled = [df.resample("M", how=how) for how in alternatives]

    for frame in resampled:
        assert_frame_equal(baseline, frame)

def test_resample_basic_from_daily(self):
# from daily
dti = DatetimeIndex(
Expand Down Expand Up @@ -765,6 +790,7 @@ def test_resample_timegrouper(self):
assert_frame_equal(result, expected)



def _simple_ts(start, end, freq='D'):
    """Return a Series of standard-normal random values indexed by
    ``date_range(start, end, freq=freq)``."""
    index = date_range(start, end, freq=freq)
    values = np.random.randn(len(index))
    return Series(values, index=index)
Expand Down