Skip to content

Commit a4a1d12

Browse files
committed
Merge pull request #10064 from jreback/groupby
BUG: Bug in grouping with multiple pd.Grouper where one is non-time based (GH10063)
2 parents 3aa8778 + 57427ac commit a4a1d12

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

doc/source/whatsnew/v0.16.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ Bug Fixes
231231
- Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`).
232232
- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`)
233233
- Bug in ``equals`` causing false negatives when block order differed (:issue:`9330`)
234-
234+
- Bug in grouping with multiple ``pd.Grouper`` where one is non-time based (:issue:`10063`)
235235
- Bug in ``read_sql_table`` raising an error when reading a postgres table with timezone (:issue:`7139`)
236236
- Bug where ``DataFrame`` slicing may not retain metadata (:issue:`9776`)
237237
- Bug where ``TimedeltaIndex`` were not properly serialized in fixed ``HDFStore`` (:issue:`9635`)

pandas/core/groupby.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,10 @@ def _set_grouper(self, obj, sort=False):
280280
return self.grouper
281281

282282
def _get_binner_for_grouping(self, obj):
283-
raise AbstractMethodError(self)
283+
""" default to the standard binner here """
284+
group_axis = obj._get_axis(self.axis)
285+
return Grouping(group_axis, None, obj=obj, name=self.key,
286+
level=self.level, sort=self.sort, in_axis=False)
284287

285288
@property
286289
def groups(self):
@@ -1964,8 +1967,12 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
19641967
if self.name is None:
19651968
self.name = grouper.name
19661969

1970+
# we are done
1971+
if isinstance(self.grouper, Grouping):
1972+
self.grouper = self.grouper.grouper
1973+
19671974
# no level passed
1968-
if not isinstance(self.grouper, (Series, Index, Categorical, np.ndarray)):
1975+
elif not isinstance(self.grouper, (Series, Index, Categorical, np.ndarray)):
19691976
if getattr(self.grouper, 'ndim', 1) != 1:
19701977
t = self.name or str(type(self.grouper))
19711978
raise ValueError("Grouper for '%s' not 1-dimensional" % t)
@@ -2834,7 +2841,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
28342841
v = next(v for v in values if v is not None)
28352842
except StopIteration:
28362843
# If all values are None, then this will throw an error.
2837-
# We'd prefer it return an empty dataframe.
2844+
# We'd prefer it return an empty dataframe.
28382845
return DataFrame()
28392846
if v is None:
28402847
return DataFrame()

pandas/tests/test_groupby.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,21 @@ def test_grouper_creation_bug(self):
430430
expected = s.groupby(level='one').sum()
431431
assert_series_equal(result, expected)
432432

433+
def test_grouper_getting_correct_binner(self):
434+
435+
# GH 10063
436+
# using a non-time-based grouper and a time-based grouper
437+
# and specifying levels
438+
df = DataFrame({'A' : 1 },
439+
index=pd.MultiIndex.from_product([list('ab'),
440+
date_range('20130101',periods=80)],
441+
names=['one','two']))
442+
result = df.groupby([pd.Grouper(level='one'),pd.Grouper(level='two',freq='M')]).sum()
443+
expected = DataFrame({'A' : [31,28,21,31,28,21]},
444+
index=MultiIndex.from_product([list('ab'),date_range('20130101',freq='M',periods=3)],
445+
names=['one','two']))
446+
assert_frame_equal(result, expected)
447+
433448
def test_grouper_iter(self):
434449
self.assertEqual(sorted(self.df.groupby('A').grouper), ['bar', 'foo'])
435450

0 commit comments

Comments
 (0)