diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index c18bedd0cf6eb..d3d7fe1637900 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -747,6 +747,7 @@ Bug Fixes
 - Bug in ``Index`` construction with a mixed list of tuples (:issue:`10697`)
 - Bug in ``DataFrame.reset_index`` when index contains `NaT`. (:issue:`10388`)
 - Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)
+- Bug in ``BinGrouper.group_info`` where returned values are not compatible with base class (:issue:`10914`)
 
 
 - Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 444f149e70e34..fae54fa298e85 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1790,8 +1790,10 @@ def indices(self):
 
     @cache_readonly
     def group_info(self):
-        # for compat
-        return self.bins, self.binlabels, self.ngroups
+        ngroups = self.ngroups
+        obs_group_ids = np.arange(ngroups)
+        comp_ids = np.repeat(np.arange(ngroups), np.diff(np.r_[0, self.bins]))
+        return comp_ids, obs_group_ids, ngroups
 
     @cache_readonly
     def ngroups(self):
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index 7dafc88bf9239..0bee6f514cad0 100644
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -916,6 +916,31 @@ def test_resample_timegrouper(self):
         result = df.groupby(pd.Grouper(freq='M', key='A')).count()
         assert_frame_equal(result, expected)
 
+    def test_resample_group_info(self):  # GH10914
+        for n, k in product((10000, 100000), (10, 100, 1000)):
+            dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
+            ts = Series(np.random.randint(0, n // k, n),
+                        index=np.random.choice(dr, n))
+
+            left = ts.resample('30T', how='nunique')
+            ix = date_range(start=ts.index.min(),
+                            end=ts.index.max(),
+                            freq='30T')
+
+            vals = ts.values
+            bins = np.searchsorted(ix.values, ts.index, side='right')
+
+            sorter = np.lexsort((vals, bins))
+            vals, bins = vals[sorter], bins[sorter]
+
+            mask = np.r_[True, vals[1:] != vals[:-1]]
+            mask |= np.r_[True, bins[1:] != bins[:-1]]
+
+            arr = np.bincount(bins[mask] - 1, minlength=len(ix))
+            right = Series(arr, index=ix)
+
+            assert_series_equal(left, right)
+
     def test_resmaple_dst_anchor(self):
         # 5172
         dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')