From dbb054f9b787dbbe09ffa60424e19d1b1a0d973f Mon Sep 17 00:00:00 2001 From: wcwagner Date: Sun, 31 Jul 2016 22:10:39 -0400 Subject: [PATCH] BUG: loffset not applied when using resample with agg() (GH13218) --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/tseries/resample.py | 19 ++++- pandas/tseries/tests/test_resample.py | 111 +++++++++++++++++++++----- 3 files changed, 111 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 30a0d918b46ec..435c9e8e21e2e 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -975,3 +975,4 @@ Bug Fixes - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) - Bug in ``pd.to_datetime()`` did not cast floats correctly when ``unit`` was specified, resulting in truncated datetime (:issue:`13845`) +- Bug in ``resample`` where ``loffset`` was not applied when calling ``resample.agg()`` on a timeseries (:issue:`13218`) \ No newline at end of file diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 38c2e009a01f3..b959f22688adc 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -309,6 +309,9 @@ def aggregate(self, arg, *args, **kwargs): return self._groupby_and_aggregate(arg, *args, **kwargs) + # GH 13218 + if isinstance(arg, (dict, list)): + result = self._apply_loffset(result) return result @@ -368,7 +371,7 @@ def _gotitem(self, key, ndim, subset=None): return grouped def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): - """ revaluate the obj with a groupby aggregation """ + """ re-evaluate the obj with a groupby aggregation """ if grouper is None: self._set_binner() @@ -396,7 +399,14 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): return self._wrap_result(result) def _apply_loffset(self, result): - """if loffset if set, offset the result index""" + """ + if loffset is set, offset the result index + + Parameters + ---------- + result : Series or DataFrame + the result of resample + """ loffset = self.loffset if isinstance(loffset, compat.string_types): loffset = to_offset(self.loffset) @@ -406,6 +416,7 @@ def _apply_loffset(self, result): isinstance(result.index, DatetimeIndex) and len(result.index) > 0 ) + if needs_offset: result.index = result.index + loffset @@ -771,6 +782,10 @@ def aggregate(self, arg, *args, **kwargs): if result is None: result = self._downsample(arg, *args, **kwargs) + # GH 13218 + if isinstance(arg, (dict, list)): + result = self._apply_loffset(result) + return result agg = aggregate diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 85d8cd52e1866..82dbf5ff692ab 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1098,25 +1098,35 @@ def test_resample_loffset(self): def test_resample_loffset_count(self): # GH 12725 - start_time = '1/1/2000 00:00:00' - rng = date_range(start_time, periods=100, freq='S') - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('10S', loffset='1s').count() - - expected_index = ( - date_range(start_time, periods=10, freq='10S') + - timedelta(seconds=1) - ) - expected = pd.Series(10, index=expected_index) - - assert_series_equal(result, expected) - - # Same issue should apply to .size() since it goes through - # same code path - result = ts.resample('10S', loffset='1s').size() + s = self.create_series() + df = s.to_frame('value') + result = df.resample('2D', loffset='2H').count() + expected_index = DatetimeIndex(start=df.index[0], + freq='2D', + periods=len(df.index) / 2) + expected_index = expected_index + timedelta(hours=2) + expected = DataFrame({'value': 2}, + index=expected_index) + assert_frame_equal(result, expected) - assert_series_equal(result, expected) + def test_resample_loffset_agg(self): + # GH 13218 + s = self.create_series() + expected_means = [s.values[i:i + 2].mean() + for i in range(0, len(s.values), 2)] + df = s.to_frame('value') + for arg in ['mean', {'value': 'mean'}, ['mean']]: + result = df.resample('2D', loffset='2H').agg(arg) + expected_index = DatetimeIndex(start=df.index[0], + freq='2D', + periods=len(df.index) / 2) + expected_index = expected_index + timedelta(hours=2) + expected = DataFrame({'value': expected_means}, + index=expected_index) + if isinstance(arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', + 'mean')]) + assert_frame_equal(result, expected) def test_resample_upsample(self): # from daily @@ -2509,6 +2519,36 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample('7D').sum() assert_frame_equal(result, expected) + def test_resample_loffset_count(self): + # GH 12725 + s = self.create_series() + df = s.to_frame('value') + result = df.resample('2D', loffset='2H').count() + expected_index = df.index.take( + np.arange(0, len(df.index), 2)).to_datetime() + expected_index = expected_index + timedelta(hours=2) + expected = DataFrame({'value': 2}, + index=expected_index) + assert_frame_equal(result, expected) + + def test_resample_loffset_agg(self): + # GH 13218 + s = self.create_series() + expected_means = [s.values[i:i + 2].mean() + for i in range(0, len(s.values), 2)] + df = s.to_frame('value') + for arg in ['mean', {'value': 'mean'}, ['mean']]: + result = df.resample('2D', loffset='2H').agg(arg) + expected_index = df.index.take( + np.arange(0, len(df.index), 2)).to_datetime() + expected_index = expected_index + timedelta(hours=2) + expected = DataFrame({'value': expected_means}, + index=expected_index) + if isinstance(arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', + 'mean')]) + assert_frame_equal(result, expected) + class TestTimedeltaIndex(Base, tm.TestCase): _multiprocess_can_split_ = True @@ -2531,6 +2571,41 @@ def test_asfreq_bug(self): freq='1T')) assert_frame_equal(result, expected) + def test_resample_loffset_count(self): + # GH 12725 + s = self.create_series() + df = s.to_frame('value') + result = df.resample('2D', loffset='2H').count() + + # GH 13022, 7687 resample w/ TimedeltaIndex results in incorrect index + expected_index = timedelta_range(start=df.index[0], + freq='2D', + periods=len(df.index) / 2) + expected = DataFrame({'value': 2}, + index=expected_index) + with tm.assertRaises(AssertionError): + assert_frame_equal(result, expected) + + def test_resample_loffset_agg(self): + # GH 13218 + s = self.create_series() + expected_means = [s.values[i:i + 2].mean() + for i in range(0, len(s.values), 2)] + df = s.to_frame('value') + for arg in ['mean', {'value': 'mean'}, ['mean']]: + result = df.resample('2D', loffset='2H').agg(arg) + expected_index = timedelta_range(start=df.index[0], + freq='2D', + periods=len(df.index) / 2) + expected = DataFrame({'value': expected_means}, + index=expected_index) + if isinstance(arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', + 'mean')]) + # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex + with tm.assertRaises(AssertionError): + assert_frame_equal(result, expected) + class TestResamplerGrouper(tm.TestCase): def setUp(self):