From e412bc55d0f003c2730bb73cc3c5178eb2e698c0 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 23 Aug 2016 23:52:04 +0200 Subject: [PATCH 1/4] fix datetime issues - pretty print when date is out of bounds - decode_cf_datetime when first date is in bound but later dates are not - work around pandas Overflow error (https://github.com/pydata/pandas/issues/14068) --- xarray/conventions.py | 23 ++++++++++++++++++----- xarray/core/formatting.py | 17 ++++++++++++++++- xarray/test/test_conventions.py | 29 +++++++++++++++++++++++++++++ xarray/test/test_formatting.py | 26 ++++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 8 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 6ca947c9f32..696bcd07740 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -9,7 +9,7 @@ from pandas.tslib import OutOfBoundsDatetime from .core import indexing, ops, utils -from .core.formatting import format_timestamp, first_n_items +from .core.formatting import format_timestamp, first_n_items, last_item from .core.variable import as_variable, Variable from .core.pycompat import iteritems, OrderedDict, PY3, basestring @@ -142,6 +142,12 @@ def decode_cf_datetime(num_dates, units, calendar=None): # strings, in which case we fall back to using netCDF4 raise OutOfBoundsDatetime + # fixes: https://github.com/pydata/pandas/issues/14068 + # these lines check if the lowest or the highest value in dates + # cause an OutOfBoundsDatetime (Overflow) error + pd.to_timedelta(flat_num_dates.min(), delta) + ref_date + pd.to_timedelta(flat_num_dates.max(), delta) + ref_date + dates = (pd.to_timedelta(flat_num_dates, delta) + ref_date).values except (OutOfBoundsDatetime, OverflowError): @@ -369,10 +375,17 @@ def __init__(self, array, units, calendar=None): self.array = array self.units = units self.calendar = calendar - # Verify at least one date can be decoded successfully. 
- # Otherwise, tracebacks end up swallowed by Dataset.__repr__ when users - # try to view their lazily decoded array. - example_value = first_n_items(array, 1) or 0 + + # Verify that at least the first and last date can be decoded + # successfully. Otherwise, tracebacks end up swallowed by + # Dataset.__repr__ when users try to view their lazily decoded array. + example_value = first_n_items(array, 1) or [0] + + if array.size > 1: + # fixes (part of) https://github.com/pydata/xarray/issues/975 + example_value_end = last_item(array) + example_value = np.concatenate((example_value, example_value_end)) + try: result = decode_cf_datetime(example_value, units, calendar) except Exception: diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c4c86ed3e92..561b18e361d 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from pandas.tslib import OutOfBoundsDatetime from .options import OPTIONS from .pycompat import PY2, iteritems, unicode_type, bytes_type, dask_array_type @@ -82,10 +83,24 @@ def first_n_items(x, n_desired): x = x[indexer] return np.asarray(x).flat[:n_desired] +def last_item(x): + """Returns the last item of an array""" + if x.size == 0: + # work around for https://github.com/numpy/numpy/issues/5195 + return [] + + indexer = (slice(-1, None), ) * x.ndim + return x[indexer] + def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" - datetime_str = unicode_type(pd.Timestamp(t)) + # Timestamp is only valid for 1678 to 2262 + try: + datetime_str = unicode_type(pd.Timestamp(t)) + except OutOfBoundsDatetime: + datetime_str = unicode_type(t.__str__()) + try: date_str, time_str = datetime_str.split() except ValueError: diff --git a/xarray/test/test_conventions.py b/xarray/test/test_conventions.py index 8298feaa5e8..e62d87824c7 100644 --- a/xarray/test/test_conventions.py +++ b/xarray/test/test_conventions.py @@ -186,6 +186,35 @@ 
def test_cf_datetime(self): pd.Index(actual), units, calendar) self.assertArrayEqual(num_dates, np.around(encoded, 1)) + def test_decode_cf_datetime_overflow(self): + # checks for + # https://github.com/pydata/pandas/issues/14068 + # https://github.com/pydata/xarray/issues/975 + + from datetime import datetime + units = 'days since 2000-01-01 00:00:00' + + # date after 2262 and before 1678 + days = (-117608, 95795) + expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) + + for i, day in enumerate(days): + result = conventions.decode_cf_datetime(day, units) + self.assertEqual(result, expected[i]) + + def test_decode_cf_datetime_transition_to_invalid(self): + # manually create dataset with not-decoded date + from datetime import datetime + ds = Dataset(coords={'time' : [0, 266 * 365]}) + units = 'days since 2000-01-01 00:00:00' + ds.time.attrs = dict(units=units) + ds_decoded = conventions.decode_cf(ds) + + expected = [datetime(2000, 1, 1, 0, 0), + datetime(2265, 10, 28, 0, 0)] + + self.assertArrayEqual(ds_decoded.time.values, expected) + def test_decoded_cf_datetime_array(self): actual = conventions.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') diff --git a/xarray/test/test_formatting.py b/xarray/test/test_formatting.py index 789f4bdd7ed..e389a892341 100644 --- a/xarray/test/test_formatting.py +++ b/xarray/test/test_formatting.py @@ -37,6 +37,16 @@ def test_first_n_items(self): with self.assertRaisesRegexp(ValueError, 'at least one item'): formatting.first_n_items(array, 0) + def test_last_item(self): + array = np.arange(100) + + reshape = ((10, 10), (1, 100), (2, 2, 5, 5)) + expected = np.array(99) + + for r in reshape: + result = formatting.last_item(array.reshape(r)) + self.assertEqual(result, expected) + def test_format_item(self): cases = [ (pd.Timestamp('2000-01-01T12'), '2000-01-01T12:00:00'), @@ -74,8 +84,8 @@ def test_format_items(self): actual = ' '.join(formatting.format_items(item)) self.assertEqual(expected, 
actual) - - def test_format_array_flat(self): + +def test_format_array_flat(self): actual = formatting.format_array_flat(np.arange(100), 13) expected = '0 1 2 3 4 ...' self.assertEqual(expected, actual) @@ -106,3 +116,15 @@ def test_pretty_print(self): def test_maybe_truncate(self): self.assertEqual(formatting.maybe_truncate(u'ß', 10), u'ß') + + def test_format_timestamp_out_of_bounds(self): + from datetime import datetime + date = datetime(1300, 12, 1) + expected = '1300-12-01' + result = formatting.format_timestamp(date) + self.assertEqual(result, expected) + + date = datetime(2300, 12, 1) + expected = '2300-12-01' + result = formatting.format_timestamp(date) + self.assertEqual(result, expected) From 5862aa4a78b2dbee56131cedfd0dd438cc79205c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 24 Aug 2016 01:00:40 +0200 Subject: [PATCH 2/4] correct indentation --- xarray/test/test_formatting.py | 48 +++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/xarray/test/test_formatting.py b/xarray/test/test_formatting.py index e389a892341..c385ad44bbe 100644 --- a/xarray/test/test_formatting.py +++ b/xarray/test/test_formatting.py @@ -85,30 +85,30 @@ def test_format_items(self): self.assertEqual(expected, actual) -def test_format_array_flat(self): - actual = formatting.format_array_flat(np.arange(100), 13) - expected = '0 1 2 3 4 ...' - self.assertEqual(expected, actual) - - actual = formatting.format_array_flat(np.arange(100.0), 11) - expected = '0.0 1.0 ...' - self.assertEqual(expected, actual) - - actual = formatting.format_array_flat(np.arange(100.0), 1) - expected = '0.0 ...' - self.assertEqual(expected, actual) - - actual = formatting.format_array_flat(np.arange(3), 5) - expected = '0 1 2' - self.assertEqual(expected, actual) - - actual = formatting.format_array_flat(np.arange(4.0), 11) - expected = '0.0 1.0 ...' 
- self.assertEqual(expected, actual) - - actual = formatting.format_array_flat(np.arange(4), 0) - expected = '0 ...' - self.assertEqual(expected, actual) + def test_format_array_flat(self): + actual = formatting.format_array_flat(np.arange(100), 13) + expected = '0 1 2 3 4 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(100.0), 11) + expected = '0.0 1.0 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(100.0), 1) + expected = '0.0 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(3), 5) + expected = '0 1 2' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(4.0), 11) + expected = '0.0 1.0 ...' + self.assertEqual(expected, actual) + + actual = formatting.format_array_flat(np.arange(4), 0) + expected = '0 ...' + self.assertEqual(expected, actual) def test_pretty_print(self): self.assertEqual(formatting.pretty_print('abcdefghij', 8), 'abcde...') From 6c7c0d51a00035e06a70779f52811d9d7017f89e Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Wed, 24 Aug 2016 22:04:46 +0200 Subject: [PATCH 3/4] revise fix datetime issues --- xarray/conventions.py | 12 ++++------- xarray/core/formatting.py | 7 +++--- xarray/test/test_conventions.py | 3 +++ xarray/test/test_formatting.py | 38 ++++++++++++++++----------------- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 696bcd07740..64a677a571b 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -376,15 +376,11 @@ def __init__(self, array, units, calendar=None): self.units = units self.calendar = calendar - # Verify that at least the first and last date can be decoded - # successfully. Otherwise, tracebacks end up swallowed by + # Verify that at least the first and last date can be decoded + # successfully. 
Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. - example_value = first_n_items(array, 1) or [0] - - if array.size > 1: - # fixes (part of) https://github.com/pydata/xarray/issues/975 - example_value_end = last_item(array) - example_value = np.concatenate((example_value, example_value_end)) + example_value = np.concatenate([first_n_items(array, 1), + last_item(array), [0]]) try: result = decode_cf_datetime(example_value, units, calendar) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 561b18e361d..ed14eb16471 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -90,16 +90,15 @@ def last_item(x): return [] indexer = (slice(-1, None), ) * x.ndim - return x[indexer] - + return np.array(x[indexer], ndmin=1) def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" - # Timestamp is only valid for 1678 to 2262 + # Timestamp is only valid for 1678 to 2262 try: datetime_str = unicode_type(pd.Timestamp(t)) except OutOfBoundsDatetime: - datetime_str = unicode_type(t.__str__()) + datetime_str = unicode_type(t) try: date_str, time_str = datetime_str.split() diff --git a/xarray/test/test_conventions.py b/xarray/test/test_conventions.py index e62d87824c7..361435dab56 100644 --- a/xarray/test/test_conventions.py +++ b/xarray/test/test_conventions.py @@ -186,6 +186,7 @@ def test_cf_datetime(self): pd.Index(actual), units, calendar) self.assertArrayEqual(num_dates, np.around(encoded, 1)) + @requires_netCDF4 def test_decode_cf_datetime_overflow(self): # checks for # https://github.com/pydata/pandas/issues/14068 @@ -202,6 +203,7 @@ def test_decode_cf_datetime_overflow(self): result = conventions.decode_cf_datetime(day, units) self.assertEqual(result, expected[i]) + @requires_netCDF4 def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime @@ -369,6 +371,7 
@@ def test_decode_non_standard_calendar_fallback(self): self.assertEqual(actual.dtype, np.dtype('O')) self.assertArrayEqual(actual, expected) + @requires_netCDF4 def test_cf_datetime_nan(self): for num_dates, units, expected_list in [ ([np.nan], 'days since 2000-01-01', ['NaT']), diff --git a/xarray/test/test_formatting.py b/xarray/test/test_formatting.py index c385ad44bbe..0b08fc19680 100644 --- a/xarray/test/test_formatting.py +++ b/xarray/test/test_formatting.py @@ -84,31 +84,31 @@ def test_format_items(self): actual = ' '.join(formatting.format_items(item)) self.assertEqual(expected, actual) - + def test_format_array_flat(self): - actual = formatting.format_array_flat(np.arange(100), 13) - expected = '0 1 2 3 4 ...' - self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(100), 13) + expected = '0 1 2 3 4 ...' + self.assertEqual(expected, actual) - actual = formatting.format_array_flat(np.arange(100.0), 11) - expected = '0.0 1.0 ...' - self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(100.0), 11) + expected = '0.0 1.0 ...' + self.assertEqual(expected, actual) - actual = formatting.format_array_flat(np.arange(100.0), 1) - expected = '0.0 ...' - self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(100.0), 1) + expected = '0.0 ...' + self.assertEqual(expected, actual) - actual = formatting.format_array_flat(np.arange(3), 5) - expected = '0 1 2' - self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(3), 5) + expected = '0 1 2' + self.assertEqual(expected, actual) - actual = formatting.format_array_flat(np.arange(4.0), 11) - expected = '0.0 1.0 ...' - self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(4.0), 11) + expected = '0.0 1.0 ...' + self.assertEqual(expected, actual) - actual = formatting.format_array_flat(np.arange(4), 0) - expected = '0 ...' 
- self.assertEqual(expected, actual) + actual = formatting.format_array_flat(np.arange(4), 0) + expected = '0 ...' + self.assertEqual(expected, actual) def test_pretty_print(self): self.assertEqual(formatting.pretty_print('abcdefghij', 8), 'abcde...') From b552b28eda4f14dd59327f7550c43183818c6318 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Thu, 25 Aug 2016 22:40:40 +0200 Subject: [PATCH 4/4] update whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 97279fffb89..8443212f13c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,8 @@ Enhancements Bug fixes ~~~~~~~~~ +- Fix issues for dates outside the valid range of pandas timestamps + (:issue:`975`). By `Mathias Hauser <https://github.com/mathause>`_. .. _whats-new.0.8.2: