Skip to content

fix datetime issues #984

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 25, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Enhancements

Bug fixes
~~~~~~~~~
- Fix issues for dates outside the valid range of pandas timestamps
(:issue:`975`). By `Mathias Hauser <https://github.com/mathause>`_.

.. _whats-new.0.8.2:

Expand Down
19 changes: 14 additions & 5 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandas.tslib import OutOfBoundsDatetime

from .core import indexing, ops, utils
from .core.formatting import format_timestamp, first_n_items
from .core.formatting import format_timestamp, first_n_items, last_item
from .core.variable import as_variable, Variable
from .core.pycompat import iteritems, OrderedDict, PY3, basestring

Expand Down Expand Up @@ -142,6 +142,12 @@ def decode_cf_datetime(num_dates, units, calendar=None):
# strings, in which case we fall back to using netCDF4
raise OutOfBoundsDatetime

# fixes: https://github.com/pydata/pandas/issues/14068
# these lines check whether the lowest or the highest value in dates
# causes an OutOfBoundsDatetime (Overflow) error
pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date

dates = (pd.to_timedelta(flat_num_dates, delta) + ref_date).values

except (OutOfBoundsDatetime, OverflowError):
Expand Down Expand Up @@ -369,10 +375,13 @@ def __init__(self, array, units, calendar=None):
self.array = array
self.units = units
self.calendar = calendar
# Verify at least one date can be decoded successfully.
# Otherwise, tracebacks end up swallowed by Dataset.__repr__ when users
# try to view their lazily decoded array.
example_value = first_n_items(array, 1) or 0

# Verify that at least the first and last date can be decoded
# successfully. Otherwise, tracebacks end up swallowed by
# Dataset.__repr__ when users try to view their lazily decoded array.
example_value = np.concatenate([first_n_items(array, 1),
last_item(array), [0]])

try:
result = decode_cf_datetime(example_value, units, calendar)
except Exception:
Expand Down
16 changes: 15 additions & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
from pandas.tslib import OutOfBoundsDatetime

from .options import OPTIONS
from .pycompat import PY2, iteritems, unicode_type, bytes_type, dask_array_type
Expand Down Expand Up @@ -82,10 +83,23 @@ def first_n_items(x, n_desired):
x = x[indexer]
return np.asarray(x).flat[:n_desired]

def last_item(x):
    """Return the last item of an array as a flat, one-element sequence.

    Parameters
    ----------
    x : array-like
        Object exposing ``size`` and ``ndim`` and supporting indexing with a
        tuple of slices.

    Returns
    -------
    list or numpy.ndarray
        An empty list when ``x`` has no elements; otherwise a 1-d array of
        shape ``(1,)`` holding the element at the last position along every
        axis of ``x``.
    """
    if x.size == 0:
        # workaround for https://github.com/numpy/numpy/issues/5195
        return []

    # Use slices instead of integer indices so only one element is requested.
    indexer = (slice(-1, None),) * x.ndim
    # The sliced result has shape (1,) * ndim; flatten it so it can be
    # concatenated with other 1-d sequences regardless of the rank of ``x``.
    return np.ravel(np.asarray(x[indexer]))

def format_timestamp(t):
"""Cast given object to a Timestamp and return a nicely formatted string"""
datetime_str = unicode_type(pd.Timestamp(t))
# Timestamp is only valid for 1678 to 2262
try:
datetime_str = unicode_type(pd.Timestamp(t))
except OutOfBoundsDatetime:
datetime_str = unicode_type(t)

try:
date_str, time_str = datetime_str.split()
except ValueError:
Expand Down
32 changes: 32 additions & 0 deletions xarray/test/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,37 @@ def test_cf_datetime(self):
pd.Index(actual), units, calendar)
self.assertArrayEqual(num_dates, np.around(encoded, 1))

@requires_netCDF4
def test_decode_cf_datetime_overflow(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to mark these tests as requiring netCDF4 using @requires_netCDF4.

# checks for
# https://github.com/pydata/pandas/issues/14068
# https://github.com/pydata/xarray/issues/975

from datetime import datetime
units = 'days since 2000-01-01 00:00:00'

# dates before 1678 and after 2262
days = (-117608, 95795)
expected = (datetime(1677, 12, 31), datetime(2262, 4, 12))

for i, day in enumerate(days):
result = conventions.decode_cf_datetime(day, units)
self.assertEqual(result, expected[i])

@requires_netCDF4
def test_decode_cf_datetime_transition_to_invalid(self):
    """Decode a time axis that starts in 2000 and ends in the year 2265."""
    from datetime import datetime

    # Build a dataset whose time coordinate still holds raw, undecoded
    # numbers, then attach the units afterwards.
    time_units = 'days since 2000-01-01 00:00:00'
    raw = Dataset(coords={'time' : [0, 266 * 365]})
    raw.time.attrs = dict(units=time_units)

    decoded = conventions.decode_cf(raw)

    self.assertArrayEqual(
        decoded.time.values,
        [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)])

def test_decoded_cf_datetime_array(self):
actual = conventions.DecodedCFDatetimeArray(
np.array([0, 1, 2]), 'days since 1900-01-01', 'standard')
Expand Down Expand Up @@ -340,6 +371,7 @@ def test_decode_non_standard_calendar_fallback(self):
self.assertEqual(actual.dtype, np.dtype('O'))
self.assertArrayEqual(actual, expected)

@requires_netCDF4
def test_cf_datetime_nan(self):
for num_dates, units, expected_list in [
([np.nan], 'days since 2000-01-01', ['NaT']),
Expand Down
22 changes: 22 additions & 0 deletions xarray/test/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ def test_first_n_items(self):
with self.assertRaisesRegexp(ValueError, 'at least one item'):
formatting.first_n_items(array, 0)

def test_last_item(self):
    """last_item yields 99 for ``arange(100)`` under several reshapes."""
    flat = np.arange(100)
    wanted = np.array(99)

    for shape in ((10, 10), (1, 100), (2, 2, 5, 5)):
        got = formatting.last_item(flat.reshape(shape))
        self.assertEqual(got, wanted)

def test_format_item(self):
cases = [
(pd.Timestamp('2000-01-01T12'), '2000-01-01T12:00:00'),
Expand Down Expand Up @@ -106,3 +116,15 @@ def test_pretty_print(self):

def test_maybe_truncate(self):
self.assertEqual(formatting.maybe_truncate(u'ß', 10), u'ß')

def test_format_timestamp_out_of_bounds(self):
    """Datetimes in the years 1300 and 2300 format as plain date strings."""
    from datetime import datetime

    # (input datetime, expected formatted string), checked in this order
    cases = [
        (datetime(1300, 12, 1), '1300-12-01'),
        (datetime(2300, 12, 1), '2300-12-01'),
    ]
    for moment, wanted in cases:
        formatted = formatting.format_timestamp(moment)
        self.assertEqual(formatted, wanted)