From b23b5dfd2f4cb9d73b3e7460c0bce2a7e195800c Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 18 Dec 2014 23:47:34 -0800 Subject: [PATCH 1/2] ENH: convert to/from cdms2 variables --- .travis.yml | 6 ++--- doc/api.rst | 2 ++ xray/conventions.py | 4 ++-- xray/convert.py | 47 +++++++++++++++++++++++++++++++++++++ xray/core/dataarray.py | 13 ++++++++++ xray/test/test_dataarray.py | 22 +++++++++++++++++ 6 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 xray/convert.py diff --git a/.travis.yml b/.travis.yml index 74c423c5c04..82c95d6c7ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,9 @@ matrix: include: - python: 2.6 env: UPDATE_ENV="conda install unittest2 pandas==0.15.0" - # Test on Python 2.7 with and without netCDF4/scipy + # Test on Python 2.7 with and without netCDF4/scipy/cdat-lite - python: 2.7 - env: UPDATE_ENV="pip install cyordereddict" + env: UPDATE_ENV="pip install cyordereddict && conda install -c ajdawson cdat-lite" - python: 2.7 # nb. we have to remove scipy because conda install pandas brings it in: # https://github.com/ContinuumIO/anaconda-issues/issues/145 @@ -42,7 +42,7 @@ before_install: install: - conda create --yes -n test_env python=$TRAVIS_PYTHON_VERSION pip nose numpy pandas scipy netCDF4 - source activate test_env - - echo $UPDATE_ENV; $UPDATE_ENV + - echo $UPDATE_ENV; eval $UPDATE_ENV - python setup.py install script: diff --git a/doc/api.rst b/doc/api.rst index fd422911910..4043639d87b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -272,5 +272,7 @@ IO / Conversion DataArray.to_series DataArray.to_dataframe DataArray.to_index + DataArray.to_cdms2 DataArray.from_series + DataArray.from_cdms2 DataArray.load_data diff --git a/xray/conventions.py b/xray/conventions.py index 2f5467cb4e4..e63708c2b10 100644 --- a/xray/conventions.py +++ b/xray/conventions.py @@ -174,7 +174,7 @@ def infer_datetime_units(dates): 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = pd.to_datetime(dates, box=False) + dates = pd.to_datetime(np.asarray(dates), box=False) unique_timedeltas = np.unique(np.diff(dates[pd.notnull(dates)])) units = _infer_time_units_from_diff(unique_timedeltas) return '%s since %s' % (units, pd.Timestamp(dates[0])) @@ -185,7 +185,7 @@ def infer_timedelta_units(deltas): {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly divide all unique time deltas in `deltas`) """ - deltas = pd.to_timedelta(deltas, box=False) + deltas = pd.to_timedelta(np.asarray(deltas), box=False) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) units = _infer_time_units_from_diff(unique_timedeltas) return units diff --git a/xray/convert.py b/xray/convert.py new file mode 100644 index 00000000000..6c2180e32c2 --- /dev/null +++ b/xray/convert.py @@ -0,0 +1,47 @@ +"""Functions for converting to and from xray objects +""" +import numpy as np + +from .core.dataarray import DataArray + + +ignored_attrs = set(['name', 'tileIndex']) + + +def _get_cdms2_attrs(var): + return dict((k, v) for k, v in var.attributes.items() + if k not in ignored_attrs) + + +def from_cdms2(variable): + """Convert a cdms2 variable into an DataArray + """ + values = np.asarray(variable) + name = variable.id + coords = [(v.id, np.asarray(v), _get_cdms2_attrs(v)) + for v in variable.getAxisList()] + attrs = _get_cdms2_attrs(variable) + return DataArray(values, coords=coords, name=name, attrs=attrs) + + +def _set_cdms2_attrs(var, attrs): + for k, v in attrs.items(): + setattr(var, k, v) + + +def to_cdms2(dataarray): + """Convert a DataArray into a cdms2 variable + """ + # we don't want cdms2 to be a hard dependency + import cdms2 + + axes = [] + for dim in dataarray.dims: + coord = dataarray.coords[dim] + axis = cdms2.createAxis(coord.values, id=dim) + _set_cdms2_attrs(axis, coord.attrs) + axes.append(axis) + + var = cdms2.createVariable(dataarray.values, axes=axes, id=dataarray.name) + _set_cdms2_attrs(var, dataarray.attrs) + return var diff --git a/xray/core/dataarray.py b/xray/core/dataarray.py index dacc1e964f8..e90530f1f48 100644 --- a/xray/core/dataarray.py +++ b/xray/core/dataarray.py @@ -848,6 +848,19 @@ def from_series(cls, series): ds = Dataset.from_dataframe(df) return cls._new_from_dataset_no_copy(ds, series.name) + def to_cdms2(self): + """Convert this array into a cdms2.Variable + """ + from ..convert import to_cdms2 + return to_cdms2(self) + + @classmethod + def from_cdms2(cls, variable): + """Convert a cdms2.Variable into an xray.DataArray + """ + from ..convert import from_cdms2 + return from_cdms2(variable) + def _all_compat(self, other, compat_str): """Helper function for equals and identical""" compat = lambda x, y: getattr(x.variable, compat_str)(y.variable) diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 887ff94c08d..72a50d79cbd 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -995,6 +995,28 @@ def test_to_and_from_series(self): self.assertDataArrayIdentical(expected_da, DataArray.from_series(actual)) + def test_to_and_from_cdms2(self): + try: + import cdms2 + except ImportError: + raise unittest.SkipTest('cdms2 not installed') + + original = DataArray(np.arange(6).reshape(2, 3), + [('xxx', [-2, 2], {'units': 'meters'}), + ('yyy', [3, 4, 5])], + name='foo', attrs={'baz': 123}) + actual = original.to_cdms2() + self.assertArrayEqual(actual, original) + self.assertEqual(actual.id, original.name) + self.assertItemsEqual(actual.getAxisIds(), original.dims) + for axis, coord in zip(actual.getAxisList(), original.coords.values()): + self.assertEqual(axis.id, coord.name) + self.assertArrayEqual(axis, coord) + self.assertEqual(actual.baz, original.attrs['baz']) + + roundtripped = DataArray.from_cdms2(actual) + self.assertDataArrayIdentical(original, roundtripped) + def test_to_dataset(self): unnamed = DataArray([1, 2], dims='x') actual = unnamed.to_dataset() From 784e24435c2cfddc3cdaab1866d2d8d7ee8eadd5 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 19 Dec 2014 00:41:05 -0800 Subject: [PATCH 2/2] Encode/decode time information to cdms2 --- xray/convert.py | 42 ++++++++++++++++++++----------------- xray/test/test_dataarray.py | 14 +++++++++---- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/xray/convert.py b/xray/convert.py index 6c2180e32c2..26eaaaf18a5 100644 --- a/xray/convert.py +++ b/xray/convert.py @@ -3,30 +3,26 @@ import numpy as np from .core.dataarray import DataArray - +from .conventions import ( + maybe_encode_timedelta, maybe_encode_datetime, decode_cf) ignored_attrs = set(['name', 'tileIndex']) -def _get_cdms2_attrs(var): - return dict((k, v) for k, v in var.attributes.items() - if k not in ignored_attrs) - - def from_cdms2(variable): """Convert a cdms2 variable into an DataArray """ + def get_cdms2_attrs(var): + return dict((k, v) for k, v in var.attributes.items() + if k not in ignored_attrs) + values = np.asarray(variable) name = variable.id - coords = [(v.id, np.asarray(v), _get_cdms2_attrs(v)) + coords = [(v.id, np.asarray(v), get_cdms2_attrs(v)) for v in variable.getAxisList()] - attrs = _get_cdms2_attrs(variable) - return DataArray(values, coords=coords, name=name, attrs=attrs) - - -def _set_cdms2_attrs(var, attrs): - for k, v in attrs.items(): - setattr(var, k, v) + attrs = get_cdms2_attrs(variable) + dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) + return decode_cf(dataarray.to_dataset())[dataarray.name] def to_cdms2(dataarray): @@ -35,13 +31,21 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 + def encode(var): + return maybe_encode_timedelta(maybe_encode_datetime(var)) + + def set_cdms2_attrs(var, attrs): + for k, v in attrs.items(): + setattr(var, k, v) + axes = [] for dim in dataarray.dims: - coord = dataarray.coords[dim] + coord = encode(dataarray.coords[dim]) axis = cdms2.createAxis(coord.values, id=dim) - _set_cdms2_attrs(axis, coord.attrs) + set_cdms2_attrs(axis, coord.attrs) axes.append(axis) - var = cdms2.createVariable(dataarray.values, axes=axes, id=dataarray.name) - _set_cdms2_attrs(var, dataarray.attrs) - return var + var = encode(dataarray) + cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name) + set_cdms2_attrs(cdms2_var, var.attrs) + return cdms2_var diff --git a/xray/test/test_dataarray.py b/xray/test/test_dataarray.py index 72a50d79cbd..427e3139f92 100644 --- a/xray/test/test_dataarray.py +++ b/xray/test/test_dataarray.py @@ -1002,18 +1002,24 @@ def test_to_and_from_cdms2(self): raise unittest.SkipTest('cdms2 not installed') original = DataArray(np.arange(6).reshape(2, 3), - [('xxx', [-2, 2], {'units': 'meters'}), - ('yyy', [3, 4, 5])], + [('distance', [-2, 2], {'units': 'meters'}), + ('time', pd.date_range('2000-01-01', periods=3))], name='foo', attrs={'baz': 123}) + expected_coords = [Coordinate('distance', [-2, 2]), + Coordinate('time', [0, 1, 2])] actual = original.to_cdms2() self.assertArrayEqual(actual, original) self.assertEqual(actual.id, original.name) self.assertItemsEqual(actual.getAxisIds(), original.dims) - for axis, coord in zip(actual.getAxisList(), original.coords.values()): + for axis, coord in zip(actual.getAxisList(), expected_coords): self.assertEqual(axis.id, coord.name) - self.assertArrayEqual(axis, coord) + self.assertArrayEqual(axis, coord.values) self.assertEqual(actual.baz, original.attrs['baz']) + component_times = actual.getAxis(1).asComponentTime() + self.assertEqual(len(component_times), 3) + self.assertEqual(str(component_times[0]), '2000-1-1 0:0:0.0') + roundtripped = DataArray.from_cdms2(actual) self.assertDataArrayIdentical(original, roundtripped)