From f8de8f43e6197313a984721c0aef6a4c3f33e78a Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 15 Jan 2023 14:47:22 -0500 Subject: [PATCH 01/14] [test-upstream] Preserve formatting of reference time units Changes in pandas 2.0.0 interfere with the way we expect some times to be formatted. --- doc/whats-new.rst | 4 ++++ xarray/core/formatting.py | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8af7a258f5a..ed2fc048606 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,10 @@ Bug fixes By `Benoît Bovy `_. - Preserve original dtype on accessing MultiIndex levels (:issue:`7250`, :pull:`7393`). By `Ian Carroll `_. +- Ensure the formatting of time encoding reference dates outside the range of + nanosecond-precision datetimes remains the same under pandas version 2.0.0 + (:issue:`7420`, :pull:`7441`). By `Spencer Clark + `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 1473d513a01..326d8fcc755 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from packaging.version import Version from pandas.errors import OutOfBoundsDatetime from xarray.core.duck_array_ops import array_equiv @@ -116,7 +117,19 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" # Timestamp is only valid for 1678 to 2262 try: - datetime_str = str(pd.Timestamp(t)) + timestamp = pd.Timestamp(t) + # With pandas version >= 2.0.0 it is possible for objects representing + # dates outside the range of nanosecond-precision datetimes to be cast + # to Timestamps (pandas will use a coarser time resolution than + # nanoseconds). To preserve existing behavior in xarray, we will force + # any object passed to this function to be cast to a + # nanosecond-precision Timestamp. If we do not do this, the way pandas + # represents 0001-01-01 00:00:00 as a string causes some failing tests. + # Pandas represents it as 1-01-01 00:00:00, while we expect it to be + # represented as 0001-01-01. + if Version(pd.__version__).major >= 2: + timestamp = timestamp.as_unit("ns") + datetime_str = str(timestamp) except OutOfBoundsDatetime: datetime_str = str(t) From e1808296a8199b9e88eeb780ae74cd003404b633 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 7 Mar 2023 21:41:16 -0500 Subject: [PATCH 02/14] Use strftime to force expected date format Co-authored-by: Justus Magin --- xarray/core/formatting.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 863df384440..bb6ae164e10 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -118,18 +118,7 @@ def format_timestamp(t): # Timestamp is only valid for 1678 to 2262 try: timestamp = pd.Timestamp(t) - # With pandas version >= 2.0.0 it is possible for objects representing - # dates outside the range of nanosecond-precision datetimes to be cast - # to Timestamps (pandas will use a coarser time resolution than - # nanoseconds). To preserve existing behavior in xarray, we will force - # any object passed to this function to be cast to a - # nanosecond-precision Timestamp. If we do not do this, the way pandas - # represents 0001-01-01 00:00:00 as a string causes some failing tests. - # Pandas represents it as 1-01-01 00:00:00, while we expect it to be - # represented as 0001-01-01. - if Version(pd.__version__).major >= 2: - timestamp = timestamp.as_unit("ns") - datetime_str = str(timestamp) + datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%s") except OutOfBoundsDatetime: datetime_str = str(t) From 024360cb45be4bd64d331a84b902939ca5f88296 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 02:41:51 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/formatting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index bb6ae164e10..64c62a594aa 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,7 +13,6 @@ import numpy as np import pandas as pd -from packaging.version import Version from pandas.errors import OutOfBoundsDatetime from xarray.core.duck_array_ops import array_equiv From 176c6dafc36b0da4b8cb09b7d100a55e83feed08 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Sun, 15 Jan 2023 14:47:22 -0500 Subject: [PATCH 04/14] [test-upstream] Preserve formatting of reference time units Changes in pandas 2.0.0 interfere with the way we expect some times to be formatted. --- doc/whats-new.rst | 4 ++++ xarray/core/formatting.py | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3cc2efde599..20e3bf9c861 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -144,6 +144,10 @@ Bug fixes By `Benoît Bovy `_. - Preserve original dtype on accessing MultiIndex levels (:issue:`7250`, :pull:`7393`). By `Ian Carroll `_. +- Ensure the formatting of time encoding reference dates outside the range of + nanosecond-precision datetimes remains the same under pandas version 2.0.0 + (:issue:`7420`, :pull:`7441`). By `Spencer Clark + `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ed548771809..863df384440 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from packaging.version import Version from pandas.errors import OutOfBoundsDatetime from xarray.core.duck_array_ops import array_equiv @@ -116,7 +117,19 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" # Timestamp is only valid for 1678 to 2262 try: - datetime_str = str(pd.Timestamp(t)) + timestamp = pd.Timestamp(t) + # With pandas version >= 2.0.0 it is possible for objects representing + # dates outside the range of nanosecond-precision datetimes to be cast + # to Timestamps (pandas will use a coarser time resolution than + # nanoseconds). To preserve existing behavior in xarray, we will force + # any object passed to this function to be cast to a + # nanosecond-precision Timestamp. If we do not do this, the way pandas + # represents 0001-01-01 00:00:00 as a string causes some failing tests. + # Pandas represents it as 1-01-01 00:00:00, while we expect it to be + # represented as 0001-01-01. + if Version(pd.__version__).major >= 2: + timestamp = timestamp.as_unit("ns") + datetime_str = str(timestamp) except OutOfBoundsDatetime: datetime_str = str(t) From 2c40f5331528a41d3d4e4c5163f4dde9cb3b5c79 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 7 Mar 2023 21:41:16 -0500 Subject: [PATCH 05/14] Use strftime to force expected date format Co-authored-by: Justus Magin --- xarray/core/formatting.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 863df384440..bb6ae164e10 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -118,18 +118,7 @@ def format_timestamp(t): # Timestamp is only valid for 1678 to 2262 try: timestamp = pd.Timestamp(t) - # With pandas version >= 2.0.0 it is possible for objects representing - # dates outside the range of nanosecond-precision datetimes to be cast - # to Timestamps (pandas will use a coarser time resolution than - # nanoseconds). To preserve existing behavior in xarray, we will force - # any object passed to this function to be cast to a - # nanosecond-precision Timestamp. If we do not do this, the way pandas - # represents 0001-01-01 00:00:00 as a string causes some failing tests. - # Pandas represents it as 1-01-01 00:00:00, while we expect it to be - # represented as 0001-01-01. - if Version(pd.__version__).major >= 2: - timestamp = timestamp.as_unit("ns") - datetime_str = str(timestamp) + datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%s") except OutOfBoundsDatetime: datetime_str = str(t) From e1e827f38ac293eb53c3ce349658f353cf46045c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Mar 2023 02:41:51 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/formatting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index bb6ae164e10..64c62a594aa 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,7 +13,6 @@ import numpy as np import pandas as pd -from packaging.version import Version from pandas.errors import OutOfBoundsDatetime from xarray.core.duck_array_ops import array_equiv From a86a1ab099ffc9ab1f1602558664a3c698532a41 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Tue, 7 Mar 2023 21:56:14 -0500 Subject: [PATCH 07/14] [test-upstream] Remove old comment and move what's new entry to latest version --- doc/whats-new.rst | 8 ++++---- xarray/core/formatting.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 20e3bf9c861..3a30af2bf2c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,10 @@ Bug fixes By `Jimmy Westling `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. +- Ensure the formatting of time encoding reference dates outside the range of + nanosecond-precision datetimes remains the same under pandas version 2.0.0 + (:issue:`7420`, :pull:`7441`). By `Spencer Clark + `_. Documentation ~~~~~~~~~~~~~ @@ -144,10 +148,6 @@ Bug fixes By `Benoît Bovy `_. - Preserve original dtype on accessing MultiIndex levels (:issue:`7250`, :pull:`7393`). By `Ian Carroll `_. -- Ensure the formatting of time encoding reference dates outside the range of - nanosecond-precision datetimes remains the same under pandas version 2.0.0 - (:issue:`7420`, :pull:`7441`). By `Spencer Clark - `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 64c62a594aa..aa028df19fe 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -114,7 +114,6 @@ def calc_max_rows_last(max_rows: int) -> int: def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" - # Timestamp is only valid for 1678 to 2262 try: timestamp = pd.Timestamp(t) datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%s") From 5f98835ed09ed68015440553df5377266f7adf6b Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Tue, 7 Mar 2023 22:03:38 -0500 Subject: [PATCH 08/14] [test-upstream] remove old comment and move what's new entry --- doc/whats-new.rst | 8 ++++---- xarray/core/formatting.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 20e3bf9c861..3a30af2bf2c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,10 @@ Bug fixes By `Jimmy Westling `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. +- Ensure the formatting of time encoding reference dates outside the range of + nanosecond-precision datetimes remains the same under pandas version 2.0.0 + (:issue:`7420`, :pull:`7441`). By `Spencer Clark + `_. Documentation ~~~~~~~~~~~~~ @@ -144,10 +148,6 @@ Bug fixes By `Benoît Bovy `_. - Preserve original dtype on accessing MultiIndex levels (:issue:`7250`, :pull:`7393`). By `Ian Carroll `_. -- Ensure the formatting of time encoding reference dates outside the range of - nanosecond-precision datetimes remains the same under pandas version 2.0.0 - (:issue:`7420`, :pull:`7441`). By `Spencer Clark - `_. Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 64c62a594aa..aa028df19fe 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -114,7 +114,6 @@ def calc_max_rows_last(max_rows: int) -> int: def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" - # Timestamp is only valid for 1678 to 2262 try: timestamp = pd.Timestamp(t) datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%s") From 8ce902f7f94dc13e8735f7ccc6c659a0c356add8 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 8 Mar 2023 09:31:32 +0100 Subject: [PATCH 09/14] Update xarray/core/formatting.py --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index aa028df19fe..703ce15fec0 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -116,7 +116,7 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" try: timestamp = pd.Timestamp(t) - datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%s") + datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") except OutOfBoundsDatetime: datetime_str = str(t) From dcd763de8baded92db2510a7512be543c7526dac Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 31 Mar 2023 19:32:36 -0400 Subject: [PATCH 10/14] Use isoformat() now that pandas zero-pads years Co-authored-by: Justus Magin --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 703ce15fec0..70aa828359a 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -116,7 +116,7 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" try: timestamp = pd.Timestamp(t) - datetime_str = timestamp.strftime("%Y-%m-%d %H:%M:%S") + datetime_str = timestamp.isoformat() except OutOfBoundsDatetime: datetime_str = str(t) From 875123489f0610234634d0f86c2efa9059a74ab7 Mon Sep 17 00:00:00 2001 From: spencerkclark Date: Fri, 31 Mar 2023 19:39:25 -0400 Subject: [PATCH 11/14] [test-upstream] update what's new --- doc/whats-new.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cf40d306035..04034d2bc0b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,12 @@ Bug fixes By `Thomas Coleman `_. - Proper plotting when passing :py:class:`~matplotlib.colors.BoundaryNorm` type argument in :py:meth:`DataArray.plot`. (:issue:`4061`, :issue:`7014`,:pull:`7553`) By `Jelmer Veenstra `_. +- Ensure the formatting of time encoding reference dates outside the range of + nanosecond-precision datetimes remains the same under pandas version 2.0.0 + (:issue:`7420`, :pull:`7441`). + By `Justus Magin `_ and + `Spencer Clark `_. + Documentation ~~~~~~~~~~~~~ @@ -116,10 +122,6 @@ Bug fixes By `Jimmy Westling `_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. -- Ensure the formatting of time encoding reference dates outside the range of - nanosecond-precision datetimes remains the same under pandas version 2.0.0 - (:issue:`7420`, :pull:`7441`). By `Spencer Clark - `_. - Fix issue with ``max_gap`` in ``interpolate_na``, when applied to multidimensional arrays. (:issue:`7597`, :pull:`7598`). By `Paul Ockenfuß `_. From 4370377f6ace600603d692c11d384193495e72b1 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sat, 1 Apr 2023 13:14:03 +0200 Subject: [PATCH 12/14] replace the `isoformat` separator with `" "` [test-upstream] --- xarray/core/formatting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 175573e154b..e6e9cb7a00f 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -117,6 +117,8 @@ def format_timestamp(t): try: timestamp = pd.Timestamp(t) datetime_str = timestamp.isoformat() + if datetime_str != "NaT": + datetime_str = datetime_str.replace("T", " ") except OutOfBoundsDatetime: datetime_str = str(t) From 8d4abccbd7d4fa4d4364bba8327fb542bfc43e9a Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sat, 1 Apr 2023 13:41:35 +0200 Subject: [PATCH 13/14] use the `sep` parameter to `isoformat` instead Co-authored-by: Spencer Clark --- xarray/core/formatting.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index e6e9cb7a00f..7f93706c74c 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -116,9 +116,7 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" try: timestamp = pd.Timestamp(t) - datetime_str = timestamp.isoformat() - if datetime_str != "NaT": - datetime_str = datetime_str.replace("T", " ") + datetime_str = timestamp.isoformat(sep=" ") except OutOfBoundsDatetime: datetime_str = str(t) From 839881f8d35abfa3fa16b9467e0e6059ac33d5f0 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sat, 1 Apr 2023 13:45:16 +0200 Subject: [PATCH 14/14] Update doc/whats-new.rst --- doc/whats-new.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 04034d2bc0b..681fb1f070b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -120,8 +120,6 @@ Bug fixes - Fix matplotlib raising a UserWarning when plotting a scatter plot with an unfilled marker (:issue:`7313`, :pull:`7318`). By `Jimmy Westling `_. -- Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). - By `Alex Goodman `_ and `Deepak Cherian `_. - Fix issue with ``max_gap`` in ``interpolate_na``, when applied to multidimensional arrays. (:issue:`7597`, :pull:`7598`). By `Paul Ockenfuß `_.