From 93413a8e0aa5b7c898f5d170d6ba5fe84587404f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Sep 2022 10:30:54 -0700 Subject: [PATCH 1/5] API: Change Timestamp/Timedelta arithmetic to match numpy --- pandas/_libs/tslibs/timedeltas.pyx | 19 ++-- pandas/_libs/tslibs/timestamps.pyx | 53 +++++------ .../tests/scalar/timedelta/test_timedelta.py | 31 ++++--- .../tests/scalar/timestamp/test_timestamp.py | 87 ++++++++++++------- 4 files changed, 98 insertions(+), 92 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c9e997ffb405c..bf22967f615c4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -787,19 +787,12 @@ def _binary_op_method_timedeltalike(op, name): # e.g. if original other was timedelta64('NaT') return NaT - # We allow silent casting to the lower resolution if and only - # if it is lossless. - try: - if self._reso < other._reso: - other = (<_Timedelta>other)._as_reso(self._reso, round_ok=False) - elif self._reso > other._reso: - self = (<_Timedelta>self)._as_reso(other._reso, round_ok=False) - except ValueError as err: - raise ValueError( - "Timedelta addition/subtraction with mismatched resolutions is not " - "allowed when casting to the lower resolution would require " - "lossy rounding." - ) from err + # Matching numpy, we cast to the higher resolution. Unlike numpy, + # we raise instead of silently overflowing during this casting. + if self._reso < other._reso: + self = (<_Timedelta>self)._as_reso(other._reso, round_ok=True) + elif self._reso > other._reso: + other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True) res = op(self.value, other.value) if res == NPY_NAT: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 07c6e32028942..2c6969a72bf74 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -440,23 +440,23 @@ cdef class _Timestamp(ABCTimestamp): # corresponding DateOffsets? # TODO: no tests get here other = ensure_td64ns(other) + if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns: + # TODO: no tests + other = ensure_td64ns(other) + if other_reso > self._reso: + # Following numpy, we cast to the higher resolution + # test_sub_timedelta64_mismatched_reso + self = (<_Timestamp>self)._as_reso(other_reso) + if isinstance(other, _Timedelta): # TODO: share this with __sub__, Timedelta.__add__ - # We allow silent casting to the lower resolution if and only - # if it is lossless. See also Timestamp.__sub__ - # and Timedelta.__add__ - try: - if self._reso < other._reso: - other = (<_Timedelta>other)._as_reso(self._reso, round_ok=False) - elif self._reso > other._reso: - self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) - except ValueError as err: - raise ValueError( - "Timestamp addition with mismatched resolutions is not " - "allowed when casting to the lower resolution would require " - "lossy rounding." - ) from err + # Matching numpy, we cast to the higher resolution. Unlike numpy, + # we raise instead of silently overflowing during this casting. + if self._reso < other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True) + elif self._reso > other._reso: + other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True) try: nanos = delta_to_nanoseconds( @@ -464,12 +464,6 @@ cdef class _Timestamp(ABCTimestamp): ) except OutOfBoundsTimedelta: raise - except ValueError as err: - raise ValueError( - "Addition between Timestamp and Timedelta with mismatched " - "resolutions is not allowed when casting to the lower " - "resolution would require lossy rounding." - ) from err try: new_value = self.value + nanos @@ -556,19 +550,12 @@ cdef class _Timestamp(ABCTimestamp): "Cannot subtract tz-naive and tz-aware datetime-like objects." ) - # We allow silent casting to the lower resolution if and only - # if it is lossless. - try: - if self._reso < other._reso: - other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) - elif self._reso > other._reso: - self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) - except ValueError as err: - raise ValueError( - "Timestamp subtraction with mismatched resolutions is not " - "allowed when casting to the lower resolution would require " - "lossy rounding." - ) from err + # Matching numpy, we cast to the higher resolution. Unlike numpy, + # we raise instead of silently overflowing during this casting. + if self._reso < other._reso: + self = (<_Timestamp>self)._as_reso(other._reso, round_ok=False) + elif self._reso > other._reso: + other = (<_Timestamp>other)._as_reso(self._reso, round_ok=False) # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 21f32cf2d2d1e..8d3738d40601b 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -237,38 +237,37 @@ def test_floordiv_numeric(self, td): assert res._reso == td._reso def test_addsub_mismatched_reso(self, td): - other = Timedelta(days=1) # can losslessly convert to other resos + # need to cast to since td is out of bounds for ns, so + # so we would raise OverflowError without casting + other = Timedelta(days=1)._as_unit("us") + # td is out of bounds for ns result = td + other - assert result._reso == td._reso + assert result._reso == other._reso assert result.days == td.days + 1 result = other + td - assert result._reso == td._reso + assert result._reso == other._reso assert result.days == td.days + 1 result = td - other - assert result._reso == td._reso + assert result._reso == other._reso assert result.days == td.days - 1 result = other - td - assert result._reso == td._reso + assert result._reso == other._reso assert result.days == 1 - td.days - other2 = Timedelta(500) # can't cast losslessly - - msg = ( - "Timedelta addition/subtraction with mismatched resolutions is " - "not allowed when casting to the lower resolution would require " - "lossy rounding" - ) - with pytest.raises(ValueError, match=msg): + other2 = Timedelta(500) + # TODO: should be OutOfBoundsTimedelta + msg = "value too large" + with pytest.raises(OverflowError, match=msg): td + other2 - with pytest.raises(ValueError, match=msg): + with pytest.raises(OverflowError, match=msg): other2 + td - with pytest.raises(ValueError, match=msg): + with pytest.raises(OverflowError, match=msg): td - other2 - with pytest.raises(ValueError, match=msg): + with pytest.raises(OverflowError, match=msg): other2 - td def test_min(self, td): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index dc3ddc7361afd..48c016434a97d 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -18,7 +18,10 @@ utc, ) -from pandas._libs.tslibs.dtypes import NpyDatetimeUnit +from pandas._libs.tslibs.dtypes import ( + NpyDatetimeUnit, + npy_unit_to_abbrev, +) from pandas._libs.tslibs.timezones import ( dateutil_gettz as gettz, get_timezone, @@ -884,22 +887,29 @@ def test_to_period(self, dt64, ts): ) def test_addsub_timedeltalike_non_nano(self, dt64, ts, td): + if isinstance(td, Timedelta): + # td._reso is ns + exp_reso = td._reso + else: + # effective td._reso is s + exp_reso = ts._reso + result = ts - td expected = Timestamp(dt64) - td assert isinstance(result, Timestamp) - assert result._reso == ts._reso + assert result._reso == exp_reso assert result == expected result = ts + td expected = Timestamp(dt64) + td assert isinstance(result, Timestamp) - assert result._reso == ts._reso + assert result._reso == exp_reso assert result == expected result = td + ts expected = td + Timestamp(dt64) assert isinstance(result, Timestamp) - assert result._reso == ts._reso + assert result._reso == exp_reso assert result == expected def test_addsub_offset(self, ts_tz): @@ -944,27 +954,35 @@ def test_sub_datetimelike_mismatched_reso(self, ts_tz): result = ts - other assert isinstance(result, Timedelta) assert result.value == 0 - assert result._reso == min(ts._reso, other._reso) + assert result._reso == max(ts._reso, other._reso) result = other - ts assert isinstance(result, Timedelta) assert result.value == 0 - assert result._reso == min(ts._reso, other._reso) + assert result._reso == max(ts._reso, other._reso) - msg = "Timestamp subtraction with mismatched resolutions" if ts._reso < other._reso: # Case where rounding is lossy other2 = other + Timedelta._from_value_and_reso(1, other._reso) - with pytest.raises(ValueError, match=msg): - ts - other2 - with pytest.raises(ValueError, match=msg): - other2 - ts + exp = ts._as_unit(npy_unit_to_abbrev(other._reso)) - other2 + + res = ts - other2 + assert res == exp + assert res._reso == max(ts._reso, other._reso) + + res = other2 - ts + assert res == -exp + assert res._reso == max(ts._reso, other._reso) else: ts2 = ts + Timedelta._from_value_and_reso(1, ts._reso) - with pytest.raises(ValueError, match=msg): - ts2 - other - with pytest.raises(ValueError, match=msg): - other - ts2 + exp = ts2 - other._as_unit(npy_unit_to_abbrev(ts2._reso)) + + res = ts2 - other + assert res == exp + assert res._reso == max(ts._reso, other._reso) + res = other - ts2 + assert res == -exp + assert res._reso == max(ts._reso, other._reso) def test_sub_timedeltalike_mismatched_reso(self, ts_tz): # case with non-lossy rounding @@ -984,32 +1002,41 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): result = ts + other assert isinstance(result, Timestamp) assert result == ts - assert result._reso == min(ts._reso, other._reso) + assert result._reso == max(ts._reso, other._reso) result = other + ts assert isinstance(result, Timestamp) assert result == ts - assert result._reso == min(ts._reso, other._reso) + assert result._reso == max(ts._reso, other._reso) - msg = "Timestamp addition with mismatched resolutions" if ts._reso < other._reso: # Case where rounding is lossy other2 = other + Timedelta._from_value_and_reso(1, other._reso) - with pytest.raises(ValueError, match=msg): - ts + other2 - with pytest.raises(ValueError, match=msg): - other2 + ts + exp = ts._as_unit(npy_unit_to_abbrev(other._reso)) + other2 + res = ts + other2 + assert res == exp + assert res._reso == max(ts._reso, other._reso) + res = other2 + ts + assert res == exp + assert res._reso == max(ts._reso, other._reso) else: ts2 = ts + Timedelta._from_value_and_reso(1, ts._reso) - with pytest.raises(ValueError, match=msg): - ts2 + other - with pytest.raises(ValueError, match=msg): - other + ts2 + exp = ts2 + other._as_unit(npy_unit_to_abbrev(ts2._reso)) - msg = "Addition between Timestamp and Timedelta with mismatched resolutions" - with pytest.raises(ValueError, match=msg): - # With a mismatched td64 as opposed to Timedelta - ts + np.timedelta64(1, "ns") + res = ts2 + other + assert res == exp + assert res._reso == max(ts._reso, other._reso) + res = other + ts2 + assert res == exp + assert res._reso == max(ts._reso, other._reso) + + def test_sub_timedelta64_mismatched_reso(self, ts_tz): + ts = ts_tz + + res = ts + np.timedelta64(1, "ns") + exp = ts._as_unit("ns") + np.timedelta64(1, "ns") + assert exp == res + assert exp._reso == NpyDatetimeUnit.NPY_FR_ns.value def test_min(self, ts): assert ts.min <= ts From 911c21e7853e5f8003fe21123664aca06d975b66 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Sep 2022 12:18:38 -0700 Subject: [PATCH 2/5] fix interval test --- pandas/_libs/tslibs/timestamps.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2c6969a72bf74..18bcb5a97bee0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -433,6 +433,7 @@ cdef class _Timestamp(ABCTimestamp): # TODO: deprecate allowing this? We only get here # with test_timedelta_add_timestamp_interval other = np.timedelta64(other.view("i8"), "ns") + other_reso = NPY_DATETIMEUNIT.NPY_FR_ns elif ( other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M ): @@ -440,6 +441,8 @@ cdef class _Timestamp(ABCTimestamp): # corresponding DateOffsets? # TODO: no tests get here other = ensure_td64ns(other) + other_reso = NPY_DATETIMEUNIT.NPY_FR_ns + if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns: # TODO: no tests other = ensure_td64ns(other) From 89ab72bb88d483a366c59fdeda7b662181ea47b5 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Sep 2022 13:12:55 -0700 Subject: [PATCH 3/5] DTA/TDA add/sub upcast instead of downcast --- pandas/core/arrays/datetimelike.py | 16 ++++++------- pandas/tests/arrays/test_timedeltas.py | 33 ++++---------------------- 2 files changed, 11 insertions(+), 38 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5a92cc3c8509c..83c7fa2fbdca1 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1134,13 +1134,12 @@ def _add_datetimelike_scalar(self, other) -> DatetimeArray: return DatetimeArray._simple_new(result, dtype=result.dtype) if self._reso != other._reso: - # Just as with Timestamp/Timedelta, we cast to the lower resolution - # so long as doing so is lossless. + # Just as with Timestamp/Timedelta, we cast to the higher resolution if self._reso < other._reso: - other = other._as_unit(self._unit, round_ok=False) - else: unit = npy_unit_to_abbrev(other._reso) self = self._as_unit(unit) + else: + other = other._as_unit(self._unit) i8 = self.asi8 result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) @@ -1296,12 +1295,11 @@ def _add_timedelta_arraylike( self = cast("DatetimeArray | TimedeltaArray", self) if self._reso != other._reso: - # Just as with Timestamp/Timedelta, we cast to the lower resolution - # so long as doing so is lossless. + # Just as with Timestamp/Timedelta, we cast to the higher resolution if self._reso < other._reso: - other = other._as_unit(self._unit) - else: self = self._as_unit(other._unit) + else: + other = other._as_unit(self._unit) self_i8 = self.asi8 other_i8 = other.asi8 @@ -2039,7 +2037,7 @@ def _unit(self) -> str: def _as_unit(self: TimelikeOpsT, unit: str) -> TimelikeOpsT: dtype = np.dtype(f"{self.dtype.kind}8[{unit}]") - new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=False) + new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=True) if isinstance(self.dtype, np.dtype): new_dtype = new_values.dtype diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index de45d0b29889b..6c48ee3b6405e 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -104,22 +104,13 @@ def test_add_pdnat(self, tda): def test_add_datetimelike_scalar(self, tda, tz_naive_fixture): ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture) - expected = tda + ts._as_unit(tda._unit) + expected = tda._as_unit("ns") + ts res = tda + ts tm.assert_extension_array_equal(res, expected) res = ts + tda tm.assert_extension_array_equal(res, expected) - ts += Timedelta(1) # so we can't cast losslessly - msg = "Cannot losslessly convert units" - with pytest.raises(ValueError, match=msg): - # mismatched reso -> check that we don't give an incorrect result - tda + ts - with pytest.raises(ValueError, match=msg): - # mismatched reso -> check that we don't give an incorrect result - ts + tda - - ts = ts._as_unit(tda._unit) + ts += Timedelta(1) # case where we can't cast losslessly exp_values = tda._ndarray + ts.asm8 expected = ( @@ -185,35 +176,19 @@ def test_add_timedeltaarraylike(self, tda): # TODO(2.0): just do `tda_nano = tda.astype("m8[ns]")` tda_nano = TimedeltaArray(tda._ndarray.astype("m8[ns]")) - msg = "mis-matched resolutions is not yet supported" - expected = tda * 2 + expected = tda_nano * 2 res = tda_nano + tda tm.assert_extension_array_equal(res, expected) res = tda + tda_nano tm.assert_extension_array_equal(res, expected) - expected = tda * 0 + expected = tda_nano * 0 res = tda - tda_nano tm.assert_extension_array_equal(res, expected) res = tda_nano - tda tm.assert_extension_array_equal(res, expected) - tda_nano[:] = np.timedelta64(1, "ns") # can't round losslessly - msg = "Cannot losslessly cast '-?1 ns' to" - with pytest.raises(ValueError, match=msg): - tda_nano + tda - with pytest.raises(ValueError, match=msg): - tda + tda_nano - with pytest.raises(ValueError, match=msg): - tda - tda_nano - with pytest.raises(ValueError, match=msg): - tda_nano - tda - - result = tda_nano + tda_nano - expected = tda_nano * 2 - tm.assert_extension_array_equal(result, expected) - class TestTimedeltaArray: @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) From f9400f55374fd89f5e07e64274c47576077de390 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Sep 2022 15:29:25 -0700 Subject: [PATCH 4/5] test for no-downcasting --- pandas/tests/scalar/timestamp/test_timestamp.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 48c016434a97d..38657c1bfbee3 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -1030,6 +1030,13 @@ def test_sub_timedeltalike_mismatched_reso(self, ts_tz): assert res == exp assert res._reso == max(ts._reso, other._reso) + def test_addition_doesnt_downcast_reso(self): + # https://github.com/pandas-dev/pandas/pull/48748#pullrequestreview-1122635413 + ts = Timestamp(year=2022, month=1, day=1, microsecond=999999)._as_unit("us") + td = Timedelta(microseconds=1)._as_unit("us") + res = ts + td + assert res._reso == ts._reso + def test_sub_timedelta64_mismatched_reso(self, ts_tz): ts = ts_tz From d8d360e93f136966fd23639cb98d6343929dc78a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 27 Sep 2022 16:53:26 -0700 Subject: [PATCH 5/5] add test --- pandas/tests/arrays/test_datetimes.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index af1a292a2975a..83850c6cd2594 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -207,6 +207,17 @@ def test_compare_mismatched_resolutions(self, comparison_op): np_res = op(left._ndarray, right._ndarray) tm.assert_numpy_array_equal(np_res[1:], ~expected[1:]) + def test_add_mismatched_reso_doesnt_downcast(self): + # https://github.com/pandas-dev/pandas/pull/48748#issuecomment-1260181008 + td = pd.Timedelta(microseconds=1) + dti = pd.date_range("2016-01-01", periods=3) - td + dta = dti._data._as_unit("us") + + res = dta + td._as_unit("us") + # even though the result is an even number of days + # (so we _could_ downcast to unit="s"), we do not. + assert res._unit == "us" + class TestDatetimeArrayComparisons: # TODO: merge this into tests/arithmetic/test_datetime64 once it is