From 6baaa7e391405cc87a9573903a92a8e323e73fed Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Oct 2022 16:33:58 -0700 Subject: [PATCH 1/9] ENH: date_range support reso keyword --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/_libs/tslibs/dtypes.pxd | 2 +- pandas/_libs/tslibs/dtypes.pyx | 2 +- pandas/core/arrays/_ranges.py | 15 ++++++++-- pandas/core/arrays/datetimes.py | 27 ++++++++++++++--- pandas/core/indexes/datetimes.py | 7 +++++ .../indexes/datetimes/test_date_range.py | 30 +++++++++++++++++++ 7 files changed, 77 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index fea3d70d81554..146c87c06a591 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -37,6 +37,8 @@ Other enhancements - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) +- :func:`date_range` now supports a ``reso`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`??`) +- .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index 11b92447f5011..3e3f206685d37 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -4,7 +4,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) -cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev) +cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev) cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 94781374296fa..3812e7ba8f653 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -336,7 +336,7 @@ cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): raise NotImplementedError(unit) -cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev): +cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev): if abbrev == "Y": return NPY_DATETIMEUNIT.NPY_FR_Y elif abbrev == "M": diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 3bef3e59d5687..7108eb979e4ba 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -22,6 +22,7 @@ def generate_regular_range( end: Timestamp | Timedelta | None, periods: int | None, freq: BaseOffset, + reso: str = "ns", ) -> npt.NDArray[np.intp]: """ Generate a range of dates or timestamps with the spans between dates @@ -37,14 +38,24 @@ def generate_regular_range( Number of periods in produced date range. freq : Tick Describes space between dates in produced date range. + reso : str, default "ns" + The resolution the output is meant to represent. Returns ------- - ndarray[np.int64] Representing nanoseconds. + ndarray[np.int64] + Representing the given resolution. """ istart = start.value if start is not None else None iend = end.value if end is not None else None - stride = freq.nanos + freq.nanos # raises if non-fixed frequency + try: + stride = Timedelta(freq)._as_unit(reso, round_ok=False).value + except ValueError as err: + raise ValueError( + f"freq={freq} is incompatible with reso={reso}. " + "Use a lower freq or a higher reso instead." + ) from err if periods is None and istart is not None and iend is not None: b = istart diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fb1a5070c6c0d..8374713288e8f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -40,6 +40,7 @@ tz_convert_from_utc, tzconversion, ) +from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit from pandas._typing import ( DateTimeErrorChoices, IntervalClosedType, @@ -369,6 +370,8 @@ def _generate_range( # type: ignore[override] ambiguous: TimeAmbiguous = "raise", nonexistent: TimeNonexistent = "raise", inclusive: IntervalClosedType = "both", + *, + reso: str | None = None, ) -> DatetimeArray: periods = dtl.validate_periods(periods) @@ -391,6 +394,17 @@ def _generate_range( # type: ignore[override] if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") + if reso is not None: + if reso not in ["s", "ms", "us", "ns"]: + raise ValueError("'reso' must be one of 's', 'ms', 'us', 'ns'") + else: + reso = "ns" + + if start is not None and reso is not None: + start = start._as_unit(reso) + if end is not None and reso is not None: + end = end._as_unit(reso) + left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end = _maybe_normalize_endpoints(start, end, normalize) tz = _infer_tz_from_endpoints(start, end, tz) @@ -416,7 +430,7 @@ def _generate_range( # type: ignore[override] end = end.tz_localize(None) if isinstance(freq, Tick): - i8values = generate_regular_range(start, end, periods, freq) + i8values = generate_regular_range(start, end, periods, freq, reso=reso) else: xdr = _generate_range( start=start, end=end, periods=periods, offset=freq @@ -430,8 +444,13 @@ def _generate_range( # type: ignore[override] if not timezones.is_utc(tz): # short-circuit tz_localize_to_utc which would make # an unnecessary copy with UTC but be a no-op. + creso = abbrev_to_npy_unit(reso) i8values = tzconversion.tz_localize_to_utc( - i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent + i8values, + tz, + ambiguous=ambiguous, + nonexistent=nonexistent, + reso=creso, ) # i8values is localized datetime64 array -> have to convert @@ -466,8 +485,8 @@ def _generate_range( # type: ignore[override] if not right_inclusive and len(i8values) and i8values[-1] == end_i8: i8values = i8values[:-1] - dt64_values = i8values.view("datetime64[ns]") - dtype = tz_to_dtype(tz) + dt64_values = i8values.view(f"datetime64[{reso}]") + dtype = tz_to_dtype(tz, unit=reso) return cls._simple_new(dt64_values, freq=freq, dtype=dtype) # ----------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index bb9d9f69ed38c..776ddb8e53ba8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -948,6 +948,8 @@ def date_range( name: Hashable = None, closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, + *, + reso: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -993,6 +995,10 @@ def date_range( Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.4.0 + reso : str, default None + Specify the desired resolution of the result. + + .. versionadded:: 2.0.0 **kwargs For compatibility. Has no effect on the result. @@ -1137,6 +1143,7 @@ def date_range( tz=tz, normalize=normalize, inclusive=inclusive, + reso=reso, **kwargs, ) return DatetimeIndex._simple_new(dtarr, name=name) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 142679e292b38..d5e9c5890d353 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1166,3 +1166,33 @@ def test_date_range_with_custom_holidays(): freq=freq, ) tm.assert_index_equal(result, expected) + + +class TestDateRangeNonNano: + def test_date_range_reso_validation(self): + msg = "'reso' must be one of 's', 'ms', 'us', 'ns'" + with pytest.raises(ValueError, match=msg): + date_range("2016-01-01", "2016-03-04", periods=3, reso="h") + + def test_date_range_freq_higher_than_reso(self): + # freq being higher-resolution than reso is a problem + msg = "Use a lower freq or a higher reso instead" + with pytest.raises(ValueError, match=msg): + # TODO give a more useful or informative message? + date_range("2016-01-01", "2016-01-01 00:00:00.000001", freq="ns", reso="ms") + + def test_date_range_non_nano(self): + start = np.datetime64("1066-10-14") # Battle of Hastings + end = np.datetime64("2305-07-13") # Jean-Luc Picard's birthday + + dti = date_range(start, end, freq="D", reso="s") + assert dti.freq == "D" + assert dti.dtype == "M8[s]" + + exp = np.arange( + start.astype("M8[s]").view("i8"), + (end + 1).astype("M8[s]").view("i8"), + 24 * 3600, + ).view("M8[s]") + + tm.assert_numpy_array_equal(dti.to_numpy(), exp) From 69be87022b97c006be059bd191b893de835c3688 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Oct 2022 16:35:03 -0700 Subject: [PATCH 2/9] GH ref --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 146c87c06a591..97c51dc65e498 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -37,7 +37,7 @@ Other enhancements - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) -- :func:`date_range` now supports a ``reso`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`??`) +- :func:`date_range` now supports a ``reso`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) - .. --------------------------------------------------------------------------- From f67d877523f35564e00130faedf1efadce7aae6e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Oct 2022 17:44:17 -0700 Subject: [PATCH 3/9] pyright ignore --- pandas/_libs/tslibs/dtypes.pyi | 1 + pandas/core/arrays/_ranges.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index a54db51136d07..b872241d79a54 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -10,6 +10,7 @@ def periods_per_second(reso: int) -> int: ... def is_supported_unit(reso: int) -> bool: ... def npy_unit_to_abbrev(reso: int) -> str: ... def get_supported_reso(reso: int) -> int: ... +def abbrev_to_npy_unit(abbrev: str) -> int: ... class PeriodDtypeBase: _dtype_code: int # PeriodDtypeCode diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 7108eb979e4ba..e11042908665f 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -49,13 +49,17 @@ def generate_regular_range( istart = start.value if start is not None else None iend = end.value if end is not None else None freq.nanos # raises if non-fixed frequency + td = Timedelta(freq) try: - stride = Timedelta(freq)._as_unit(reso, round_ok=False).value + td = td._as_unit( # pyright: ignore[reportGeneralTypeIssues] + reso, round_ok=False + ) except ValueError as err: raise ValueError( f"freq={freq} is incompatible with reso={reso}. " "Use a lower freq or a higher reso instead." ) from err + stride = td.value if periods is None and istart is not None and iend is not None: b = istart From 21877ae9dac09a8105b7414e43afa92213fe7e06 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 19 Oct 2022 11:13:32 -0700 Subject: [PATCH 4/9] reso->unit --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/arrays/_ranges.py | 10 +++---- pandas/core/arrays/datetimes.py | 28 +++++++++---------- pandas/core/indexes/datetimes.py | 6 ++-- .../indexes/datetimes/test_date_range.py | 10 +++---- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 8af05a4f41977..c2dd919a12b01 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -37,7 +37,7 @@ Other enhancements - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) -- :func:`date_range` now supports a ``reso`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) +- :func:`date_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index e11042908665f..34835a8df7651 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -22,7 +22,7 @@ def generate_regular_range( end: Timestamp | Timedelta | None, periods: int | None, freq: BaseOffset, - reso: str = "ns", + unit: str = "ns", ) -> npt.NDArray[np.intp]: """ Generate a range of dates or timestamps with the spans between dates @@ -38,7 +38,7 @@ def generate_regular_range( Number of periods in produced date range. freq : Tick Describes space between dates in produced date range. - reso : str, default "ns" + unit : str, default "ns" The resolution the output is meant to represent. Returns @@ -52,12 +52,12 @@ def generate_regular_range( td = Timedelta(freq) try: td = td._as_unit( # pyright: ignore[reportGeneralTypeIssues] - reso, round_ok=False + unit, round_ok=False ) except ValueError as err: raise ValueError( - f"freq={freq} is incompatible with reso={reso}. " - "Use a lower freq or a higher reso instead." + f"freq={freq} is incompatible with unit={unit}. " + "Use a lower freq or a higher unit instead." ) from err stride = td.value diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 74568aa0b7fbf..6d07e7e18615c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -385,7 +385,7 @@ def _generate_range( # type: ignore[override] nonexistent: TimeNonexistent = "raise", inclusive: IntervalClosedType = "both", *, - reso: str | None = None, + unit: str | None = None, ) -> DatetimeArray: periods = dtl.validate_periods(periods) @@ -408,16 +408,16 @@ def _generate_range( # type: ignore[override] if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") - if reso is not None: - if reso not in ["s", "ms", "us", "ns"]: - raise ValueError("'reso' must be one of 's', 'ms', 'us', 'ns'") + if unit is not None: + if unit not in ["s", "ms", "us", "ns"]: + raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'") else: - reso = "ns" + unit = "ns" - if start is not None and reso is not None: - start = start._as_unit(reso) - if end is not None and reso is not None: - end = end._as_unit(reso) + if start is not None and unit is not None: + start = start._as_unit(unit) + if end is not None and unit is not None: + end = end._as_unit(unit) left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end = _maybe_normalize_endpoints(start, end, normalize) @@ -444,7 +444,7 @@ def _generate_range( # type: ignore[override] end = end.tz_localize(None) if isinstance(freq, Tick): - i8values = generate_regular_range(start, end, periods, freq, reso=reso) + i8values = generate_regular_range(start, end, periods, freq, unit=unit) else: xdr = _generate_range( start=start, end=end, periods=periods, offset=freq @@ -458,13 +458,13 @@ def _generate_range( # type: ignore[override] if not timezones.is_utc(tz): # short-circuit tz_localize_to_utc which would make # an unnecessary copy with UTC but be a no-op. - creso = abbrev_to_npy_unit(reso) + creso = abbrev_to_npy_unit(unit) i8values = tzconversion.tz_localize_to_utc( i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent, - reso=creso, + creso=creso, ) # i8values is localized datetime64 array -> have to convert @@ -499,8 +499,8 @@ def _generate_range( # type: ignore[override] if not right_inclusive and len(i8values) and i8values[-1] == end_i8: i8values = i8values[:-1] - dt64_values = i8values.view(f"datetime64[{reso}]") - dtype = tz_to_dtype(tz, unit=reso) + dt64_values = i8values.view(f"datetime64[{unit}]") + dtype = tz_to_dtype(tz, unit=unit) return cls._simple_new(dt64_values, freq=freq, dtype=dtype) # ----------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e91e194281023..c9ed5a852f16a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -949,7 +949,7 @@ def date_range( closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, *, - reso: str | None = None, + unit: str | None = None, **kwargs, ) -> DatetimeIndex: """ @@ -995,7 +995,7 @@ def date_range( Include boundaries; Whether to set each bound as closed or open. .. versionadded:: 1.4.0 - reso : str, default None + unit : str, default None Specify the desired resolution of the result. .. versionadded:: 2.0.0 @@ -1143,7 +1143,7 @@ def date_range( tz=tz, normalize=normalize, inclusive=inclusive, - reso=reso, + unit=unit, **kwargs, ) return DatetimeIndex._simple_new(dtarr, name=name) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index d5e9c5890d353..e16c487737993 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1170,22 +1170,22 @@ def test_date_range_with_custom_holidays(): class TestDateRangeNonNano: def test_date_range_reso_validation(self): - msg = "'reso' must be one of 's', 'ms', 'us', 'ns'" + msg = "'unit' must be one of 's', 'ms', 'us', 'ns'" with pytest.raises(ValueError, match=msg): - date_range("2016-01-01", "2016-03-04", periods=3, reso="h") + date_range("2016-01-01", "2016-03-04", periods=3, unit="h") def test_date_range_freq_higher_than_reso(self): # freq being higher-resolution than reso is a problem - msg = "Use a lower freq or a higher reso instead" + msg = "Use a lower freq or a higher unit instead" with pytest.raises(ValueError, match=msg): # TODO give a more useful or informative message? - date_range("2016-01-01", "2016-01-01 00:00:00.000001", freq="ns", reso="ms") + date_range("2016-01-01", "2016-01-01 00:00:00.000001", freq="ns", unit="ms") def test_date_range_non_nano(self): start = np.datetime64("1066-10-14") # Battle of Hastings end = np.datetime64("2305-07-13") # Jean-Luc Picard's birthday - dti = date_range(start, end, freq="D", reso="s") + dti = date_range(start, end, freq="D", unit="s") assert dti.freq == "D" assert dti.dtype == "M8[s]" From 4a8ed2f483d5328b6a6b2de2963674ee55530b16 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 19 Oct 2022 12:02:42 -0700 Subject: [PATCH 5/9] raise if endpoints cant cast losslessly --- pandas/core/arrays/datetimes.py | 5 +++-- .../indexes/datetimes/test_date_range.py | 21 +++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6d07e7e18615c..80f2376a1f84e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -415,9 +415,9 @@ def _generate_range( # type: ignore[override] unit = "ns" if start is not None and unit is not None: - start = start._as_unit(unit) + start = start._as_unit(unit, round_ok=False) if end is not None and unit is not None: - end = end._as_unit(unit) + end = end._as_unit(unit, round_ok=False) left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end = _maybe_normalize_endpoints(start, end, normalize) @@ -433,6 +433,7 @@ def _generate_range( # type: ignore[override] end = _maybe_localize_point( end, end_tz, end, freq, tz, ambiguous, nonexistent ) + if freq is not None: # We break Day arithmetic (fixed 24 hour) here and opt for # Day to mean calendar day (23/24/25 hour). Therefore, strip diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e16c487737993..8bc31226a11b0 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1178,8 +1178,25 @@ def test_date_range_freq_higher_than_reso(self): # freq being higher-resolution than reso is a problem msg = "Use a lower freq or a higher unit instead" with pytest.raises(ValueError, match=msg): - # TODO give a more useful or informative message? - date_range("2016-01-01", "2016-01-01 00:00:00.000001", freq="ns", unit="ms") + # # TODO give a more useful or informative message? + date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms") + + # But matching reso is OK + date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms") + date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us") + date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns") + + def test_date_range_freq_lower_than_endpoints(self): + start = Timestamp("2022-10-19 11:50:44.719781") + end = Timestamp("2022-10-19 11:50:47.066458") + + # start and end cannot be cast to "s" unit without lossy rounding, + # so we do not allow this in date_range + with pytest.raises(ValueError, match="Cannot losslessly convert units"): + date_range(start, end, periods=3, unit="s") + + # but we can losslessly cast to "us" + date_range(start, end, periods=3, unit="us") def test_date_range_non_nano(self): start = np.datetime64("1066-10-14") # Battle of Hastings From 6220000795ba9cd4358deb7118a4f2d740f03eb1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 16 Nov 2022 16:12:43 -0800 Subject: [PATCH 6/9] add assertions --- pandas/core/arrays/_ranges.py | 2 +- pandas/core/arrays/datetimes.py | 4 +-- .../indexes/datetimes/test_date_range.py | 29 +++++++++++++++---- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 0db07103651f9..df1a44f8ba895 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -51,7 +51,7 @@ def generate_regular_range( freq.nanos # raises if non-fixed frequency td = Timedelta(freq) try: - td = td._as_unit( # pyright: ignore[reportGeneralTypeIssues] + td = td.as_unit( # pyright: ignore[reportGeneralTypeIssues] unit, round_ok=False ) except ValueError as err: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cc51aa51a6a68..24beb12e5d9ad 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -412,9 +412,9 @@ def _generate_range( # type: ignore[override] unit = "ns" if start is not None and unit is not None: - start = start._as_unit(unit, round_ok=False) + start = start.as_unit(unit, round_ok=False) if end is not None and unit is not None: - end = end._as_unit(unit, round_ok=False) + end = end.as_unit(unit, round_ok=False) left_inclusive, right_inclusive = validate_inclusive(inclusive) start, end = _maybe_normalize_endpoints(start, end, normalize) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 2351162f46051..e90f9fb2b5e36 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1199,10 +1199,24 @@ def test_date_range_freq_higher_than_reso(self): # # TODO give a more useful or informative message? date_range("2016-01-01", "2016-01-02", freq="ns", unit="ms") - # But matching reso is OK - date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms") - date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us") - date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns") + def test_date_range_freq_matches_reso(self): + # GH#49106 matching reso is OK + dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="ms", unit="ms") + rng = np.arange(1_451_606_400_000, 1_451_606_401_001, dtype=np.int64) + expected = DatetimeIndex(rng.view("M8[ms]"), freq="ms") + tm.assert_index_equal(dti, expected) + + dti = date_range("2016-01-01", "2016-01-01 00:00:01", freq="us", unit="us") + rng = np.arange(1_451_606_400_000_000, 1_451_606_401_000_001, dtype=np.int64) + expected = DatetimeIndex(rng.view("M8[us]"), freq="us") + tm.assert_index_equal(dti, expected) + + dti = date_range("2016-01-01", "2016-01-01 00:00:00.001", freq="ns", unit="ns") + rng = np.arange( + 1_451_606_400_000_000_000, 1_451_606_400_001_000_001, dtype=np.int64 + ) + expected = DatetimeIndex(rng.view("M8[ns]"), freq="ns") + tm.assert_index_equal(dti, expected) def test_date_range_freq_lower_than_endpoints(self): start = Timestamp("2022-10-19 11:50:44.719781") @@ -1214,7 +1228,12 @@ def test_date_range_freq_lower_than_endpoints(self): date_range(start, end, periods=3, unit="s") # but we can losslessly cast to "us" - date_range(start, end, periods=3, unit="us") + dti = date_range(start, end, periods=2, unit="us") + rng = np.array( + [start.as_unit("us").value, end.as_unit("us").value], dtype=np.int64 + ) + expected = DatetimeIndex(rng.view("M8[us]")) + tm.assert_index_equal(dti, expected) def test_date_range_non_nano(self): start = np.datetime64("1066-10-14") # Battle of Hastings From 44c43cf9886da48c8fbb669266c96f3fae0e5563 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Nov 2022 07:57:57 -0800 Subject: [PATCH 7/9] mypy fixup --- pandas/core/arrays/_ranges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index df1a44f8ba895..baf8470a866ff 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -59,7 +59,7 @@ def generate_regular_range( f"freq={freq} is incompatible with unit={unit}. " "Use a lower freq or a higher unit instead." ) from err - stride = td.value + stride = int(td.value) if periods is None and istart is not None and iend is not None: b = istart From d0b6d423b0986f1571339a90cbd4100a73abe8b0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 11:34:02 -0800 Subject: [PATCH 8/9] example with unit --- pandas/core/indexes/datetimes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 826655a4a1f47..ff53232544422 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -972,6 +972,14 @@ def date_range( >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') + + **Specify a unit** + + >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s"") + DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', + '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', + '2817-01-01', '2917-01-01'], + dtype='datetime64[s]', freq='100AS-JAN') """ if freq is None and com.any_none(periods, start, end): freq = "D" From 3277a40ea31fb8914826a0ed5963cf35c64166e8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Nov 2022 12:54:44 -0800 Subject: [PATCH 9/9] typo fixup --- pandas/core/indexes/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ff53232544422..57cbc76d1c2fa 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -975,7 +975,7 @@ def date_range( **Specify a unit** - >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s"") + >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s") DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01', '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01', '2817-01-01', '2917-01-01'],