From 4cadf3626f4582894ab6b6e24504569e5a58e12e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 14 Nov 2021 20:42:18 -0800 Subject: [PATCH 1/2] ENH: don't silently ignore dtype in NaT/Timestamp/Timedelta to_numpy --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/_libs/tslibs/nattype.pyi | 4 ++- pandas/_libs/tslibs/nattype.pyx | 25 +++++++++++++++---- pandas/_libs/tslibs/timedeltas.pyx | 4 +++ pandas/_libs/tslibs/timestamps.pyx | 4 +++ pandas/tests/scalar/test_nat.py | 20 +++++++++++++++ .../tests/scalar/timedelta/test_timedelta.py | 6 +++++ .../tests/scalar/timestamp/test_timestamp.py | 6 +++++ 8 files changed, 65 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index a593a03de5c25..6de4537d1676b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -210,6 +210,8 @@ Other enhancements - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`) - :meth:`.GroupBy.mean` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`43731`) - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`) +- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`??`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index 22e6395a1fe99..a7ee9a70342d4 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -18,7 +18,9 @@ class NaTType(datetime): value: np.int64 def asm8(self) -> np.datetime64: ... def to_datetime64(self) -> np.datetime64: ... - def to_numpy(self, dtype=..., copy: bool = ...) -> np.datetime64: ... + def to_numpy( + self, dtype=..., copy: bool = ... + ) -> np.datetime64 | np.timedelta64: ... 
@property def is_leap_year(self) -> bool: ... @property diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 09bfc4527a428..27736d74ab42a 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -258,19 +258,20 @@ cdef class _NaT(datetime): """ return np.datetime64('NaT', "ns") - def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + def to_numpy(self, dtype=None, copy=False) -> np.datetime64 | np.timedelta64: """ - Convert the Timestamp to a NumPy datetime64. + Convert NaT to a NumPy datetime64 or timedelta64. .. versionadded:: 0.25.0 - This is an alias method for `Timestamp.to_datetime64()`. The dtype and - copy parameters are available here only for compatibility. Their values + With the default 'dtype', this is an alias method for `NaT.to_datetime64()`. + + The copy parameter is available here only for compatibility. Its value will not affect the return value. Returns ------- - numpy.datetime64 + numpy.datetime64 or numpy.timedelta64 See Also -------- @@ -286,7 +287,21 @@ cdef class _NaT(datetime): >>> pd.NaT.to_numpy() numpy.datetime64('NaT') + + >>> pd.NaT.to_numpy("m8[ns]") + numpy.timedelta64('NaT','ns') """ + if dtype is not None: + dtype = np.dtype(dtype) + if dtype.kind == "M": + return np.datetime64("NaT").astype(dtype) + elif dtype.kind == "m": + return np.timedelta64("NaT").astype(dtype) + else: + raise ValueError( + "NaT.to_numpy dtype must be a datetime64 dtype, timedelta64 " + "dtype, or None." + ) return self.to_datetime64() def __repr__(self) -> str: diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 43f9be3fef5ee..be39ccd444865 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -929,6 +929,10 @@ cdef class _Timedelta(timedelta): -------- Series.to_numpy : Similar method for Series. 
""" + if dtype is not None or copy is not False: + raise ValueError( + "Timedelta.to_numpy dtype and copy arguments are ignored" + ) return self.to_timedelta64() def view(self, dtype): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 28b8158548ca8..bf3b3ed0264a0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -934,6 +934,10 @@ cdef class _Timestamp(ABCTimestamp): >>> pd.NaT.to_numpy() numpy.datetime64('NaT') """ + if dtype is not None or copy is not False: + raise ValueError( + "Timestamp.to_numpy dtype and copy arguments are ignored." + ) return self.to_datetime64() def to_period(self, freq=None): diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index b9718249b38c8..830463b9a9dea 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -330,6 +330,11 @@ def test_nat_doc_strings(compare): if klass == Timestamp and method == "isoformat": return + if method == "to_numpy": + # can return either dt64 or td64 depending on dtype, different docstring + # is intentional + return + nat_doc = getattr(NaT, method).__doc__ assert klass_doc == nat_doc @@ -511,6 +516,21 @@ def test_to_numpy_alias(): assert isna(expected) and isna(result) + result = NaT.to_numpy("M8[s]") + assert isinstance(result, np.datetime64) + assert result.dtype == "M8[s]" + + result = NaT.to_numpy("m8[ns]") + assert isinstance(result, np.timedelta64) + assert result.dtype == "m8[ns]" + + result = NaT.to_numpy("m8[s]") + assert isinstance(result, np.timedelta64) + assert result.dtype == "m8[s]" + + with pytest.raises(ValueError, match="NaT.to_numpy dtype must be a "): + NaT.to_numpy(np.int64) + @pytest.mark.parametrize( "other", diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 4aa2f62fe85a0..ffc989057b7a8 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ 
b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -317,6 +317,12 @@ def test_to_numpy_alias(self): td = Timedelta("10m7s") assert td.to_timedelta64() == td.to_numpy() + msg = "dtype and copy arguments are ignored" + with pytest.raises(ValueError, match=msg): + td.to_numpy("m8[s]") + with pytest.raises(ValueError, match=msg): + td.to_numpy(copy=True) + @pytest.mark.parametrize( "freq,s1,s2", [ diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index f2010b33538fb..99238668543e8 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -619,6 +619,12 @@ def test_to_numpy_alias(self): ts = Timestamp(datetime.now()) assert ts.to_datetime64() == ts.to_numpy() + msg = "dtype and copy arguments are ignored" + with pytest.raises(ValueError, match=msg): + ts.to_numpy("M8[s]") + with pytest.raises(ValueError, match=msg): + ts.to_numpy(copy=True) + class SubDatetime(datetime): pass From c3c1f098611e5a1d4219bf804143f183e6c28166 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 14 Nov 2021 20:44:11 -0800 Subject: [PATCH 2/2] GH ref --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/_libs/tslibs/nattype.pyx | 1 + pandas/tests/scalar/test_nat.py | 5 +++-- pandas/tests/scalar/timedelta/test_timedelta.py | 1 + pandas/tests/scalar/timestamp/test_timestamp.py | 1 + 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 6de4537d1676b..2492b51aa6c23 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -210,7 +210,7 @@ Other enhancements - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`) - :meth:`.GroupBy.mean` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`43731`) - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument 
from the base :class:``datetime`` class (:issue:`26131`) -- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`??`) +- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 27736d74ab42a..0ec0fb9e814c1 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -292,6 +292,7 @@ cdef class _NaT(datetime): numpy.timedelta64('NaT','ns') """ if dtype is not None: + # GH#44460 dtype = np.dtype(dtype) if dtype.kind == "M": return np.datetime64("NaT").astype(dtype) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 830463b9a9dea..73227caa9fd62 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -331,8 +331,8 @@ def test_nat_doc_strings(compare): return if method == "to_numpy": - # can return either dt64 or td64 depending on dtype, different docstring - # is intentional + # GH#44460 can return either dt64 or td64 depending on dtype, + # different docstring is intentional return nat_doc = getattr(NaT, method).__doc__ @@ -516,6 +516,7 @@ def test_to_numpy_alias(): assert isna(expected) and isna(result) + # GH#44460 result = NaT.to_numpy("M8[s]") assert isinstance(result, np.datetime64) assert result.dtype == "M8[s]" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index ffc989057b7a8..cb3468c097cbf 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -317,6 +317,7 @@ def test_to_numpy_alias(self): td = Timedelta("10m7s") assert td.to_timedelta64() == td.to_numpy() + # GH#44460 msg = "dtype and copy arguments are ignored" with pytest.raises(ValueError, match=msg): td.to_numpy("m8[s]") 
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 99238668543e8..214ad634e78da 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -619,6 +619,7 @@ def test_to_numpy_alias(self): ts = Timestamp(datetime.now()) assert ts.to_datetime64() == ts.to_numpy() + # GH#44460 msg = "dtype and copy arguments are ignored" with pytest.raises(ValueError, match=msg): ts.to_numpy("M8[s]")