Skip to content

Commit 3cbc459

Browse files
andersy005 and dcherian
authored and committed
Extend DatetimeAccessor properties and support .dt accessor for Timedelta (#3612)
* Support `.dt` accessor for Timedelta
* Rename accessors
* Use `is_np_timedelta_like` for consistency
* Use `pd.timedelta_range`
* Move shared method to Properties
* Parametrize field access test
* move `strftime()` to `DatetimeAccessor`
* Update the documentation
* Update `whats-new.rst`
* Add PR reference
* Parametrize tests
* Extend DatetimeAccessor properties
* Cleanup
* Fix docstring
1 parent 471a5d6 commit 3cbc459

File tree

6 files changed

+418
-139
lines changed

6 files changed

+418
-139
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,7 @@ Accessors
616616
:toctree: generated/
617617

618618
core.accessor_dt.DatetimeAccessor
619+
core.accessor_dt.TimedeltaAccessor
619620
core.accessor_str.StringAccessor
620621

621622
Custom Indexes

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ New Features
3131
- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
3232
and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
3333
By `Deepak Cherian <https://github.com/dcherian>`_
34+
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
35+
and support ``.dt`` accessor for timedelta
36+
via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
37+
By `Anderson Banihirwe <https://github.com/andersy005>`_.
3438

3539
Bug fixes
3640
~~~~~~~~~

xarray/core/accessor_dt.py

Lines changed: 210 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import numpy as np
22
import pandas as pd
33

4-
from .common import _contains_datetime_like_objects, is_np_datetime_like
4+
from .common import (
5+
_contains_datetime_like_objects,
6+
is_np_datetime_like,
7+
is_np_timedelta_like,
8+
)
59
from .pycompat import dask_array_type
610

711

@@ -145,37 +149,8 @@ def _strftime(values, date_format):
145149
return access_method(values, date_format)
146150

147151

148-
class DatetimeAccessor:
149-
"""Access datetime fields for DataArrays with datetime-like dtypes.
150-
151-
Similar to pandas, fields can be accessed through the `.dt` attribute
152-
for applicable DataArrays:
153-
154-
>>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
155-
... freq='D', periods=100)})
156-
>>> ds.time.dt
157-
<xarray.core.accessors.DatetimeAccessor at 0x10c369f60>
158-
>>> ds.time.dt.dayofyear[:5]
159-
<xarray.DataArray 'dayofyear' (time: 5)>
160-
array([1, 2, 3, 4, 5], dtype=int32)
161-
Coordinates:
162-
* time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
163-
164-
All of the pandas fields are accessible here. Note that these fields are
165-
not calendar-aware; if your datetimes are encoded with a non-Gregorian
166-
calendar (e.g. a 360-day calendar) using cftime, then some fields like
167-
`dayofyear` may not be accurate.
168-
169-
"""
170-
152+
class Properties:
171153
def __init__(self, obj):
172-
if not _contains_datetime_like_objects(obj):
173-
raise TypeError(
174-
"'dt' accessor only available for "
175-
"DataArray with datetime64 timedelta64 dtype or "
176-
"for arrays containing cftime datetime "
177-
"objects."
178-
)
179154
self._obj = obj
180155

181156
def _tslib_field_accessor( # type: ignore
@@ -194,48 +169,6 @@ def f(self, dtype=dtype):
194169
f.__doc__ = docstring
195170
return property(f)
196171

197-
year = _tslib_field_accessor("year", "The year of the datetime", np.int64)
198-
month = _tslib_field_accessor(
199-
"month", "The month as January=1, December=12", np.int64
200-
)
201-
day = _tslib_field_accessor("day", "The days of the datetime", np.int64)
202-
hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64)
203-
minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64)
204-
second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64)
205-
microsecond = _tslib_field_accessor(
206-
"microsecond", "The microseconds of the datetime", np.int64
207-
)
208-
nanosecond = _tslib_field_accessor(
209-
"nanosecond", "The nanoseconds of the datetime", np.int64
210-
)
211-
weekofyear = _tslib_field_accessor(
212-
"weekofyear", "The week ordinal of the year", np.int64
213-
)
214-
week = weekofyear
215-
dayofweek = _tslib_field_accessor(
216-
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
217-
)
218-
weekday = dayofweek
219-
220-
weekday_name = _tslib_field_accessor(
221-
"weekday_name", "The name of day in a week (ex: Friday)", object
222-
)
223-
224-
dayofyear = _tslib_field_accessor(
225-
"dayofyear", "The ordinal day of the year", np.int64
226-
)
227-
quarter = _tslib_field_accessor("quarter", "The quarter of the date")
228-
days_in_month = _tslib_field_accessor(
229-
"days_in_month", "The number of days in the month", np.int64
230-
)
231-
daysinmonth = days_in_month
232-
233-
season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object)
234-
235-
time = _tslib_field_accessor(
236-
"time", "Timestamps corresponding to datetimes", object
237-
)
238-
239172
def _tslib_round_accessor(self, name, freq):
240173
obj_type = type(self._obj)
241174
result = _round_field(self._obj.data, name, freq)
@@ -290,6 +223,50 @@ def round(self, freq):
290223
"""
291224
return self._tslib_round_accessor("round", freq)
292225

226+
227+
class DatetimeAccessor(Properties):
228+
"""Access datetime fields for DataArrays with datetime-like dtypes.
229+
230+
Fields can be accessed through the `.dt` attribute
231+
for applicable DataArrays.
232+
233+
Notes
234+
------
235+
Note that these fields are not calendar-aware; if your datetimes are encoded
236+
with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
237+
then some fields like `dayofyear` may not be accurate.
238+
239+
Examples
240+
---------
241+
>>> import xarray as xr
242+
>>> import pandas as pd
243+
>>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10)
244+
>>> ts = xr.DataArray(dates, dims=('time'))
245+
>>> ts
246+
<xarray.DataArray (time: 10)>
247+
array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000',
248+
'2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000',
249+
'2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000',
250+
'2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000',
251+
'2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'],
252+
dtype='datetime64[ns]')
253+
Coordinates:
254+
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
255+
>>> ts.dt
256+
<xarray.core.accessor_dt.DatetimeAccessor object at 0x118b54d68>
257+
>>> ts.dt.dayofyear
258+
<xarray.DataArray 'dayofyear' (time: 10)>
259+
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
260+
Coordinates:
261+
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
262+
>>> ts.dt.quarter
263+
<xarray.DataArray 'quarter' (time: 10)>
264+
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
265+
Coordinates:
266+
* time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10
267+
268+
"""
269+
293270
def strftime(self, date_format):
294271
'''
295272
Return an array of formatted strings specified by date_format, which
@@ -323,3 +300,163 @@ def strftime(self, date_format):
323300
return obj_type(
324301
result, name="strftime", coords=self._obj.coords, dims=self._obj.dims
325302
)
303+
304+
year = Properties._tslib_field_accessor(
305+
"year", "The year of the datetime", np.int64
306+
)
307+
month = Properties._tslib_field_accessor(
308+
"month", "The month as January=1, December=12", np.int64
309+
)
310+
day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64)
311+
hour = Properties._tslib_field_accessor(
312+
"hour", "The hours of the datetime", np.int64
313+
)
314+
minute = Properties._tslib_field_accessor(
315+
"minute", "The minutes of the datetime", np.int64
316+
)
317+
second = Properties._tslib_field_accessor(
318+
"second", "The seconds of the datetime", np.int64
319+
)
320+
microsecond = Properties._tslib_field_accessor(
321+
"microsecond", "The microseconds of the datetime", np.int64
322+
)
323+
nanosecond = Properties._tslib_field_accessor(
324+
"nanosecond", "The nanoseconds of the datetime", np.int64
325+
)
326+
weekofyear = Properties._tslib_field_accessor(
327+
"weekofyear", "The week ordinal of the year", np.int64
328+
)
329+
week = weekofyear
330+
dayofweek = Properties._tslib_field_accessor(
331+
"dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64
332+
)
333+
weekday = dayofweek
334+
335+
weekday_name = Properties._tslib_field_accessor(
336+
"weekday_name", "The name of day in a week", object
337+
)
338+
339+
dayofyear = Properties._tslib_field_accessor(
340+
"dayofyear", "The ordinal day of the year", np.int64
341+
)
342+
quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date")
343+
days_in_month = Properties._tslib_field_accessor(
344+
"days_in_month", "The number of days in the month", np.int64
345+
)
346+
daysinmonth = days_in_month
347+
348+
season = Properties._tslib_field_accessor("season", "Season of the year", object)
349+
350+
time = Properties._tslib_field_accessor(
351+
"time", "Timestamps corresponding to datetimes", object
352+
)
353+
354+
is_month_start = Properties._tslib_field_accessor(
355+
"is_month_start",
356+
"Indicates whether the date is the first day of the month.",
357+
bool,
358+
)
359+
is_month_end = Properties._tslib_field_accessor(
360+
"is_month_end", "Indicates whether the date is the last day of the month.", bool
361+
)
362+
is_quarter_start = Properties._tslib_field_accessor(
363+
"is_quarter_start",
364+
"Indicator for whether the date is the first day of a quarter.",
365+
bool,
366+
)
367+
is_quarter_end = Properties._tslib_field_accessor(
368+
"is_quarter_end",
369+
"Indicator for whether the date is the last day of a quarter.",
370+
bool,
371+
)
372+
is_year_start = Properties._tslib_field_accessor(
373+
"is_year_start", "Indicate whether the date is the first day of a year.", bool
374+
)
375+
is_year_end = Properties._tslib_field_accessor(
376+
"is_year_end", "Indicate whether the date is the last day of the year.", bool
377+
)
378+
is_leap_year = Properties._tslib_field_accessor(
379+
"is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool
380+
)
381+
382+
383+
class TimedeltaAccessor(Properties):
384+
"""Access Timedelta fields for DataArrays with Timedelta-like dtypes.
385+
386+
Fields can be accessed through the `.dt` attribute for applicable DataArrays.
387+
388+
Examples
389+
--------
390+
>>> import pandas as pd
391+
>>> import xarray as xr
392+
>>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
393+
>>> ts = xr.DataArray(dates, dims=('time'))
394+
>>> ts
395+
<xarray.DataArray (time: 20)>
396+
array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
397+
172800000000000, 194400000000000, 216000000000000, 237600000000000,
398+
259200000000000, 280800000000000, 302400000000000, 324000000000000,
399+
345600000000000, 367200000000000, 388800000000000, 410400000000000,
400+
432000000000000, 453600000000000, 475200000000000, 496800000000000],
401+
dtype='timedelta64[ns]')
402+
Coordinates:
403+
* time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
404+
>>> ts.dt
405+
<xarray.core.accessor_dt.TimedeltaAccessor object at 0x109a27d68>
406+
>>> ts.dt.days
407+
<xarray.DataArray 'days' (time: 20)>
408+
array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
409+
Coordinates:
410+
* time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
411+
>>> ts.dt.microseconds
412+
<xarray.DataArray 'microseconds' (time: 20)>
413+
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
414+
Coordinates:
415+
* time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
416+
>>> ts.dt.seconds
417+
<xarray.DataArray 'seconds' (time: 20)>
418+
array([ 0, 21600, 43200, 64800, 0, 21600, 43200, 64800, 0,
419+
21600, 43200, 64800, 0, 21600, 43200, 64800, 0, 21600,
420+
43200, 64800])
421+
Coordinates:
422+
* time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
423+
"""
424+
425+
days = Properties._tslib_field_accessor(
426+
"days", "Number of days for each element.", np.int64
427+
)
428+
seconds = Properties._tslib_field_accessor(
429+
"seconds",
430+
"Number of seconds (>= 0 and less than 1 day) for each element.",
431+
np.int64,
432+
)
433+
microseconds = Properties._tslib_field_accessor(
434+
"microseconds",
435+
"Number of microseconds (>= 0 and less than 1 second) for each element.",
436+
np.int64,
437+
)
438+
nanoseconds = Properties._tslib_field_accessor(
439+
"nanoseconds",
440+
"Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
441+
np.int64,
442+
)
443+
444+
445+
class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
446+
def __new__(cls, obj):
447+
# CombinedDatetimelikeAccessor isn't really instantiated. Instead
448+
# we need to choose which parent (datetime or timedelta) is
449+
# appropriate. Since we're checking the dtypes anyway, we'll just
450+
# do all the validation here.
451+
if not _contains_datetime_like_objects(obj):
452+
raise TypeError(
453+
"'.dt' accessor only available for "
454+
"DataArray with datetime64 timedelta64 dtype or "
455+
"for arrays containing cftime datetime "
456+
"objects."
457+
)
458+
459+
if is_np_timedelta_like(obj.dtype):
460+
return TimedeltaAccessor(obj)
461+
else:
462+
return DatetimeAccessor(obj)

xarray/core/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
14471447
return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
14481448

14491449

1450+
def is_np_timedelta_like(dtype: DTypeLike) -> bool:
1451+
"""Check whether dtype is of the timedelta64 dtype.
1452+
"""
1453+
return np.issubdtype(dtype, np.timedelta64)
1454+
1455+
14501456
def _contains_cftime_datetimes(array) -> bool:
14511457
"""Check if an array contains cftime.datetime objects
14521458
"""

xarray/core/dataarray.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
rolling,
3434
utils,
3535
)
36-
from .accessor_dt import DatetimeAccessor
36+
from .accessor_dt import CombinedDatetimelikeAccessor
3737
from .accessor_str import StringAccessor
3838
from .alignment import (
3939
_broadcast_helper,
@@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords):
258258
_coarsen_cls = rolling.DataArrayCoarsen
259259
_resample_cls = resample.DataArrayResample
260260

261-
dt = property(DatetimeAccessor)
261+
dt = property(CombinedDatetimelikeAccessor)
262262

263263
def __init__(
264264
self,

0 commit comments

Comments
 (0)