From 92416a6db6994c9f65c629af1ce5ebf5413040c7 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Tue, 1 May 2018 23:07:58 +0100 Subject: [PATCH 1/9] BUG: DatetimeIndex._data should return an ndarray --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/datetimes.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7509c502f27ed..f8dd2112f7879 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -996,6 +996,7 @@ Other API Changes - :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) - A user-defined-function that is passed to :func:`Series.rolling().aggregate() `, :func:`DataFrame.rolling().aggregate() `, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) - Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). +- ``DatetimeIndex._data`` now returns a numpy array in all cases (:issue:`20810`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 96c30eeb92628..b189a36438158 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -598,6 +598,9 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, if we are passed a non-dtype compat, then coerce using the constructor """ + if isinstance(values, DatetimeIndex): + values = values.values + if getattr(values, 'dtype', None) is None: # empty, but with dtype compat if values is None: From a6a038bd28a9ba071814a7a145ecc55e59f4f643 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Tue, 8 May 2018 20:06:48 +0100 Subject: [PATCH 2/9] tz_localize_to_utc generates an array not DTI --- pandas/core/indexes/datetimes.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b189a36438158..9513286668621 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -525,7 +525,6 @@ def _generate(cls, start, end, periods, name, freq, freq=freq, name=name) else: index = _generate_regular_range(start, end, periods, freq) - else: if tz is not None: @@ -549,12 +548,13 @@ def _generate(cls, start, end, periods, name, freq, freq=freq, name=name) else: index = _generate_regular_range(start, end, periods, freq) - if tz is not None and getattr(index, 'tz', None) is None: - index = conversion.tz_localize_to_utc(_ensure_int64(index), - tz, - ambiguous=ambiguous) - index = index.view(_NS_DTYPE) + arr = conversion.tz_localize_to_utc(_ensure_int64(index), + tz, + ambiguous=ambiguous) + + arr = arr.view(_NS_DTYPE) + index = DatetimeIndex(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare @@ -575,7 +575,9 @@ def _generate(cls, start, end, periods, name, freq, index = index[1:] if not right_closed and len(index) and index[-1] == end: index = index[:-1] - index = cls._simple_new(index, name=name, freq=freq, tz=tz) + + index = cls._simple_new(index.values, name=name, freq=freq, tz=tz) + return index def _convert_for_op(self, value): @@ -598,9 +600,6 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, if we are passed a non-dtype compat, then coerce using the constructor """ - if isinstance(values, DatetimeIndex): - values = values.values - if getattr(values, 'dtype', None) is None: # empty, but with dtype compat if values is None: From 917917ef4d452a6f20a4c8281a7bb718b4a1b799 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Sat, 2 Jun 2018 13:36:35 +0100 Subject: [PATCH 3/9] Remove whatsnew --- doc/source/whatsnew/v0.23.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f8dd2112f7879..7509c502f27ed 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -996,7 +996,6 @@ Other API Changes - :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) - A user-defined-function that is passed to :func:`Series.rolling().aggregate() `, :func:`DataFrame.rolling().aggregate() `, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here `. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) - Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). -- ``DatetimeIndex._data`` now returns a numpy array in all cases (:issue:`20810`) .. _whatsnew_0230.deprecations: From cc6ab25c36cf1908d6345da29ad911c591477685 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Fri, 6 Jul 2018 21:54:17 +0100 Subject: [PATCH 4/9] Check whether _simple_new always receives an ndarray --- pandas/core/indexes/base.py | 3 +++ pandas/core/indexes/datetimes.py | 6 +++++- pandas/tests/indexes/test_base.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3d7c4762d21ca..6ea8a9ac165bc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -506,6 +506,9 @@ def _shallow_copy(self, values=None, **kwargs): attributes.update(kwargs) if not len(values) and 'dtype' not in kwargs: attributes['dtype'] = self.dtype + from pandas import DatetimeIndex + if isinstance(values, DatetimeIndex): + values = values.values return self._simple_new(values, **attributes) def _shallow_copy_with_infer(self, values=None, **kwargs): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9513286668621..19459663c6e88 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -525,6 +525,7 @@ def _generate(cls, start, end, periods, name, freq, freq=freq, name=name) else: index = _generate_regular_range(start, end, periods, freq) + else: if tz is not None: @@ -548,6 +549,7 @@ def _generate(cls, start, end, periods, name, freq, freq=freq, name=name) else: index = _generate_regular_range(start, end, periods, freq) + if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc(_ensure_int64(index), tz, @@ -608,6 +610,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=dtype, **kwargs) values = np.array(values, copy=False) + assert isinstance(values, np.ndarray) + if is_object_dtype(values): return cls(values, name=name, freq=freq, tz=tz, dtype=dtype, **kwargs).values @@ -1002,7 +1006,7 @@ def unique(self, level=None): else: naive = self result = super(DatetimeIndex, naive).unique(level=level) - return self._simple_new(result, name=self.name, tz=self.tz, + return self._simple_new(result.values, name=self.name, tz=self.tz, freq=self.freq) def union(self, other): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index daba56e0c1e29..639e51e9361ab 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -329,7 +329,7 @@ def test_index_ctor_infer_periodindex(self): ]) def test_constructor_simple_new(self, vals, dtype): index = Index(vals, name=dtype) - result = index._simple_new(index, dtype) + result = index._simple_new(index.values, dtype) tm.assert_index_equal(result, index) @pytest.mark.parametrize("vals", [ From f841afd4411ebb4e859a661b7e94113e2d3286ba Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 7 Jul 2018 16:37:37 +0100 Subject: [PATCH 5/9] cleaner way to get ndarray from DTI and fix failing pytables tests --- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/datetimes.py | 1 + pandas/io/pytables.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6ea8a9ac165bc..ef48142c9f4f2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -506,9 +506,9 @@ def _shallow_copy(self, values=None, **kwargs): attributes.update(kwargs) if not len(values) and 'dtype' not in kwargs: attributes['dtype'] = self.dtype - from pandas import DatetimeIndex - if isinstance(values, DatetimeIndex): - values = values.values + + # _simple_new expects an ndarray + values = getattr(values, 'values', values) return self._simple_new(values, **attributes) def _shallow_copy_with_infer(self, values=None, **kwargs): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 19459663c6e88..72258d0ae1cdb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -610,6 +610,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=dtype, **kwargs) values = np.array(values, copy=False) + # values should be a numpy array assert isinstance(values, np.ndarray) if is_object_dtype(values): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6b5714bcadba1..bb31e8927cba3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2472,7 +2472,8 @@ def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): # data are already in UTC, localize and convert if tz present - result = DatetimeIndex._simple_new(values, None, freq=freq) + result = DatetimeIndex._simple_new(values.values, None, + freq=freq) if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result From 4233f6fb1217aa94181b34b54afe2b9f40b7cea3 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 7 Jul 2018 22:37:36 +0100 Subject: [PATCH 6/9] assert that _simple_new always receives an array of datetime64[ns] --- pandas/core/indexes/datetimes.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 72258d0ae1cdb..45f9aefb91523 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -555,9 +555,9 @@ def _generate(cls, start, end, periods, name, freq, tz, ambiguous=ambiguous) - arr = arr.view(_NS_DTYPE) index = DatetimeIndex(arr) + # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: @@ -610,8 +610,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=dtype, **kwargs) values = np.array(values, copy=False) - # values should be a numpy array - assert isinstance(values, np.ndarray) + assert isinstance(values, np.ndarray), "values is not an np.ndarray" + assert is_datetime64_dtype(values) if is_object_dtype(values): return cls(values, name=name, freq=freq, tz=tz, @@ -1862,6 +1862,8 @@ def _generate_regular_range(start, end, periods, freq): "if a 'period' is given.") data = np.arange(b, e, stride, dtype=np.int64) + + # _simple_new is getting an array of int64 here data = DatetimeIndex._simple_new(data, None, tz=tz) else: if isinstance(start, Timestamp): From c20bb44486b2c2d07ac192877624cbaddfd49f8e Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 8 Jul 2018 22:02:16 +0100 Subject: [PATCH 7/9] use .view(_NS_DTYPE) --- pandas/core/indexes/base.py | 2 ++ pandas/core/indexes/datetimes.py | 13 ++++--------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ef48142c9f4f2..171ccdc27be53 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -41,6 +41,7 @@ is_signed_integer_dtype, is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, + is_datetime64_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, @@ -509,6 +510,7 @@ def _shallow_copy(self, values=None, **kwargs): # _simple_new expects an ndarray values = getattr(values, 'values', values) + return self._simple_new(values, **attributes) def _shallow_copy_with_infer(self, values=None, **kwargs): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 45f9aefb91523..e9c1941db3836 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -610,15 +610,12 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=dtype, **kwargs) values = np.array(values, copy=False) + if not is_datetime64_dtype(values): + values = _ensure_int64(values).view(_NS_DTYPE) + assert isinstance(values, np.ndarray), "values is not an np.ndarray" assert is_datetime64_dtype(values) - if is_object_dtype(values): - return cls(values, name=name, freq=freq, tz=tz, - dtype=dtype, **kwargs).values - elif not is_datetime64_dtype(values): - values = _ensure_int64(values).view(_NS_DTYPE) - result = super(DatetimeIndex, cls)._simple_new(values, freq, tz, **kwargs) result.name = name @@ -1862,9 +1859,7 @@ def _generate_regular_range(start, end, periods, freq): "if a 'period' is given.") data = np.arange(b, e, stride, dtype=np.int64) - - # _simple_new is getting an array of int64 here - data = DatetimeIndex._simple_new(data, None, tz=tz) + data = DatetimeIndex._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: if isinstance(start, Timestamp): start = start.to_pydatetime() From c2072898e34ce7259e0567bd0f3d01e85a7c55d5 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 8 Jul 2018 22:13:24 +0100 Subject: [PATCH 8/9] fix and rebase --- pandas/core/indexes/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e9c1941db3836..1b53ff160a15b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -613,6 +613,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, if not is_datetime64_dtype(values): values = _ensure_int64(values).view(_NS_DTYPE) + values = getattr(values, 'values', values) + assert isinstance(values, np.ndarray), "values is not an np.ndarray" assert is_datetime64_dtype(values) From e18d996ee8e9d49cdf6606525edc3e4b9402ddbe Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sun, 8 Jul 2018 22:56:25 +0100 Subject: [PATCH 9/9] lint --- pandas/core/indexes/base.py | 1 - pandas/core/indexes/datetimes.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 171ccdc27be53..78fa6f8217157 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -41,7 +41,6 @@ is_signed_integer_dtype, is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, - is_datetime64_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1b53ff160a15b..b8a89ac26c9d9 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -12,7 +12,6 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, - is_object_dtype, is_datetime64_dtype, is_datetimetz, is_dtype_equal, @@ -557,7 +556,6 @@ def _generate(cls, start, end, periods, name, freq, index = DatetimeIndex(arr) - # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: