From e71e609170946fc1028a76a7caa436992dec861c Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 4 Oct 2021 12:18:41 -0700 Subject: [PATCH 1/4] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 722d0dcc10041..0c54cd17db760 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -129,6 +129,8 @@ Other enhancements - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`) +- + .. --------------------------------------------------------------------------- From fc48b1c20d8e0a039caabf109b9f5168af8dfcf0 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 13:21:57 -0700 Subject: [PATCH 2/4] post rebase fixup --- doc/source/whatsnew/v1.4.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 0c54cd17db760..722d0dcc10041 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -129,8 +129,6 @@ Other enhancements - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`) -- - .. 
--------------------------------------------------------------------------- From d171990bf14ff7ae994627d0431bb55c4318667f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 7 Oct 2021 20:17:36 -0700 Subject: [PATCH 3/4] ENH: __array_ufunc__ handle np.minimum.reduce, np.maximum.reduce --- pandas/core/arraylike.py | 53 +++++++++++++++++++- pandas/core/arrays/base.py | 7 +++ pandas/core/arrays/masked.py | 10 +++- pandas/core/arrays/numpy_.py | 2 - pandas/core/indexes/base.py | 7 +++ pandas/tests/indexes/test_numpy_compat.py | 28 +++++++++++ pandas/tests/series/test_ufunc.py | 59 +++++++++++++++++++++-- 7 files changed, 157 insertions(+), 9 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index fe09a044566f8..98481965588b9 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -12,6 +12,8 @@ from pandas._libs import lib +from pandas.core.dtypes.generic import ABCNDFrame + from pandas.core.construction import extract_array from pandas.core.ops import ( maybe_dispatch_ufunc_to_dunder_op, @@ -19,6 +21,11 @@ ) from pandas.core.ops.common import unpack_zerodim_and_defer +REDUCTION_ALIASES = { + "maximum": "max", + "minimum": "min", +} + class OpsMixin: # ------------------------------------------------------------- @@ -341,7 +348,7 @@ def reconstruct(result): raise NotImplementedError return result if isinstance(result, BlockManager): - # we went through BlockManager.apply + # we went through BlockManager.apply e.g. np.sqrt result = self._constructor(result, **reconstruct_kwargs, copy=False) else: # we converted an array, lost our axes @@ -360,6 +367,11 @@ def reconstruct(result): result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) return reconstruct(result) + if method == "reduce": + result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + # We still get here with kwargs `axis` for e.g. 
np.maximum.accumulate # and `dtype` and `keepdims` for np.ptp @@ -370,6 +382,8 @@ def reconstruct(result): # returned a Tuple[BlockManager]. # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. + + # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add inputs = tuple(np.asarray(x) for x in inputs) # Note: we can't use default_array_ufunc here bc reindexing means # that `self` may not be among `inputs` @@ -390,6 +404,7 @@ def reconstruct(result): # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..)) # Those can have an axis keyword and thus can't be called block-by-block result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) + # e.g. np.negative (only one reached), with "where" and "out" in kwargs result = reconstruct(result) return result @@ -470,3 +485,39 @@ def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): new_inputs = [x if x is not self else np.asarray(x) for x in inputs] return getattr(ufunc, method)(*new_inputs, **kwargs) + + +def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Dispatch ufunc reductions to self's reduction methods. + """ + assert method == "reduce" + + if len(inputs) != 1 or inputs[0] is not self: + return NotImplemented + + if ufunc.__name__ not in REDUCTION_ALIASES: + return NotImplemented + + method_name = REDUCTION_ALIASES[ufunc.__name__] + + # NB: we are assuming that min/max represent minimum/maximum methods, + # which would not be accurate for e.g. Timestamp.min + if not hasattr(self, method_name): + return NotImplemented + + if self.ndim > 1: + if isinstance(self, ABCNDFrame): + # TODO: test cases where this doesn't hold, i.e. 
2D DTA/TDA + kwargs["numeric_only"] = False + + if "axis" not in kwargs: + # For DataFrame reductions we don't want the default axis=0 + # FIXME: DataFrame.min ignores axis=None + # FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs, + # but np.minimum.reduce(df.values) behaves as if axis=0 + kwargs["axis"] = None + + # By default, numpy's reductions do not skip NaNs, so we have to + # pass skipna=False + return getattr(self, method_name)(skipna=False, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 46b0a6873986e..c70097f1654e9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1384,6 +1384,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): self, ufunc, method, *inputs, **kwargs ) + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 6a03456673604..6c240a69cdbee 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -51,6 +51,7 @@ ) from pandas.core import ( + arraylike, missing, nanops, ops, @@ -370,7 +371,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # For MaskedArray inputs, we apply the ufunc to ._data # and mask the result. - if method == "reduce": + if method == "reduce" and ufunc not in [np.maximum, np.minimum]: # Not clear how to handle missing values in reductions. Raise. 
raise NotImplementedError("The 'reduce' method is not supported.") @@ -387,6 +388,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if result is not NotImplemented: return result + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + mask = np.zeros(len(self), dtype=bool) inputs2 = [] for x in inputs: diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 8fe0c0114fb04..0afe204b35c68 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -3,7 +3,6 @@ import numbers import numpy as np -from numpy.lib.mixins import NDArrayOperatorsMixin from pandas._libs import lib from pandas._typing import ( @@ -31,7 +30,6 @@ class PandasArray( OpsMixin, NDArrayBackedExtensionArray, - NDArrayOperatorsMixin, ObjectStringArrayMixin, ): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index da953fe46ef1d..bcf7d8cf4803c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -856,6 +856,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): if result is not NotImplemented: return result + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + new_inputs = [x if x is not self else x._values for x in inputs] result = getattr(ufunc, method)(*new_inputs, **kwargs) if ufunc.nout == 2: diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 9cc1205310ea7..b867c39b0e177 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -2,11 +2,13 @@ import pytest from pandas import ( + CategoricalIndex, DatetimeIndex, Index, NumericIndex, PeriodIndex, TimedeltaIndex, + isna, ) import pandas._testing as tm from pandas.core.api 
import Float64Index @@ -105,3 +107,29 @@ def test_numpy_ufuncs_other(index, func, request): else: with tm.external_error_raised(TypeError): func(index) + + +@pytest.mark.parametrize("func", [np.maximum, np.minimum]) +def test_numpy_ufuncs_reductions(index, func): + # TODO: overlap with tests.series.test_ufunc.test_reductions + if len(index) == 0: + return + + if isinstance(index, CategoricalIndex) and index.dtype.ordered is False: + with pytest.raises(TypeError, match="is not ordered for"): + func.reduce(index) + return + else: + result = func.reduce(index) + + if func is np.maximum: + expected = index.max(skipna=False) + else: + expected = index.min(skipna=False) + # TODO: do we have cases both with and without NAs? + + assert type(result) is type(expected) + if isna(result): + assert isna(expected) + else: + assert result == expected diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 15b2ff36cff1e..b7830c387c923 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -252,16 +252,65 @@ def __add__(self, other): @pytest.mark.parametrize( "values", [ - pd.array([1, 3, 2], dtype="int64"), - pd.array([1, 10, 0], dtype="Sparse[int]"), + pd.array([1, 3, 2], dtype=np.int64), + pd.array([1, 3, 2], dtype="Int64"), + pd.array([1, 3, 2], dtype="Float32"), + pd.array([1, 10, 2], dtype="Sparse[int]"), pd.to_datetime(["2000", "2010", "2001"]), pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"), pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"), + pd.to_timedelta(["1 Day", "3 Days", "2 Days"]), + pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]), ], + ids=lambda x: str(x.dtype), ) -def test_reduce(values): - a = pd.Series(values) - assert np.maximum.reduce(a) == values[1] +@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame]) +def test_reduce(values, box, request): + # TODO: cases with NAs + + same_type = True + + if box is pd.Index: + 
if values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + elif isinstance(values, pd.IntervalIndex): + mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") + request.node.add_marker(mark) + + elif box is pd.Series or box is pd.DataFrame: + if isinstance(values, pd.IntervalIndex): + mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") + request.node.add_marker(mark) + + if values.dtype == "i8" and box is pd.array: + # FIXME: pd.array casts to Int64 + obj = values + else: + obj = box(values) + + result = np.maximum.reduce(obj) + expected = values[1] + if box is pd.DataFrame: + # TODO: cases with axis kwarg + expected = obj.max(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + result = np.minimum.reduce(obj) + expected = values[0] + if box is pd.DataFrame: + expected = obj.min(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + assert result == expected + if same_type: + # check we have e.g. 
Timestamp instead of dt64 + assert type(result) == type(expected) @pytest.mark.parametrize("type_", [list, deque, tuple]) From def345948648f6693607be36f3a4134a1b3d72bf Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Nov 2021 09:08:53 -0800 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 99a66c7e5454b..7f79c72538363 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -478,7 +478,7 @@ Datetimelike - Bug in inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) - Bug in in calling ``np.isnan``, ``np.isfinite``, or ``np.isinf`` on a timezone-aware :class:`DatetimeIndex` incorrectly raising ``TypeError`` (:issue:`43917`) - Bug in constructing a :class:`Series` from datetime-like strings with mixed timezones incorrectly partially-inferring datetime values (:issue:`40111`) -- +- ``np.maximum.reduce`` and ``np.minimum.reduce`` now correctly return :class:`Timestamp` and :class:`Timedelta` objects when operating on :class:`Series`, :class:`DataFrame`, or :class:`Index` with ``datetime64[ns]`` or ``timedelta64[ns]`` dtype (:issue:`43923`) Timedelta ^^^^^^^^^ @@ -635,6 +635,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. 
In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`) +- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) -