diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index db5cce8459ca2..e1f08fa0ada9f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -552,6 +552,7 @@ Datetimelike - Bug in in calling ``np.isnan``, ``np.isfinite``, or ``np.isinf`` on a timezone-aware :class:`DatetimeIndex` incorrectly raising ``TypeError`` (:issue:`43917`) - Bug in constructing a :class:`Series` from datetime-like strings with mixed timezones incorrectly partially-inferring datetime values (:issue:`40111`) - Bug in addition with a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`) +- ``np.maximum.reduce`` and ``np.minimum.reduce`` now correctly return :class:`Timestamp` and :class:`Timedelta` objects when operating on :class:`Series`, :class:`DataFrame`, or :class:`Index` with ``datetime64[ns]`` or ``timedelta64[ns]`` dtype (:issue:`43923`) - Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`) - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`) - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) @@ -736,6 +737,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. 
In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`) +- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) - diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index d91404ff05157..c496099e3a8d2 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -13,6 +13,8 @@ from pandas._libs import lib from pandas.util._exceptions import find_stack_level +from pandas.core.dtypes.generic import ABCNDFrame + from pandas.core.construction import extract_array from pandas.core.ops import ( maybe_dispatch_ufunc_to_dunder_op, @@ -20,6 +22,11 @@ ) from pandas.core.ops.common import unpack_zerodim_and_defer +REDUCTION_ALIASES = { + "maximum": "max", + "minimum": "min", +} + class OpsMixin: # ------------------------------------------------------------- @@ -344,7 +351,7 @@ def reconstruct(result): raise NotImplementedError return result if isinstance(result, BlockManager): - # we went through BlockManager.apply + # we went through BlockManager.apply e.g. 
np.sqrt result = self._constructor(result, **reconstruct_kwargs, copy=False) else: # we converted an array, lost our axes @@ -363,6 +370,11 @@ def reconstruct(result): result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) return reconstruct(result) + if method == "reduce": + result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) + if result is not NotImplemented: + return result + # We still get here with kwargs `axis` for e.g. np.maximum.accumulate # and `dtype` and `keepdims` for np.ptp @@ -373,6 +385,8 @@ def reconstruct(result): # returned a Tuple[BlockManager]. # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. + + # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add inputs = tuple(np.asarray(x) for x in inputs) # Note: we can't use default_array_ufunc here bc reindexing means # that `self` may not be among `inputs` @@ -393,6 +407,7 @@ def reconstruct(result): # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..)) # Those can have an axis keyword and thus can't be called block-by-block result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) + # e.g. np.negative (only one reached), with "where" and "out" in kwargs result = reconstruct(result) return result @@ -473,3 +488,39 @@ def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): new_inputs = [x if x is not self else np.asarray(x) for x in inputs] return getattr(ufunc, method)(*new_inputs, **kwargs) + + +def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Dispatch ufunc reductions to self's reduction methods. + """ + assert method == "reduce" + + if len(inputs) != 1 or inputs[0] is not self: + return NotImplemented + + if ufunc.__name__ not in REDUCTION_ALIASES: + return NotImplemented + + method_name = REDUCTION_ALIASES[ufunc.__name__] + + # NB: we are assuming that min/max represent minimum/maximum methods, + # which would not be accurate for e.g. 
Timestamp.min + if not hasattr(self, method_name): + return NotImplemented + + if self.ndim > 1: + if isinstance(self, ABCNDFrame): + # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA + kwargs["numeric_only"] = False + + if "axis" not in kwargs: + # For DataFrame reductions we don't want the default axis=0 + # FIXME: DataFrame.min ignores axis=None + # FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs, + # but np.minimum.reduce(df.values) behaves as if axis=0 + kwargs["axis"] = None + + # By default, numpy's reductions do not skip NaNs, so we have to + # pass skipna=False + return getattr(self, method_name)(skipna=False, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a64aef64ab49f..b1fcd6a11b45a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1511,6 +1511,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): self, ufunc, method, *inputs, **kwargs ) + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b334a167d3824..10c294797814f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -55,6 +55,7 @@ ) from pandas.core import ( + arraylike, missing, nanops, ops, @@ -414,7 +415,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # For MaskedArray inputs, we apply the ufunc to ._data # and mask the result. - if method == "reduce": + if method == "reduce" and ufunc not in [np.maximum, np.minimum]: # Not clear how to handle missing values in reductions. Raise. 
raise NotImplementedError("The 'reduce' method is not supported.") @@ -431,6 +432,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if result is not NotImplemented: return result + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + mask = np.zeros(len(self), dtype=bool) inputs2 = [] for x in inputs: diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 8fe0c0114fb04..0afe204b35c68 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -3,7 +3,6 @@ import numbers import numpy as np -from numpy.lib.mixins import NDArrayOperatorsMixin from pandas._libs import lib from pandas._typing import ( @@ -31,7 +30,6 @@ class PandasArray( OpsMixin, NDArrayBackedExtensionArray, - NDArrayOperatorsMixin, ObjectStringArrayMixin, ): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 58a31e568981f..2cf8ea95a345a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -876,6 +876,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): if result is not NotImplemented: return result + if method == "reduce": + result = arraylike.dispatch_reduction_ufunc( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + new_inputs = [x if x is not self else x._values for x in inputs] result = getattr(ufunc, method)(*new_inputs, **kwargs) if ufunc.nout == 2: diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 93b51cf9611f9..573ee987ab4c8 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -2,11 +2,13 @@ import pytest from pandas import ( + CategoricalIndex, DatetimeIndex, Index, NumericIndex, PeriodIndex, TimedeltaIndex, + isna, ) import pandas._testing as tm from pandas.core.api 
import Float64Index @@ -98,3 +100,29 @@ def test_numpy_ufuncs_other(index, func, request): else: with tm.external_error_raised(TypeError): func(index) + + +@pytest.mark.parametrize("func", [np.maximum, np.minimum]) +def test_numpy_ufuncs_reductions(index, func): + # TODO: overlap with tests.series.test_ufunc.test_reductions + if len(index) == 0: + return + + if isinstance(index, CategoricalIndex) and index.dtype.ordered is False: + with pytest.raises(TypeError, match="is not ordered for"): + func.reduce(index) + return + else: + result = func.reduce(index) + + if func is np.maximum: + expected = index.max(skipna=False) + else: + expected = index.min(skipna=False) + # TODO: do we have cases both with and without NAs? + + assert type(result) is type(expected) + if isna(result): + assert isna(expected) + else: + assert result == expected diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 15b2ff36cff1e..b7830c387c923 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -252,16 +252,65 @@ def __add__(self, other): @pytest.mark.parametrize( "values", [ - pd.array([1, 3, 2], dtype="int64"), - pd.array([1, 10, 0], dtype="Sparse[int]"), + pd.array([1, 3, 2], dtype=np.int64), + pd.array([1, 3, 2], dtype="Int64"), + pd.array([1, 3, 2], dtype="Float32"), + pd.array([1, 10, 2], dtype="Sparse[int]"), pd.to_datetime(["2000", "2010", "2001"]), pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"), pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"), + pd.to_timedelta(["1 Day", "3 Days", "2 Days"]), + pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]), ], + ids=lambda x: str(x.dtype), ) -def test_reduce(values): - a = pd.Series(values) - assert np.maximum.reduce(a) == values[1] +@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame]) +def test_reduce(values, box, request): + # TODO: cases with NAs + + same_type = True + + if box is pd.Index: + if 
values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + elif isinstance(values, pd.IntervalIndex): + mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") + request.node.add_marker(mark) + + elif box is pd.Series or box is pd.DataFrame: + if isinstance(values, pd.IntervalIndex): + mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented") + request.node.add_marker(mark) + + if values.dtype == "i8" and box is pd.array: + # FIXME: pd.array casts to Int64 + obj = values + else: + obj = box(values) + + result = np.maximum.reduce(obj) + expected = values[1] + if box is pd.DataFrame: + # TODO: cases with axis kwarg + expected = obj.max(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + result = np.minimum.reduce(obj) + expected = values[0] + if box is pd.DataFrame: + expected = obj.min(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) @pytest.mark.parametrize("type_", [list, deque, tuple])