Skip to content

ENH: __array_ufunc__ handle np.minimum.reduce #43923

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 29, 2021
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ Datetimelike
- Bug in calling ``np.isnan``, ``np.isfinite``, or ``np.isinf`` on a timezone-aware :class:`DatetimeIndex` incorrectly raising ``TypeError`` (:issue:`43917`)
- Bug in constructing a :class:`Series` from datetime-like strings with mixed timezones incorrectly partially-inferring datetime values (:issue:`40111`)
- Bug in addition with a :class:`Tick` object and a ``np.timedelta64`` object incorrectly raising instead of returning :class:`Timedelta` (:issue:`44474`)
- ``np.maximum.reduce`` and ``np.minimum.reduce`` now correctly return :class:`Timestamp` and :class:`Timedelta` objects when operating on :class:`Series`, :class:`DataFrame`, or :class:`Index` with ``datetime64[ns]`` or ``timedelta64[ns]`` dtype (:issue:`43923`)
- Bug in adding a ``np.timedelta64`` object to a :class:`BusinessDay` or :class:`CustomBusinessDay` object incorrectly raising (:issue:`44532`)
- Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`)
- Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`)
Expand Down Expand Up @@ -736,6 +737,7 @@ ExtensionArray
^^^^^^^^^^^^^^
- Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
-
Expand Down
53 changes: 52 additions & 1 deletion pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@
from pandas._libs import lib
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import ABCNDFrame

from pandas.core.construction import extract_array
from pandas.core.ops import (
maybe_dispatch_ufunc_to_dunder_op,
roperator,
)
from pandas.core.ops.common import unpack_zerodim_and_defer

# Map a ufunc's __name__ to the name of the corresponding pandas reduction
# method, so that e.g. np.maximum.reduce(obj) can be dispatched to
# obj.max(...) (see dispatch_reduction_ufunc).
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
}


class OpsMixin:
# -------------------------------------------------------------
Expand Down Expand Up @@ -344,7 +351,7 @@ def reconstruct(result):
raise NotImplementedError
return result
if isinstance(result, BlockManager):
# we went through BlockManager.apply
# we went through BlockManager.apply e.g. np.sqrt
result = self._constructor(result, **reconstruct_kwargs, copy=False)
else:
# we converted an array, lost our axes
Expand All @@ -363,6 +370,11 @@ def reconstruct(result):
result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
return reconstruct(result)

if method == "reduce":
result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result

# We still get here with kwargs `axis` for e.g. np.maximum.accumulate
# and `dtype` and `keepdims` for np.ptp

Expand All @@ -373,6 +385,8 @@ def reconstruct(result):
# returned a Tuple[BlockManager].
# * len(inputs) > 1 is doable when we know that we have
# aligned blocks / dtypes.

# e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
inputs = tuple(np.asarray(x) for x in inputs)
# Note: we can't use default_array_ufunc here bc reindexing means
# that `self` may not be among `inputs`
Expand All @@ -393,6 +407,7 @@ def reconstruct(result):
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
# Those can have an axis keyword and thus can't be called block-by-block
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
# e.g. np.negative (only one reached), with "where" and "out" in kwargs

result = reconstruct(result)
return result
Expand Down Expand Up @@ -473,3 +488,39 @@ def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
new_inputs = [x if x is not self else np.asarray(x) for x in inputs]

return getattr(ufunc, method)(*new_inputs, **kwargs)


def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Dispatch ufunc reductions to self's reduction methods.

    Parameters
    ----------
    self : object
        The pandas object (Series, DataFrame, Index, or ExtensionArray)
        whose ``__array_ufunc__`` received the reduction.
    ufunc : np.ufunc
        The ufunc being applied, e.g. ``np.maximum``.
    method : str
        Must be ``"reduce"``; other ufunc methods are not handled here.
    *inputs, **kwargs
        The remaining ``__array_ufunc__`` arguments.

    Returns
    -------
    The result of the matching reduction method (e.g. ``self.max(skipna=False)``
    for ``np.maximum.reduce``), or ``NotImplemented`` if the reduction cannot
    be mapped onto one of self's methods.
    """
    assert method == "reduce"

    if len(inputs) != 1 or inputs[0] is not self:
        # Reductions with extra inputs (e.g. an `out=` target) or over a
        # different object are not ours to handle.
        return NotImplemented

    if ufunc.__name__ not in REDUCTION_ALIASES:
        return NotImplemented

    method_name = REDUCTION_ALIASES[ufunc.__name__]

    # NB: we are assuming that min/max represent minimum/maximum methods,
    # which would not be accurate for e.g. Timestamp.min
    if not hasattr(self, method_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        if "axis" not in kwargs:
            # For DataFrame reductions we don't want the default axis=0
            # FIXME: DataFrame.min ignores axis=None
            # FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
            #  but np.minimum.reduce(df.values) behaves as if axis=0
            kwargs["axis"] = None

    # By default, numpy's reductions do not skip NaNs, so we have to
    # pass skipna=False
    return getattr(self, method_name)(skipna=False, **kwargs)
7 changes: 7 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,6 +1511,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
self, ufunc, method, *inputs, **kwargs
)

if method == "reduce":
result = arraylike.dispatch_reduction_ufunc(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result

return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)


Expand Down
10 changes: 9 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
)

from pandas.core import (
arraylike,
missing,
nanops,
ops,
Expand Down Expand Up @@ -414,7 +415,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# For MaskedArray inputs, we apply the ufunc to ._data
# and mask the result.
if method == "reduce":
if method == "reduce" and ufunc not in [np.maximum, np.minimum]:
# Not clear how to handle missing values in reductions. Raise.
raise NotImplementedError("The 'reduce' method is not supported.")

Expand All @@ -431,6 +432,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if result is not NotImplemented:
return result

if method == "reduce":
result = arraylike.dispatch_reduction_ufunc(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result

mask = np.zeros(len(self), dtype=bool)
inputs2 = []
for x in inputs:
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import numbers

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin

from pandas._libs import lib
from pandas._typing import (
Expand Down Expand Up @@ -31,7 +30,6 @@
class PandasArray(
OpsMixin,
NDArrayBackedExtensionArray,
NDArrayOperatorsMixin,
ObjectStringArrayMixin,
):
"""
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,13 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
if result is not NotImplemented:
return result

if method == "reduce":
result = arraylike.dispatch_reduction_ufunc(
self, ufunc, method, *inputs, **kwargs
)
if result is not NotImplemented:
return result

new_inputs = [x if x is not self else x._values for x in inputs]
result = getattr(ufunc, method)(*new_inputs, **kwargs)
if ufunc.nout == 2:
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/indexes/test_numpy_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import pytest

from pandas import (
CategoricalIndex,
DatetimeIndex,
Index,
NumericIndex,
PeriodIndex,
TimedeltaIndex,
isna,
)
import pandas._testing as tm
from pandas.core.api import Float64Index
Expand Down Expand Up @@ -98,3 +100,29 @@ def test_numpy_ufuncs_other(index, func, request):
else:
with tm.external_error_raised(TypeError):
func(index)


@pytest.mark.parametrize("func", [np.maximum, np.minimum])
def test_numpy_ufuncs_reductions(index, func):
    # TODO: overlap with tests.series.test_ufunc.test_reductions
    if len(index) == 0:
        return

    unordered_cat = (
        isinstance(index, CategoricalIndex) and index.dtype.ordered is False
    )
    if unordered_cat:
        # reducing an unordered categorical has no defined min/max
        with pytest.raises(TypeError, match="is not ordered for"):
            func.reduce(index)
        return

    reduced = func.reduce(index)

    reduction = index.max if func is np.maximum else index.min
    expected = reduction(skipna=False)
    # TODO: do we have cases both with and without NAs?

    assert type(reduced) is type(expected)
    if isna(reduced):
        assert isna(expected)
    else:
        assert reduced == expected
59 changes: 54 additions & 5 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,16 +252,65 @@ def __add__(self, other):
@pytest.mark.parametrize(
    "values",
    [
        pd.array([1, 3, 2], dtype=np.int64),
        pd.array([1, 3, 2], dtype="Int64"),
        pd.array([1, 3, 2], dtype="Float32"),
        pd.array([1, 10, 2], dtype="Sparse[int]"),
        pd.to_datetime(["2000", "2010", "2001"]),
        pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
        pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
        pd.to_timedelta(["1 Day", "3 Days", "2 Days"]),
        pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]),
    ],
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame])
def test_reduce(values, box, request):
    # GH#43923 check that np.maximum.reduce / np.minimum.reduce dispatch to
    #  the pandas max/min methods and preserve scalar types.
    # Each `values` fixture is ordered so that values[1] is the max and
    #  values[0] is the min.
    # TODO: cases with NAs

    same_type = True

    if box is pd.Index:
        if values.dtype.kind in ["i", "f"]:
            # ATM Index casts to object, so we get python ints/floats
            same_type = False
        elif isinstance(values, pd.IntervalIndex):
            mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented")
            request.node.add_marker(mark)

    elif box is pd.Series or box is pd.DataFrame:
        if isinstance(values, pd.IntervalIndex):
            mark = pytest.mark.xfail(reason="IntervalArray.min/max not implemented")
            request.node.add_marker(mark)

    if values.dtype == "i8" and box is pd.array:
        # FIXME: pd.array casts to Int64
        obj = values
    else:
        obj = box(values)

    result = np.maximum.reduce(obj)
    expected = values[1]
    if box is pd.DataFrame:
        # TODO: cases with axis kwarg
        expected = obj.max(numeric_only=False)
        tm.assert_series_equal(result, expected)
    else:
        assert result == expected
        if same_type:
            # check we have e.g. Timestamp instead of dt64
            assert type(result) == type(expected)

    result = np.minimum.reduce(obj)
    expected = values[0]
    if box is pd.DataFrame:
        expected = obj.min(numeric_only=False)
        tm.assert_series_equal(result, expected)
    else:
        assert result == expected
        if same_type:
            # check we have e.g. Timestamp instead of dt64
            assert type(result) == type(expected)


@pytest.mark.parametrize("type_", [list, deque, tuple])
Expand Down