From 9c811a8b10fdf8d9164e9c421919900612d4522e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Nov 2021 21:47:50 +0100 Subject: [PATCH 1/3] PERF: only apply nanops rowwise optimization for narrow arrays --- pandas/core/nanops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 10d95dfbb9181..c6ad19884557b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -463,6 +463,7 @@ def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): axis == 1 and values.ndim == 2 and values.flags["C_CONTIGUOUS"] + and (values.shape[1] / values.shape[0]) > 1000 and values.dtype != object and values.dtype != bool ): From 3e6cba6d3b19ae0c46d1e86f277eb6c1c8a6865f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Nov 2021 08:05:21 +0100 Subject: [PATCH 2/3] add comment --- pandas/core/nanops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index c6ad19884557b..aa9575cde89a6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -454,7 +454,8 @@ def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarr def maybe_operate_rowwise(func: F) -> F: """ NumPy operations on C-contiguous ndarrays with axis=1 can be - very slow. Operate row-by-row and concatenate the results. + very slow if axis 1 >> axis 0. + Operate row-by-row and concatenate the results. 
""" @functools.wraps(func) @@ -463,6 +464,8 @@ def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): axis == 1 and values.ndim == 2 and values.flags["C_CONTIGUOUS"] + # only takes this path for wide arrays (long dataframes), for threshold see + # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737 and (values.shape[1] / values.shape[0]) > 1000 and values.dtype != object and values.dtype != bool From 5f82b0279daed7b870b083723f16dd824341ec6a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Nov 2021 09:22:20 +0100 Subject: [PATCH 3/3] avoid divide by zero --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index d14730a664417..52d2322b11f42 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -467,7 +467,7 @@ def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): and values.flags["C_CONTIGUOUS"] # only takes this path for wide arrays (long dataframes), for threshold see # https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737 - and (values.shape[1] / values.shape[0]) > 1000 + and (values.shape[1] / 1000) > values.shape[0] and values.dtype != object and values.dtype != bool ):