From 82782234603650478e783714005e19760c3d5d1a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 3 May 2021 15:54:50 -0700
Subject: [PATCH 1/2] REF: avoid unnecessary raising in object-dtype case

---
 pandas/_libs/reduction.pyx     | 15 +++++++++------
 pandas/core/groupby/generic.py |  2 +-
 pandas/core/groupby/ops.py     |  2 +-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 09999b6970bca..0eafccab34946 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -27,12 +27,15 @@ from pandas._libs.lib import (
 )
 
 
-cpdef check_result_array(object obj):
+cdef cnp.dtype _dtype_obj = np.dtype("object")
 
-    if (is_array(obj) or
-            (isinstance(obj, list) and len(obj) == 0) or
-            getattr(obj, 'shape', None) == (0,)):
-        raise ValueError('Must produce aggregated value')
+
+cpdef check_result_array(object obj, object dtype):
+    if is_array(obj):
+        if dtype != _dtype_obj:
+            # if it is object dtype, we the function can be a reduction/aggregation
+            #  and still return an ndarray
+            raise ValueError("Must produce aggregated value")
 
 
 cdef class _BaseGrouper:
@@ -89,7 +92,7 @@ cdef class _BaseGrouper:
             # On the first pass, we check the output shape to see
             #  if this looks like a reduction.
             initialized = True
-            check_result_array(res)
+            check_result_array(res, cached_series.dtype)
 
         return res, initialized
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 4f60660dfb499..a69f7ef9dcd49 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -530,7 +530,7 @@ def _aggregate_named(self, func, *args, **kwargs):
             output = libreduction.extract_result(output)
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(output)
+                libreduction.check_result_array(output, group.dtype)
                 initialized = True
             result[name] = output
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 975a902f49db9..d649240c1df88 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -1027,7 +1027,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
 
             if not initialized:
                 # We only do this validation on the first iteration
-                libreduction.check_result_array(res)
+                libreduction.check_result_array(res, group.dtype)
                 initialized = True
 
             counts[i] = group.shape[0]

From f464bdcda64064ea46fedaabc7501df327f2e37a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 3 May 2021 18:45:32 -0700
Subject: [PATCH 2/2] flesh out comment

---
 pandas/_libs/reduction.pyx | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 0eafccab34946..9bef6cb428e8a 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -31,10 +31,13 @@ cdef cnp.dtype _dtype_obj = np.dtype("object")
 
 
 cpdef check_result_array(object obj, object dtype):
+    # Our operation is supposed to be an aggregation/reduction. If
+    #  it returns an ndarray, this likely means an invalid operation has
+    #  been passed. See test_apply_without_aggregation, test_agg_must_agg
     if is_array(obj):
         if dtype != _dtype_obj:
-            # if it is object dtype, we the function can be a reduction/aggregation
-            #  and still return an ndarray
+            # With object dtype, a reduction/aggregation can legitimately return an
+            #  ndarray (e.g. test_agg_over_numpy_arrays), so only raise otherwise.
             raise ValueError("Must produce aggregated value")