From 45b3dc7b0c1bb2080002f947eb4cc52c5cb1f2d8 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 11 Sep 2021 22:59:20 -0400
Subject: [PATCH 1/6] BUG/PERF: sparse min/max don't densify

---
 asv_bench/benchmarks/sparse.py           | 14 +++++
 doc/source/whatsnew/v1.4.0.rst           |  1 +
 pandas/core/arrays/sparse/array.py       | 69 ++++++++++++++++++++----
 pandas/tests/arrays/sparse/test_array.py | 56 ++++++++++++++-----
 4 files changed, 115 insertions(+), 25 deletions(-)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index bcc3edab4a349..1d684a64cad70 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -166,4 +166,18 @@ def time_division(self, fill_value):
         self.arr1 / self.arr2
 
 
+class MinMax:
+
+    params = ["min", "max"]
+    param_names = ["func"]
+
+    def setup(self, func):
+        N = 100000
+        arr = make_array(N, 1e-4, np.nan, np.float64)
+        self.sp_arr = SparseArray(arr)
+
+    def time_min_max(self, func):
+        getattr(self.sp_arr, func)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 328499a4ae98e..b6aca2e8dacd4 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -300,6 +300,7 @@ Performance improvements
 - Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
+- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`?`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 77142ef450487..7d4773199bed7 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1433,23 +1433,70 @@ def mean(self, axis=0, *args, **kwargs):
             nsparse = self.sp_index.ngaps
             return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)
 
-    def max(self, axis=0, *args, **kwargs):
+    def max(self, axis: int = 0, *args, **kwargs) -> Scalar:
+        """
+        Max of non-NA/null values
+
+        Parameters
+        ----------
+        axis : int, default 0
+            Not Used. NumPy compatibility.
+        *args, **kwargs
+            Not Used. NumPy compatibility.
+
+        Returns
+        -------
+        scalar
+        """
         nv.validate_max(args, kwargs)
+        return self._min_max("max")
 
-        # This condition returns a nan if there are no valid values in the array.
-        if self.size > 0 and self._valid_sp_values.size == 0:
-            return self.fill_value
-        else:
-            return np.nanmax(self, axis)
+    def min(self, axis: int = 0, *args, **kwargs) -> Scalar:
+        """
+        Min of non-NA/null values
+
+        Parameters
+        ----------
+        axis : int, default 0
+            Not Used. NumPy compatibility.
+        *args, **kwargs
+            Not Used. NumPy compatibility.
 
-    def min(self, axis=0, *args, **kwargs):
+        Returns
+        -------
+        scalar
+        """
         nv.validate_min(args, kwargs)
+        return self._min_max("min")
 
-        # This condition returns a nan if there are no valid values in the array.
-        if self.size > 0 and self._valid_sp_values.size == 0:
-            return self.fill_value
+    def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
+        """
+        Min/max of non-NA/null values
+
+        Parameters
+        ----------
+        kind : {"min", "max"}
+
+        Returns
+        -------
+        scalar
+        """
+        valid_vals = self._valid_sp_values
+        has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0
+        if len(valid_vals) > 0:
+            sp_min_max = getattr(valid_vals, kind)()
+
+            # If a non-null fill value is currently present, it might be the max
+            if has_nonnull_fill_vals:
+                func = max if kind == "max" else min
+                return func(sp_min_max, self.fill_value)
+            else:
+                return sp_min_max
         else:
-            return np.nanmin(self, axis)
+            if has_nonnull_fill_vals:
+                return self.fill_value
+            else:
+                return na_value_for_dtype(self.dtype.subtype)
 
     # ------------------------------------------------------------------------
     # Ufuncs
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index fbbe521b0eaf1..7c3a3367169fb 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -1359,26 +1359,54 @@ def test_drop_duplicates_fill_value():
 
 
 class TestMinMax:
-    plain_data = np.arange(5).astype(float)
-    data_neg = plain_data * (-1)
-    data_NaN = SparseArray(np.array([0, 1, 2, np.nan, 4]))
-    data_all_NaN = SparseArray(np.array([np.nan, np.nan, np.nan, np.nan, np.nan]))
-    data_NA_filled = SparseArray(
-        np.array([np.nan, np.nan, np.nan, np.nan, np.nan]), fill_value=5
-    )
-
     @pytest.mark.parametrize(
         "raw_data,max_expected,min_expected",
         [
-            (plain_data, [4], [0]),
-            (data_neg, [0], [-4]),
-            (data_NaN, [4], [0]),
-            (data_all_NaN, [np.nan], [np.nan]),
-            (data_NA_filled, [5], [5]),
+            (np.arange(5.0), [4], [0]),
+            (-np.arange(5.0), [0], [-4]),
+            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
+            (np.array([np.nan] * 5), [np.nan], [np.nan]),
+            (np.array([]), [np.nan], [np.nan]),
         ],
     )
-    def test_maxmin(self, raw_data, max_expected, min_expected):
+    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
         max_result = SparseArray(raw_data).max()
         min_result = SparseArray(raw_data).min()
         assert max_result in max_expected
         assert min_result in min_expected
+
+    @pytest.mark.parametrize(
+        "fill_value,max_expected,min_expected",
+        [
+            (100, 100, 0),
+            (-100, 1, -100),
+        ],
+    )
+    def test_fill_value(self, fill_value, max_expected, min_expected):
+        arr = SparseArray(
+            np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
+        )
+        max_result = arr.max()
+        assert max_result == max_expected
+
+        min_result = arr.min()
+        assert min_result == min_expected
+
+    @pytest.mark.parametrize("func", ["min", "max"])
+    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
+    @pytest.mark.parametrize(
+        "dtype,expected",
+        [
+            (SparseDtype(np.float64, np.nan), np.nan),
+            (SparseDtype(np.float64, 5.0), np.nan),
+            (SparseDtype("datetime64[ns]", pd.NaT), pd.NaT),
+            (SparseDtype("datetime64[ns]", pd.to_datetime("2018-05-05")), pd.NaT),
+        ],
+    )
+    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
+        arr = SparseArray(data, dtype=dtype)
+        result = getattr(arr, func)()
+        if expected == pd.NaT:
+            assert result == pd.NaT
+        else:
+            assert np.isnan(result)

From f3eea3132a97a3db3fd34617404f0b949610b344 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 11 Sep 2021 23:08:18 -0400
Subject: [PATCH 2/6] Update whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index b6aca2e8dacd4..41077637e5ac1 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -436,7 +436,7 @@ Reshaping
 Sparse
 ^^^^^^
 - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
--
+- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`?`)
 -
 
 ExtensionArray

From ef4ddc8a545d5f2fdc9d7a5e16acc5eae67c48ac Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 11 Sep 2021 23:26:26 -0400
Subject: [PATCH 3/6] Update benchmark

---
 asv_bench/benchmarks/sparse.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index 1d684a64cad70..32baec25bef2d 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -168,16 +168,16 @@ def time_division(self, fill_value):
 
 class MinMax:
 
-    params = ["min", "max"]
-    param_names = ["func"]
+    params = (["min", "max"], [0.0, np.nan])
+    param_names = ["func", "fill_value"]
 
-    def setup(self, func):
-        N = 100000
-        arr = make_array(N, 1e-4, np.nan, np.float64)
-        self.sp_arr = SparseArray(arr)
+    def setup(self, func, fill_value):
+        N = 1_000_000
+        arr = make_array(N, 1e-5, fill_value, np.float64)
+        self.sp_arr = SparseArray(arr, fill_value=fill_value)
 
-    def time_min_max(self, func):
-        getattr(self.sp_arr, func)
+    def time_min_max(self, func, fill_value):
+        getattr(self.sp_arr, func)()
 
 
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From b82cde32d872c4e1164dba4afca62a2f5e480e73 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 11 Sep 2021 23:30:38 -0400
Subject: [PATCH 4/6] Update comment

---
 pandas/core/arrays/sparse/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 7d4773199bed7..852cf219b8c2c 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1486,7 +1486,7 @@ def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
         if len(valid_vals) > 0:
             sp_min_max = getattr(valid_vals, kind)()
 
-            # If a non-null fill value is currently present, it might be the max
+            # If a non-null fill value is currently present, it might be the min/max
             if has_nonnull_fill_vals:
                 func = max if kind == "max" else min
                 return func(sp_min_max, self.fill_value)

From 2d2545b2396875e7ebd6d9440a1fcc9ba1e395f6 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sat, 11 Sep 2021 23:34:34 -0400
Subject: [PATCH 5/6] Update pr number

---
 doc/source/whatsnew/v1.4.0.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 41077637e5ac1..2e0223407e084 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -300,7 +300,7 @@ Performance improvements
 - Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
 - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:43370`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
-- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`?`)
+- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -436,7 +436,8 @@ Reshaping
 Sparse
 ^^^^^^
 - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`)
-- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`?`)
+- Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`)
+-
 -
 
 ExtensionArray

From 0ce141bc4466ff805574a1709f8af3b0ab76e79c Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin <mzeitlin@caltech.edu>
Date: Sun, 12 Sep 2021 11:20:16 -0400
Subject: [PATCH 6/6] Structure with elif instead

---
 pandas/core/arrays/sparse/array.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index d0722876d81cd..f715a4d47ab30 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1515,11 +1515,10 @@ def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
                 return func(sp_min_max, self.fill_value)
             else:
                 return sp_min_max
+        elif has_nonnull_fill_vals:
+            return self.fill_value
         else:
-            if has_nonnull_fill_vals:
-                return self.fill_value
-            else:
-                return na_value_for_dtype(self.dtype.subtype)
+            return na_value_for_dtype(self.dtype.subtype)
 
     # ------------------------------------------------------------------------
     # Ufuncs