From eccf4843a61f33e8f1f954f03e79dc57b3045814 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 15:07:31 +0000 Subject: [PATCH 01/25] Initial addition --- Doc/library/heapq.rst | 40 +++++++++++ Lib/heapq.py | 38 ++++++---- Lib/test/test_heapq.py | 71 +++++++++++++++++-- ...-03-01-15-00-00.gh-issue-110067.1ad3as.rst | 1 + 4 files changed, 132 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index d3c4b920ba500a..fb1e28c09ab261 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -82,6 +82,46 @@ The following functions are provided: on the heap. +.. function:: heappush_max(heap, item) + + Push the value *item* onto the *heap*, maintaining the heap invariant. + + +.. function:: heappop_max(heap) + + Pop and return the largest item from the *heap*, maintaining the heap + invariant. If the heap is empty, :exc:`IndexError` is raised. To access the + largest item without popping it, use ``heap[0]``. + + +.. function:: heappushpop_max(heap, item) + + Push *item* on the heap, then pop and return the largest item from the + *heap*. The combined action runs more efficiently than :func:`heappush_max` + followed by a separate call to :func:`heappop_max`. + + +.. function:: heapify_max(x) + + Transform list *x* into a max heap, in-place, in linear time. + + +.. function:: heapreplace(heap, item) + + Pop and return the smallest item from the *heap*, and also push the new *item*. + The heap size doesn't change. If the heap is empty, :exc:`IndexError` is raised. + + This one step operation is more efficient than a :func:`heappop` followed by + :func:`heappush` and can be more appropriate when using a fixed-size heap. + The pop/push combination always returns an element from the heap and replaces + it with *item*. + + The value returned may be larger than the *item* added. 
If that isn't + desired, consider using :func:`heappushpop` instead. Its push/pop + combination returns the smaller of the two values, leaving the larger value + on the heap. + + The module also offers three general purpose functions based on heaps. diff --git a/Lib/heapq.py b/Lib/heapq.py index 9649da251f2a83..5fb2ea65504938 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -178,7 +178,7 @@ def heapify(x): for i in reversed(range(n//2)): _siftup(x, i) -def _heappop_max(heap): +def heappop_max(heap): """Maxheap version of a heappop.""" lastelt = heap.pop() # raises appropriate IndexError if heap is empty if heap: @@ -188,14 +188,26 @@ def _heappop_max(heap): return returnitem return lastelt -def _heapreplace_max(heap, item): +def heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty heap[0] = item _siftup_max(heap, 0) return returnitem -def _heapify_max(x): +def heappush_max(heap, item): + """Maxheap version of a heappush.""" + heap.append(item) + _siftdown_max(heap, 0, len(heap)-1) + +def heappushpop_max(heap, item): + """Maxheap fast version of a heappush followed by a heappop.""" + if heap and heap[0] < item: + item, heap[0] = heap[0], item + _siftup_max(heap, 0) + return item + +def heapify_max(x): """Transform list into a maxheap, in-place, in O(len(x)) time.""" n = len(x) for i in reversed(range(n//2)): @@ -335,9 +347,9 @@ def merge(*iterables, key=None, reverse=False): h_append = h.append if reverse: - _heapify = _heapify_max - _heappop = _heappop_max - _heapreplace = _heapreplace_max + _heapify = heapify_max + _heappop = heappop_max + _heapreplace = heapreplace_max direction = -1 else: _heapify = heapify @@ -490,10 +502,10 @@ def nsmallest(n, iterable, key=None): result = [(elem, i) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + 
_heapreplace = heapreplace_max for elem in it: if elem < top: _heapreplace(result, (elem, order)) @@ -507,10 +519,10 @@ def nsmallest(n, iterable, key=None): result = [(key(elem), i, elem) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: k = key(elem) if k < top: @@ -584,15 +596,15 @@ def nlargest(n, iterable, key=None): except ImportError: pass try: - from _heapq import _heapreplace_max + from _heapq import heapreplace_max except ImportError: pass try: - from _heapq import _heapify_max + from _heapq import heapify_max except ImportError: pass try: - from _heapq import _heappop_max + from _heapq import heappop_max except ImportError: pass diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 1aa8e4e289730d..258f23c1128a88 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -14,7 +14,7 @@ # _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when # _heapq is imported, so check them there func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace', - '_heappop_max', '_heapreplace_max', '_heapify_max'] + 'heappop_max', 'heapreplace_max', 'heapify_max'] class TestModules(TestCase): def test_py_functions(self): @@ -23,7 +23,7 @@ def test_py_functions(self): @skipUnless(c_heapq, 'requires _heapq') def test_c_functions(self): - for fname in func_names: + for fname in ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace']: self.assertEqual(getattr(c_heapq, fname).__module__, '_heapq') @@ -74,6 +74,35 @@ def test_push_pop(self): except AttributeError: pass + def test_max_push_pop(self): + # 1) Push 256 random numbers and pop them off, verifying all's OK. 
+ heap = [] + data = [] + self.check_max_invariant(heap) + for i in range(256): + item = random.random() + data.append(item) + self.module.heappush_max(heap, item) + self.check_max_invariant(heap) + results = [] + while heap: + item = self.module.heappop_max(heap) + self.check_max_invariant(heap) + results.append(item) + data_sorted = data[:] + data_sorted.sort(reverse=True) + + self.assertEqual(data_sorted, results) + # 2) Check that the invariant holds for a sorted array + self.check_max_invariant(results) + + self.assertRaises(TypeError, self.module.heappush, []) + try: + self.assertRaises(TypeError, self.module.heappush, None, None) + self.assertRaises(TypeError, self.module.heappop, None) + except AttributeError: + pass + def check_invariant(self, heap): # Check the heap invariant. for pos, item in enumerate(heap): @@ -81,6 +110,11 @@ def check_invariant(self, heap): parentpos = (pos-1) >> 1 self.assertTrue(heap[parentpos] <= item) + def check_max_invariant(self, heap): + for pos in range(1, len(heap)): + parentpos = (pos - 1) >> 1 + self.assertTrue(heap[parentpos] >= heap[pos]) + def test_heapify(self): for size in list(range(30)) + [20000]: heap = [random.random() for dummy in range(size)] @@ -89,6 +123,14 @@ def test_heapify(self): self.assertRaises(TypeError, self.module.heapify, None) + def test_heapify_max(self): + for size in list(range(30)) + [20000]: + heap = [random.random() for dummy in range(size)] + self.module.heapify_max(heap) + self.check_max_invariant(heap) + + self.assertRaises(TypeError, self.module.heapify, None) + def test_naive_nbest(self): data = [random.randrange(2000) for i in range(1000)] heap = [] @@ -153,12 +195,31 @@ def test_heappushpop(self): x = self.module.heappushpop(h, 11) self.assertEqual((h, x), ([11], 10)) + def test_heappushpop_max(self): + h = [] + x = self.module.heappushpop_max(h, 10) + self.assertEqual((h, x), ([], 10)) + + h = [10] + x = self.module.heappushpop_max(h, 10.0) + self.assertEqual((h, x), ([10], 10.0)) + 
self.assertEqual(type(h[0]), int) + self.assertEqual(type(x), float) + + h = [10] + x = self.module.heappushpop_max(h, 11) + self.assertEqual((h, x), ([11], 10)) + + h = [10] + x = self.module.heappushpop_max(h, 9) + self.assertEqual((h, x), ([10], 9)) + def test_heappop_max(self): - # _heapop_max has an optimization for one-item lists which isn't + # heapop_max has an optimization for one-item lists which isn't # covered in other tests, so test that case explicitly here h = [3, 2] - self.assertEqual(self.module._heappop_max(h), 3) - self.assertEqual(self.module._heappop_max(h), 2) + self.assertEqual(self.module.heappop_max(h), 3) + self.assertEqual(self.module.heappop_max(h), 2) def test_heapsort(self): # Exercise everything with repeated heapsort checks diff --git a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst new file mode 100644 index 00000000000000..859bbc63494a15 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst @@ -0,0 +1 @@ +Make max heap functions public. 
From beaf9151346df6355347431b0e1941e76c53f276 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 15:32:30 +0000 Subject: [PATCH 02/25] Add C imp --- Lib/heapq.py | 12 ----- Lib/test/test_heapq.py | 4 +- Modules/_heapqmodule.c | 67 +++++++++++++++++++----- Modules/clinic/_heapqmodule.c.h | 90 +++++++++++++++++++++++---------- 4 files changed, 119 insertions(+), 54 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index 5fb2ea65504938..3af3d5a14386d7 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -595,18 +595,6 @@ def nlargest(n, iterable, key=None): from _heapq import * except ImportError: pass -try: - from _heapq import heapreplace_max -except ImportError: - pass -try: - from _heapq import heapify_max -except ImportError: - pass -try: - from _heapq import heappop_max -except ImportError: - pass if __name__ == "__main__": diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 258f23c1128a88..ae5ddf9290698d 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -14,7 +14,7 @@ # _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when # _heapq is imported, so check them there func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace', - 'heappop_max', 'heapreplace_max', 'heapify_max'] + 'heappop_max', 'heapreplace_max', 'heapify_max', 'heappushpop_max',] class TestModules(TestCase): def test_py_functions(self): @@ -23,7 +23,7 @@ def test_py_functions(self): @skipUnless(c_heapq, 'requires _heapq') def test_c_functions(self): - for fname in ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace']: + for fname in func_names: self.assertEqual(getattr(c_heapq, fname).__module__, '_heapq') diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 80fe9cff98509d..dda0d36c7dcfa0 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -482,7 +482,7 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) /*[clinic input] -_heapq._heappop_max +_heapq.heappop_max heap: 
object(subclass_of='&PyList_Type') / @@ -491,14 +491,14 @@ Maxheap variant of heappop. [clinic start generated code]*/ static PyObject * -_heapq__heappop_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=9e77aadd4e6a8760 input=362c06e1c7484793]*/ +_heapq_heappop_max_impl(PyObject *module, PyObject *heap) +/*[clinic end generated code: output=2f051195ab404b77 input=e62b14016a5a26de]*/ { return heappop_internal(heap, siftup_max); } /*[clinic input] -_heapq._heapreplace_max +_heapq.heapreplace_max heap: object(subclass_of='&PyList_Type') item: object @@ -508,15 +508,14 @@ Maxheap variant of heapreplace. [clinic start generated code]*/ static PyObject * -_heapq__heapreplace_max_impl(PyObject *module, PyObject *heap, - PyObject *item) -/*[clinic end generated code: output=8ad7545e4a5e8adb input=f2dd27cbadb948d7]*/ +_heapq_heapreplace_max_impl(PyObject *module, PyObject *heap, PyObject *item) +/*[clinic end generated code: output=8770778b5a9cbe9b input=21a3d28d757c881c]*/ { return heapreplace_internal(heap, item, siftup_max); } /*[clinic input] -_heapq._heapify_max +_heapq.heapify_max heap: object(subclass_of='&PyList_Type') / @@ -525,21 +524,63 @@ Maxheap variant of heapify. [clinic start generated code]*/ static PyObject * -_heapq__heapify_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=2cb028beb4a8b65e input=c1f765ee69f124b8]*/ +_heapq_heapify_max_impl(PyObject *module, PyObject *heap) +/*[clinic end generated code: output=8401af3856529807 input=edda4255728c431e]*/ { return heapify_internal(heap, siftup_max); } +/*[clinic input] +_heapq.heappushpop_max + + heap: object(subclass_of='&PyList_Type') + item: object + / + +Maxheap variant of heappushpop. + +The combined action runs more efficiently than heappush_max() followed by +a separate call to heappop_max(). 
+[clinic start generated code]*/ + +static PyObject * +_heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) +/*[clinic end generated code: output=ff0019f0941aca0d input=525a843013cbd6c0]*/ +{ + PyObject *returnitem; + int cmp; + if (PyList_GET_SIZE(heap) == 0) { + return Py_NewRef(item); + } + PyObject *top = PyList_GET_ITEM(heap, 0); + Py_INCREF(top); + cmp = PyObject_RichCompareBool(top, item, Py_LT); + Py_DECREF(top); + if (cmp < 0) + return NULL; + if (cmp == 0) { + return Py_NewRef(item); + } + returnitem = PyList_GET_ITEM(heap, 0); + PyList_SET_ITEM(heap, 0, Py_NewRef(item)); + + if (siftup_max((PyListObject *)heap, 0)) { + Py_DECREF(returnitem); + return NULL; + } + return Py_NewRef(returnitem); +} + static PyMethodDef heapq_methods[] = { _HEAPQ_HEAPPUSH_METHODDEF _HEAPQ_HEAPPUSHPOP_METHODDEF _HEAPQ_HEAPPOP_METHODDEF _HEAPQ_HEAPREPLACE_METHODDEF _HEAPQ_HEAPIFY_METHODDEF - _HEAPQ__HEAPPOP_MAX_METHODDEF - _HEAPQ__HEAPIFY_MAX_METHODDEF - _HEAPQ__HEAPREPLACE_MAX_METHODDEF + _HEAPQ_HEAPPOP_MAX_METHODDEF + _HEAPQ_HEAPIFY_MAX_METHODDEF + _HEAPQ_HEAPREPLACE_MAX_METHODDEF + _HEAPQ_HEAPPUSHPOP_MAX_METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 9046307990773b..56632a94d9e9d6 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -175,96 +175,132 @@ _heapq_heapify(PyObject *module, PyObject *arg) return return_value; } -PyDoc_STRVAR(_heapq__heappop_max__doc__, -"_heappop_max($module, heap, /)\n" +PyDoc_STRVAR(_heapq_heappop_max__doc__, +"heappop_max($module, heap, /)\n" "--\n" "\n" "Maxheap variant of heappop."); -#define _HEAPQ__HEAPPOP_MAX_METHODDEF \ - {"_heappop_max", (PyCFunction)_heapq__heappop_max, METH_O, _heapq__heappop_max__doc__}, +#define _HEAPQ_HEAPPOP_MAX_METHODDEF \ + {"heappop_max", (PyCFunction)_heapq_heappop_max, METH_O, _heapq_heappop_max__doc__}, static PyObject * -_heapq__heappop_max_impl(PyObject *module, PyObject 
*heap); +_heapq_heappop_max_impl(PyObject *module, PyObject *heap); static PyObject * -_heapq__heappop_max(PyObject *module, PyObject *arg) +_heapq_heappop_max(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; PyObject *heap; if (!PyList_Check(arg)) { - _PyArg_BadArgument("_heappop_max", "argument", "list", arg); + _PyArg_BadArgument("heappop_max", "argument", "list", arg); goto exit; } heap = arg; - return_value = _heapq__heappop_max_impl(module, heap); + return_value = _heapq_heappop_max_impl(module, heap); exit: return return_value; } -PyDoc_STRVAR(_heapq__heapreplace_max__doc__, -"_heapreplace_max($module, heap, item, /)\n" +PyDoc_STRVAR(_heapq_heapreplace_max__doc__, +"heapreplace_max($module, heap, item, /)\n" "--\n" "\n" "Maxheap variant of heapreplace."); -#define _HEAPQ__HEAPREPLACE_MAX_METHODDEF \ - {"_heapreplace_max", _PyCFunction_CAST(_heapq__heapreplace_max), METH_FASTCALL, _heapq__heapreplace_max__doc__}, +#define _HEAPQ_HEAPREPLACE_MAX_METHODDEF \ + {"heapreplace_max", _PyCFunction_CAST(_heapq_heapreplace_max), METH_FASTCALL, _heapq_heapreplace_max__doc__}, static PyObject * -_heapq__heapreplace_max_impl(PyObject *module, PyObject *heap, - PyObject *item); +_heapq_heapreplace_max_impl(PyObject *module, PyObject *heap, PyObject *item); static PyObject * -_heapq__heapreplace_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_heapq_heapreplace_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyObject *heap; PyObject *item; - if (!_PyArg_CheckPositional("_heapreplace_max", nargs, 2, 2)) { + if (!_PyArg_CheckPositional("heapreplace_max", nargs, 2, 2)) { goto exit; } if (!PyList_Check(args[0])) { - _PyArg_BadArgument("_heapreplace_max", "argument 1", "list", args[0]); + _PyArg_BadArgument("heapreplace_max", "argument 1", "list", args[0]); goto exit; } heap = args[0]; item = args[1]; - return_value = _heapq__heapreplace_max_impl(module, heap, item); + return_value = 
_heapq_heapreplace_max_impl(module, heap, item); exit: return return_value; } -PyDoc_STRVAR(_heapq__heapify_max__doc__, -"_heapify_max($module, heap, /)\n" +PyDoc_STRVAR(_heapq_heapify_max__doc__, +"heapify_max($module, heap, /)\n" "--\n" "\n" "Maxheap variant of heapify."); -#define _HEAPQ__HEAPIFY_MAX_METHODDEF \ - {"_heapify_max", (PyCFunction)_heapq__heapify_max, METH_O, _heapq__heapify_max__doc__}, +#define _HEAPQ_HEAPIFY_MAX_METHODDEF \ + {"heapify_max", (PyCFunction)_heapq_heapify_max, METH_O, _heapq_heapify_max__doc__}, static PyObject * -_heapq__heapify_max_impl(PyObject *module, PyObject *heap); +_heapq_heapify_max_impl(PyObject *module, PyObject *heap); static PyObject * -_heapq__heapify_max(PyObject *module, PyObject *arg) +_heapq_heapify_max(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; PyObject *heap; if (!PyList_Check(arg)) { - _PyArg_BadArgument("_heapify_max", "argument", "list", arg); + _PyArg_BadArgument("heapify_max", "argument", "list", arg); goto exit; } heap = arg; - return_value = _heapq__heapify_max_impl(module, heap); + return_value = _heapq_heapify_max_impl(module, heap); exit: return return_value; } -/*[clinic end generated code: output=05f2afdf3bc54c9d input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_heapq_heappushpop_max__doc__, +"heappushpop_max($module, heap, item, /)\n" +"--\n" +"\n" +"Maxheap variant of heappushpop.\n" +"\n" +"The combined action runs more efficiently than heappush_max() followed by\n" +"a separate call to heappop_max()."); + +#define _HEAPQ_HEAPPUSHPOP_MAX_METHODDEF \ + {"heappushpop_max", _PyCFunction_CAST(_heapq_heappushpop_max), METH_FASTCALL, _heapq_heappushpop_max__doc__}, + +static PyObject * +_heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item); + +static PyObject * +_heapq_heappushpop_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *heap; + PyObject *item; + + if (!_PyArg_CheckPositional("heappushpop_max", 
nargs, 2, 2)) { + goto exit; + } + if (!PyList_Check(args[0])) { + _PyArg_BadArgument("heappushpop_max", "argument 1", "list", args[0]); + goto exit; + } + heap = args[0]; + item = args[1]; + return_value = _heapq_heappushpop_max_impl(module, heap, item); + +exit: + return return_value; +} +/*[clinic end generated code: output=0404176bef8091d2 input=a9049054013a1b77]*/ From c143ae234fbf61bc4902fac139033830c9fd5dde Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 15:38:36 +0000 Subject: [PATCH 03/25] Benedikts suggestions --- Doc/library/heapq.rst | 21 +++++++++++++++------ Lib/heapq.py | 13 +++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index fb1e28c09ab261..a95b3501dd82aa 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -82,10 +82,18 @@ The following functions are provided: on the heap. +.. function:: heapify_max(x) + + Transform list *x* into a heap, in-place, in linear time. + + .. versionadded:: next + + .. function:: heappush_max(heap, item) Push the value *item* onto the *heap*, maintaining the heap invariant. + .. versionadded:: next .. function:: heappop_max(heap) @@ -93,6 +101,8 @@ The following functions are provided: invariant. If the heap is empty, :exc:`IndexError` is raised. To access the largest item without popping it, use ``heap[0]``. + .. versionadded:: next + .. function:: heappushpop_max(heap, item) @@ -100,15 +110,12 @@ The following functions are provided: *heap*. The combined action runs more efficiently than :func:`heappush_max` followed by a separate call to :func:`heappop_max`. - -.. function:: heapify_max(x) - - Transform list *x* into a max heap, in-place, in linear time. + .. versionadded:: next -.. function:: heapreplace(heap, item) +.. function:: heapreplace_max(heap, item) - Pop and return the smallest item from the *heap*, and also push the new *item*. + Pop and return the largest item from the *heap*, and also push the new *item*. 
The heap size doesn't change. If the heap is empty, :exc:`IndexError` is raised. This one step operation is more efficient than a :func:`heappop` followed by @@ -121,6 +128,8 @@ The following functions are provided: combination returns the smaller of the two values, leaving the larger value on the heap. + .. versionadded:: next + The module also offers three general purpose functions based on heaps. diff --git a/Lib/heapq.py b/Lib/heapq.py index 3af3d5a14386d7..0f108f9f5d667a 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -213,6 +213,19 @@ def heapify_max(x): for i in reversed(range(n//2)): _siftup_max(x, i) +# For backwards compadibility +def _heappop_max(heap): + return heappop_max(heap) +def _heapreplace_max(heap, item): + return heapreplace_max(heap, item) +def _heappush_max(heap, item): + return _heappush_max(heap, item) +def _heappushpop_max(heap, item): + return _heappushpop_max(heap, item) +def _heapify_max(x): + return _heapify_max() + + # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant. From 167525dbc86477eeab1499cdb60b4dbf2b7ef80a Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 16:03:09 +0000 Subject: [PATCH 04/25] Benedikts suggestions --- Doc/library/heapq.rst | 26 +++++++++++++++----------- Lib/heapq.py | 15 +++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index a95b3501dd82aa..906e374c9a966e 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -82,23 +82,27 @@ The following functions are provided: on the heap. +For max heaps, the reverse of a heap, the following functions are provided: + + .. function:: heapify_max(x) - Transform list *x* into a heap, in-place, in linear time. + Transform list *x* into a max heap, in-place, in linear time. .. versionadded:: next .. 
function:: heappush_max(heap, item) - Push the value *item* onto the *heap*, maintaining the heap invariant. + Push the value *item* onto the max *heap*, maintaining the heap invariant. .. versionadded:: next + .. function:: heappop_max(heap) - Pop and return the largest item from the *heap*, maintaining the heap - invariant. If the heap is empty, :exc:`IndexError` is raised. To access the + Pop and return the largest item from the max *heap*, maintaining the heap + invariant. If the max heap is empty, :exc:`IndexError` is raised. To access the largest item without popping it, use ``heap[0]``. .. versionadded:: next @@ -106,7 +110,7 @@ The following functions are provided: .. function:: heappushpop_max(heap, item) - Push *item* on the heap, then pop and return the largest item from the + Push *item* on the max heap, then pop and return the largest item from the max *heap*. The combined action runs more efficiently than :func:`heappush_max` followed by a separate call to :func:`heappop_max`. @@ -115,17 +119,17 @@ The following functions are provided: .. function:: heapreplace_max(heap, item) - Pop and return the largest item from the *heap*, and also push the new *item*. - The heap size doesn't change. If the heap is empty, :exc:`IndexError` is raised. + Pop and return the largest item from the max *heap*, and also push the new *item*. + The max heap size doesn't change. If the max heap is empty, :exc:`IndexError` is raised. - This one step operation is more efficient than a :func:`heappop` followed by - :func:`heappush` and can be more appropriate when using a fixed-size heap. + This one step operation is more efficient than a :func:`heappop_max` followed by + :func:`heappush_max` and can be more appropriate when using a fixed-size heap. The pop/push combination always returns an element from the heap and replaces it with *item*. The value returned may be larger than the *item* added. If that isn't - desired, consider using :func:`heappushpop` instead. 
Its push/pop - combination returns the smaller of the two values, leaving the larger value + desired, consider using :func:`heappushpop_max` instead. Its push/pop + combination returns the larger of the two values, leaving the smaller value on the heap. .. versionadded:: next diff --git a/Lib/heapq.py b/Lib/heapq.py index 0f108f9f5d667a..695e07c393e7c2 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -214,16 +214,11 @@ def heapify_max(x): _siftup_max(x, i) # For backwards compadibility -def _heappop_max(heap): - return heappop_max(heap) -def _heapreplace_max(heap, item): - return heapreplace_max(heap, item) -def _heappush_max(heap, item): - return _heappush_max(heap, item) -def _heappushpop_max(heap, item): - return _heappushpop_max(heap, item) -def _heapify_max(x): - return _heapify_max() +_heappop_max = heappop_max +_heapreplace_max = heapreplace_max +_heappush_max = _heappush_max +_heappushpop_max = _heappushpop_max +_heapify_max = _heapify_max # 'heap' is a heap at all indices >= startpos, except possibly for pos. 
pos From 1b0b6f30026121bf7c28e8f531841a07e8988ee2 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 1 Mar 2025 16:04:50 +0000 Subject: [PATCH 05/25] Update Modules/_heapqmodule.c with Benedikts suggestion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Modules/_heapqmodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index dda0d36c7dcfa0..5e98d38e505c89 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -556,8 +556,9 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) Py_INCREF(top); cmp = PyObject_RichCompareBool(top, item, Py_LT); Py_DECREF(top); - if (cmp < 0) + if (cmp < 0) { return NULL; + } if (cmp == 0) { return Py_NewRef(item); } From fc467076d6db9584fffb11e6b25fe445cb998047 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 16:08:10 +0000 Subject: [PATCH 06/25] Fix mistake (extra underscores) --- Lib/heapq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index 695e07c393e7c2..b5223ab6099345 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -216,9 +216,9 @@ def heapify_max(x): # For backwards compadibility _heappop_max = heappop_max _heapreplace_max = heapreplace_max -_heappush_max = _heappush_max -_heappushpop_max = _heappushpop_max -_heapify_max = _heapify_max +_heappush_max = heappush_max +_heappushpop_max = heappushpop_max +_heapify_max = heapify_max # 'heap' is a heap at all indices >= startpos, except possibly for pos. 
pos From f4fd94a41745efb1d77afd8ed2b8eec850620e1d Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 18:21:53 +0000 Subject: [PATCH 07/25] Benedikt's requested changes --- Doc/library/heapq.rst | 24 +++++++++++++----------- Lib/heapq.py | 13 ++++++------- Lib/test/test_heapq.py | 24 ++++++++++++------------ Modules/_heapqmodule.c | 13 ++++++++++--- 4 files changed, 41 insertions(+), 33 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 906e374c9a966e..0771bedaeccc81 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -16,8 +16,10 @@ This module provides an implementation of the heap queue algorithm, also known as the priority queue algorithm. -Heaps are binary trees for which every parent node has a value less than or -equal to any of its children. We refer to this condition as the heap invariant. +in-heaps (resp. max-heaps) are binary trees for which every parent node +has a value less than (resp. greater than) or equal to any of its children. +We refer to this condition as the heap invariant. Unless stated otherwise, +*heaps* refer to min-heaps. This implementation uses arrays for which ``heap[k] <= heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]`` for all *k*, counting @@ -82,27 +84,27 @@ The following functions are provided: on the heap. -For max heaps, the reverse of a heap, the following functions are provided: +For max-heaps, the reverse of a heap, the following functions are provided: .. function:: heapify_max(x) - Transform list *x* into a max heap, in-place, in linear time. + Transform list *x* into a max-heap, in-place, in linear time. .. versionadded:: next .. function:: heappush_max(heap, item) - Push the value *item* onto the max *heap*, maintaining the heap invariant. + Push the value *item* onto the max-*heap*, maintaining the heap invariant. .. versionadded:: next .. function:: heappop_max(heap) - Pop and return the largest item from the max *heap*, maintaining the heap - invariant. 
If the max heap is empty, :exc:`IndexError` is raised. To access the + Pop and return the largest item from the max-*heap*, maintaining the heap + invariant. If the max-heap is empty, :exc:`IndexError` is raised. To access the largest item without popping it, use ``heap[0]``. .. versionadded:: next @@ -110,8 +112,8 @@ For max heaps, the reverse of a heap, the following functions are provided: .. function:: heappushpop_max(heap, item) - Push *item* on the max heap, then pop and return the largest item from the max - *heap*. The combined action runs more efficiently than :func:`heappush_max` + Push *item* on the max-heap, then pop and return the largest item from *heap*. + The combined action runs more efficiently than :func:`heappush_max` followed by a separate call to :func:`heappop_max`. .. versionadded:: next @@ -119,8 +121,8 @@ For max heaps, the reverse of a heap, the following functions are provided: .. function:: heapreplace_max(heap, item) - Pop and return the largest item from the max *heap*, and also push the new *item*. - The max heap size doesn't change. If the max heap is empty, :exc:`IndexError` is raised. + Pop and return the largest item from the max-heap *heap* and also push the new *item*. + The max-heap size doesn't change. If the max-heap is empty, :exc:`IndexError` is raised. This one step operation is more efficient than a :func:`heappop_max` followed by :func:`heappush_max` and can be more appropriate when using a fixed-size heap. diff --git a/Lib/heapq.py b/Lib/heapq.py index b5223ab6099345..54487f1df567fc 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -213,13 +213,6 @@ def heapify_max(x): for i in reversed(range(n//2)): _siftup_max(x, i) -# For backwards compadibility -_heappop_max = heappop_max -_heapreplace_max = heapreplace_max -_heappush_max = heappush_max -_heappushpop_max = heappushpop_max -_heapify_max = heapify_max - # 'heap' is a heap at all indices >= startpos, except possibly for pos. 
pos # is the index of a leaf with a possibly out-of-order value. Restore the @@ -604,6 +597,12 @@ def nlargest(n, iterable, key=None): except ImportError: pass +# For backwards compatibility +_heappop_max = heappop_max +_heapreplace_max = heapreplace_max +_heappush_max = heappush_max +_heappushpop_max = heappushpop_max +_heapify_max = heapify_max if __name__ == "__main__": diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index ae5ddf9290698d..d6a6d65774f67e 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -14,7 +14,7 @@ # _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when # _heapq is imported, so check them there func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace', - 'heappop_max', 'heapreplace_max', 'heapify_max', 'heappushpop_max',] + 'heappop_max', 'heapreplace_max', 'heapify_max', 'heappushpop_max'] class TestModules(TestCase): def test_py_functions(self): @@ -96,10 +96,10 @@ def test_max_push_pop(self): # 2) Check that the invariant holds for a sorted array self.check_max_invariant(results) - self.assertRaises(TypeError, self.module.heappush, []) + self.assertRaises(TypeError, self.module.heappush_max, []) try: - self.assertRaises(TypeError, self.module.heappush, None, None) - self.assertRaises(TypeError, self.module.heappop, None) + self.assertRaises(TypeError, self.module.heappush_max, None, None) + self.assertRaises(TypeError, self.module.heappop_max, None) except AttributeError: pass @@ -113,7 +113,7 @@ def check_invariant(self, heap): def check_max_invariant(self, heap): for pos in range(1, len(heap)): parentpos = (pos - 1) >> 1 - self.assertTrue(heap[parentpos] >= heap[pos]) + self.assertGreaterEqual(heap[parentpos], heap[pos]) def test_heapify(self): for size in list(range(30)) + [20000]: @@ -129,7 +129,7 @@ def test_heapify_max(self): self.module.heapify_max(heap) self.check_max_invariant(heap) - self.assertRaises(TypeError, self.module.heapify, None) + 
self.assertRaises(TypeError, self.module.heapify_max, None) def test_naive_nbest(self): data = [random.randrange(2000) for i in range(1000)] @@ -198,21 +198,21 @@ def test_heappushpop(self): def test_heappushpop_max(self): h = [] x = self.module.heappushpop_max(h, 10) - self.assertEqual((h, x), ([], 10)) + self.assertTupleEqual((h, x), ([], 10)) h = [10] x = self.module.heappushpop_max(h, 10.0) - self.assertEqual((h, x), ([10], 10.0)) - self.assertEqual(type(h[0]), int) - self.assertEqual(type(x), float) + self.assertTupleEqual((h, x), ([10], 10.0)) + self.assertIsInstance(h[0], int) + self.assertIsInstance(x, float) h = [10] x = self.module.heappushpop_max(h, 11) - self.assertEqual((h, x), ([11], 10)) + self.assertTupleEqual((h, x), ([11], 10)) h = [10] x = self.module.heappushpop_max(h, 9) - self.assertEqual((h, x), ([10], 9)) + self.assertTupleEqual((h, x), ([10], 9)) def test_heappop_max(self): # heapop_max has an optimization for one-item lists which isn't diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 5e98d38e505c89..b58f303bb15012 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -549,9 +549,11 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) { PyObject *returnitem; int cmp; + if (PyList_GET_SIZE(heap) == 0) { return Py_NewRef(item); } + PyObject *top = PyList_GET_ITEM(heap, 0); Py_INCREF(top); cmp = PyObject_RichCompareBool(top, item, Py_LT); @@ -562,14 +564,19 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) if (cmp == 0) { return Py_NewRef(item); } + + if (PyList_GET_SIZE(heap) == 0) { + PyErr_SetString(PyExc_IndexError, "index out of range"); + return NULL; + } + returnitem = PyList_GET_ITEM(heap, 0); PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - - if (siftup_max((PyListObject *)heap, 0)) { + if (siftup_max((PyListObject *)heap, 0) < 0) { Py_DECREF(returnitem); return NULL; } - return Py_NewRef(returnitem); + return returnitem; } static PyMethodDef 
heapq_methods[] = { From cebbc884b9850acae8c8804ec2e2cda707f26c53 Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 18:25:02 +0000 Subject: [PATCH 08/25] Missed one of Benedikt's requested changes --- Lib/test/test_heapq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index d6a6d65774f67e..9e31bbbb5f300a 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -111,9 +111,9 @@ def check_invariant(self, heap): self.assertTrue(heap[parentpos] <= item) def check_max_invariant(self, heap): - for pos in range(1, len(heap)): + for pos, item in enumerate(heap[1:], start=1): parentpos = (pos - 1) >> 1 - self.assertGreaterEqual(heap[parentpos], heap[pos]) + self.assertGreaterEqual(heap[parentpos], item) def test_heapify(self): for size in list(range(30)) + [20000]: From 3cde6c67b6417969000fb630fa0a4ea19e59b47b Mon Sep 17 00:00:00 2001 From: stan Date: Sat, 1 Mar 2025 19:43:49 +0000 Subject: [PATCH 09/25] Benedikts suggestion --- Doc/library/heapq.rst | 4 ++-- Lib/test/test_heapq.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 0771bedaeccc81..c449f5541ef51d 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -16,7 +16,7 @@ This module provides an implementation of the heap queue algorithm, also known as the priority queue algorithm. -in-heaps (resp. max-heaps) are binary trees for which every parent node +Min-heaps (resp. max-heaps) are binary trees for which every parent node has a value less than (resp. greater than) or equal to any of its children. We refer to this condition as the heap invariant. Unless stated otherwise, *heaps* refer to min-heaps. @@ -96,7 +96,7 @@ For max-heaps, the reverse of a heap, the following functions are provided: .. function:: heappush_max(heap, item) - Push the value *item* onto the max-*heap*, maintaining the heap invariant. 
+ Push the value *item* onto the max-heap *heap*, maintaining the heap invariant. .. versionadded:: next diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 9e31bbbb5f300a..43764584b3a882 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -68,11 +68,11 @@ def test_push_pop(self): self.check_invariant(results) self.assertRaises(TypeError, self.module.heappush, []) - try: - self.assertRaises(TypeError, self.module.heappush, None, None) - self.assertRaises(TypeError, self.module.heappop, None) - except AttributeError: - pass + + exc_types = (AttributeError, TypeError) + self.assertRaises(exc_types, self.module.heappush, None, None) + self.assertRaises(exc_types, self.module.heappop, None) + def test_max_push_pop(self): # 1) Push 256 random numbers and pop them off, verifying all's OK. From 5d2d3877c2e6ae65145de835d9a8a6266d3f382f Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 11:30:02 +0000 Subject: [PATCH 10/25] Benedikts Suggestions --- Doc/library/heapq.rst | 6 +++--- Doc/whatsnew/3.14.rst | 17 +++++++++++++++++ Lib/test/test_heapq.py | 18 +++++++++--------- ...5-03-01-15-00-00.gh-issue-110067.1ad3as.rst | 12 +++++++++++- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index c449f5541ef51d..00cc64ef6bbf2e 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -84,7 +84,7 @@ The following functions are provided: on the heap. -For max-heaps, the reverse of a heap, the following functions are provided: +For max-heaps, the following functions are provided: .. function:: heapify_max(x) @@ -103,7 +103,7 @@ For max-heaps, the reverse of a heap, the following functions are provided: .. function:: heappop_max(heap) - Pop and return the largest item from the max-*heap*, maintaining the heap + Pop and return the largest item from the max-heap *heap*, maintaining the heap invariant. If the max-heap is empty, :exc:`IndexError` is raised. 
To access the largest item without popping it, use ``heap[0]``. @@ -112,7 +112,7 @@ For max-heaps, the reverse of a heap, the following functions are provided: .. function:: heappushpop_max(heap, item) - Push *item* on the max-heap, then pop and return the largest item from *heap*. + Push *item* on the max-heap *heap*, then pop and return the largest item from *heap*. The combined action runs more efficiently than :func:`heappush_max` followed by a separate call to :func:`heappop_max`. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 3c876a193fad32..f8c8b92ea1d2f2 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -563,6 +563,23 @@ getopt * Add support for returning intermixed options and non-option arguments in order. (Contributed by Serhiy Storchaka in :gh:`126390`.) + +heapq +----- + +* Make :mod:`heapq` max-heap functions + + * :func:`heapify_max`, + * :func:`heappush_max`, + * :func:`heappop_max`, + * :func:`heapreplace_max` + + public. And add the missing :func:`heappushpop_max` to + both the C and Python implementation. + + Previous underscored naming is kept for backwards compatibility. + + http ---- diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 43764584b3a882..b348449535387f 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -69,10 +69,11 @@ def test_push_pop(self): self.assertRaises(TypeError, self.module.heappush, []) - exc_types = (AttributeError, TypeError) - self.assertRaises(exc_types, self.module.heappush, None, None) - self.assertRaises(exc_types, self.module.heappop, None) - + try: + self.assertRaises(TypeError, self.module.heappush, None, None) + self.assertRaises(TypeError, self.module.heappop, None) + except AttributeError: + pass def test_max_push_pop(self): # 1) Push 256 random numbers and pop them off, verifying all's OK. 
@@ -97,11 +98,10 @@ def test_max_push_pop(self): self.check_max_invariant(results) self.assertRaises(TypeError, self.module.heappush_max, []) - try: - self.assertRaises(TypeError, self.module.heappush_max, None, None) - self.assertRaises(TypeError, self.module.heappop_max, None) - except AttributeError: - pass + + exc_types = (AttributeError, TypeError) + self.assertRaises(exc_types, self.module.heappush_max, None, None) + self.assertRaises(exc_types, self.module.heappop_max, None) def check_invariant(self, heap): # Check the heap invariant. diff --git a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst index 859bbc63494a15..f486056a4ef83b 100644 --- a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst +++ b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst @@ -1 +1,11 @@ -Make max heap functions public. +Make :mod:`heapq` max-heap functions + +* :func:`heapify_max`, +* :func:`heappush_max`, +* :func:`heappop_max`, +* :func:`heapreplace_max` + +public. And add the missing :func:`heappushpop_max` to +both the C and Python implementation. + +Previous underscored naming is kept for backwards compatibility. 
From a499cd4ca19313262d42f59f10653f5ef18b9950 Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 2 Mar 2025 13:58:48 +0000 Subject: [PATCH 11/25] Fix doc warnings --- Doc/whatsnew/3.14.rst | 10 +++++----- .../2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f8c8b92ea1d2f2..ba2acc9744be98 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -569,12 +569,12 @@ heapq * Make :mod:`heapq` max-heap functions - * :func:`heapify_max`, - * :func:`heappush_max`, - * :func:`heappop_max`, - * :func:`heapreplace_max` + * :func:`heapq.heapify_max`, + * :func:`heapq.heappush_max`, + * :func:`heapq.heappop_max`, + * :func:`heapq.heapreplace_max` - public. And add the missing :func:`heappushpop_max` to + public. And add the missing :func:`heapq.heappushpop_max` to both the C and Python implementation. Previous underscored naming is kept for backwards compatibility. diff --git a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst index f486056a4ef83b..a06e141521487b 100644 --- a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst +++ b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst @@ -1,11 +1,11 @@ Make :mod:`heapq` max-heap functions -* :func:`heapify_max`, -* :func:`heappush_max`, -* :func:`heappop_max`, -* :func:`heapreplace_max` +* :func:`heapq.heapify_max`, +* :func:`heapq.heappush_max`, +* :func:`heapq.heappop_max`, +* :func:`heapq.heapreplace_max` -public. And add the missing :func:`heappushpop_max` to +public. And add the missing :func:`heapq.heappushpop_max` to both the C and Python implementation. Previous underscored naming is kept for backwards compatibility. 
From abe0a95aa70d8dc3bdafb4286e5e400802b5a032 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 2 Mar 2025 15:30:02 +0000 Subject: [PATCH 12/25] Improve entries --- Doc/whatsnew/3.14.rst | 7 +++---- Lib/test/test_heapq.py | 1 - .../2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst | 13 ++----------- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index ba2acc9744be98..398f8757cb66e5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -567,17 +567,16 @@ getopt heapq ----- -* Make :mod:`heapq` max-heap functions +* The max-heap functions in :mod:`heapq` are now public: * :func:`heapq.heapify_max`, * :func:`heapq.heappush_max`, * :func:`heapq.heappop_max`, * :func:`heapq.heapreplace_max` - public. And add the missing :func:`heapq.heappushpop_max` to - both the C and Python implementation. - Previous underscored naming is kept for backwards compatibility. + Additionally, the missing function :func:`heapq.heappushpop_max` has been added to both the C and Python + implementations. 
http diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index b348449535387f..35b35d7958f345 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -68,7 +68,6 @@ def test_push_pop(self): self.check_invariant(results) self.assertRaises(TypeError, self.module.heappush, []) - try: self.assertRaises(TypeError, self.module.heappush, None, None) self.assertRaises(TypeError, self.module.heappop, None) diff --git a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst index a06e141521487b..7d1ffad053aeda 100644 --- a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst +++ b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst @@ -1,11 +1,2 @@ -Make :mod:`heapq` max-heap functions - -* :func:`heapq.heapify_max`, -* :func:`heapq.heappush_max`, -* :func:`heapq.heappop_max`, -* :func:`heapq.heapreplace_max` - -public. And add the missing :func:`heapq.heappushpop_max` to -both the C and Python implementation. - -Previous underscored naming is kept for backwards compatibility. +Make :mod:`heapq` max-heap functions :func:`heapq.heapify_max`, :func:`heapq.heappush_max`, :func:`heapq.heappop_max`, +and :func:`heapq.heapreplace_max` public. Add missing :func:`heapq.heappushpop_max` to both the C and Python implementation. 
From 35612063f846038c22ba52ad68b369c4e2b221f1 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 10 Mar 2025 17:03:50 +0000 Subject: [PATCH 13/25] Address some of Petr's suggestions --- Doc/library/heapq.rst | 11 ++------- Lib/test/test_heapq.py | 23 ++++++++++++++++--- ...-03-01-15-00-00.gh-issue-110067.1ad3as.rst | 7 ++++-- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 00cc64ef6bbf2e..4c645aa83a2497 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -124,15 +124,8 @@ For max-heaps, the following functions are provided: Pop and return the largest item from the max-heap *heap* and also push the new *item*. The max-heap size doesn't change. If the max-heap is empty, :exc:`IndexError` is raised. - This one step operation is more efficient than a :func:`heappop_max` followed by - :func:`heappush_max` and can be more appropriate when using a fixed-size heap. - The pop/push combination always returns an element from the heap and replaces - it with *item*. - - The value returned may be larger than the *item* added. If that isn't - desired, consider using :func:`heappushpop_max` instead. Its push/pop - combination returns the larger of the two values, leaving the smaller value - on the heap. + The value returned may be smaller than the *item* added. Refer to the analogous + function heapreplace for detailed usage notes. .. 
versionadded:: next diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 35b35d7958f345..2a0beedae9ab20 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -437,16 +437,20 @@ def __lt__(self, other): class TestErrorHandling: def test_non_sequence(self): - for f in (self.module.heapify, self.module.heappop): + for f in (self.module.heapify, self.module.heappop, + self.module.heapify_max, self.module.heappop_max): self.assertRaises((TypeError, AttributeError), f, 10) for f in (self.module.heappush, self.module.heapreplace, + self.module.heappush_max, self.module.heapreplace_max, self.module.nlargest, self.module.nsmallest): self.assertRaises((TypeError, AttributeError), f, 10, 10) def test_len_only(self): - for f in (self.module.heapify, self.module.heappop): + for f in (self.module.heapify, self.module.heappop, + self.module.heapify_max, self.module.heappop_max): self.assertRaises((TypeError, AttributeError), f, LenOnly()) - for f in (self.module.heappush, self.module.heapreplace): + for f in (self.module.heappush, self.module.heapreplace, + self.module.heappush_max, self.module.heapreplace_max): self.assertRaises((TypeError, AttributeError), f, LenOnly(), 10) for f in (self.module.nlargest, self.module.nsmallest): self.assertRaises(TypeError, f, 2, LenOnly()) @@ -463,6 +467,8 @@ def test_cmp_err(self): def test_arg_parsing(self): for f in (self.module.heapify, self.module.heappop, self.module.heappush, self.module.heapreplace, + self.module.heapify_max, self.module.heappop_max, + self.module.heappush_max, self.module.heapreplace_max, self.module.nlargest, self.module.nsmallest): self.assertRaises((TypeError, AttributeError), f, 10) @@ -484,6 +490,10 @@ def test_heappush_mutating_heap(self): # Python version raises IndexError, C version RuntimeError with self.assertRaises((IndexError, RuntimeError)): self.module.heappush(heap, SideEffectLT(5, heap)) + heap = [] + heap.extend(SideEffectLT(i, heap) for i in range(200)) + with 
self.assertRaises((IndexError, RuntimeError)): + self.module.heappush_max(heap, SideEffectLT(5, heap)) def test_heappop_mutating_heap(self): heap = [] @@ -491,6 +501,10 @@ def test_heappop_mutating_heap(self): # Python version raises IndexError, C version RuntimeError with self.assertRaises((IndexError, RuntimeError)): self.module.heappop(heap) + heap = [] + heap.extend(SideEffectLT(i, heap) for i in range(200)) + with self.assertRaises((IndexError, RuntimeError)): + self.module.heappop_max(heap) def test_comparison_operator_modifiying_heap(self): # See bpo-39421: Strong references need to be taken @@ -503,6 +517,9 @@ def __lt__(self, o): heap = [] self.module.heappush(heap, EvilClass(0)) self.assertRaises(IndexError, self.module.heappushpop, heap, 1) + heap = [] + self.module.heappush_max(heap, EvilClass(0)) + self.assertRaises(IndexError, self.module.heappushpop_max, heap, 1) def test_comparison_operator_modifiying_heap_two_heaps(self): diff --git a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst index 7d1ffad053aeda..98e125f4966a64 100644 --- a/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst +++ b/Misc/NEWS.d/next/Library/2025-03-01-15-00-00.gh-issue-110067.1ad3as.rst @@ -1,2 +1,5 @@ -Make :mod:`heapq` max-heap functions :func:`heapq.heapify_max`, :func:`heapq.heappush_max`, :func:`heapq.heappop_max`, -and :func:`heapq.heapreplace_max` public. Add missing :func:`heapq.heappushpop_max` to both the C and Python implementation. +Make :mod:`heapq` max-heap functions :func:`heapq.heapify_max`, :func:`heapq.heappush_max`, +:func:`heapq.heappop_max`, and :func:`heapq.heapreplace_max` public. +Previous underscored naming is kept for backwards compatibility. +Additionally, the missing function :func:`heapq.heappushpop_max` has been added +to both the C and Python implementations. 
From 8fd1a037c83bb5afb08106f59f9dda9e14eb094c Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 10 Mar 2025 17:21:23 +0000 Subject: [PATCH 14/25] Clean up and add missing --- Doc/whatsnew/3.14.rst | 7 ++----- Lib/test/test_heapq.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 398f8757cb66e5..a262a6d207d607 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -567,16 +567,13 @@ getopt heapq ----- -* The max-heap functions in :mod:`heapq` are now public: +* Add functions for working with max-heaps: * :func:`heapq.heapify_max`, * :func:`heapq.heappush_max`, * :func:`heapq.heappop_max`, * :func:`heapq.heapreplace_max` - - Previous underscored naming is kept for backwards compatibility. - Additionally, the missing function :func:`heapq.heappushpop_max` has been added to both the C and Python - implementations. + * :func:`heapq.heappushpop_max` http diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 2a0beedae9ab20..8829be47fb0837 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -459,7 +459,8 @@ def test_cmp_err(self): seq = [CmpErr(), CmpErr(), CmpErr()] for f in (self.module.heapify, self.module.heappop): self.assertRaises(ZeroDivisionError, f, seq) - for f in (self.module.heappush, self.module.heapreplace): + for f in (self.module.heappush, self.module.heapreplace, + self.module.heappush_max, self.module.heapreplace_max): self.assertRaises(ZeroDivisionError, f, seq, 10) for f in (self.module.nlargest, self.module.nsmallest): self.assertRaises(ZeroDivisionError, f, 2, seq) @@ -541,6 +542,17 @@ def __lt__(self, o): self.assertRaises((IndexError, RuntimeError), self.module.heappush, list1, g(1)) self.assertRaises((IndexError, RuntimeError), self.module.heappush, list2, h(1)) + list1, list2 = [], [] + + self.module.heappush_max(list1, h(0)) + self.module.heappush_max(list2, g(0)) + self.module.heappush_max(list1, g(1)) + 
self.module.heappush_max(list2, h(1)) + + TestHeap.check_max_invariant(self, list1) + TestHeap.check_max_invariant(self, list2) + + class TestErrorHandlingPython(TestErrorHandling, TestCase): module = py_heapq From 8ab97c2df8fa1ec3f61be49e1bcb574677086a89 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:05:20 +0000 Subject: [PATCH 15/25] Update Doc/library/heapq.rst Co-authored-by: Petr Viktorin --- Doc/library/heapq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 4c645aa83a2497..da393adbb7fff6 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -125,7 +125,7 @@ For max-heaps, the following functions are provided: The max-heap size doesn't change. If the max-heap is empty, :exc:`IndexError` is raised. The value returned may be smaller than the *item* added. Refer to the analogous - function heapreplace for detailed usage notes. + function :func:`heapreplace` for detailed usage notes. .. 
versionadded:: next From 81db251af26f0f0e44e6c6fa78b0cc205fbf1c46 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Fri, 2 May 2025 15:55:31 +0100 Subject: [PATCH 16/25] Sort and add missing C implementation --- Lib/test/test_heapq.py | 5 +++-- Modules/_heapqmodule.c | 31 ++++++++++++++++++++++++++-- Modules/clinic/_heapqmodule.c.h | 36 ++++++++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 8829be47fb0837..ee555ccbdda369 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -14,7 +14,8 @@ # _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when # _heapq is imported, so check them there func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace', - 'heappop_max', 'heapreplace_max', 'heapify_max', 'heappushpop_max'] + 'heapify_max', 'heappop_max', 'heappush_max', 'heappushpop_max', + 'heapreplace_max'] class TestModules(TestCase): def test_py_functions(self): @@ -24,7 +25,7 @@ def test_py_functions(self): @skipUnless(c_heapq, 'requires _heapq') def test_c_functions(self): for fname in func_names: - self.assertEqual(getattr(c_heapq, fname).__module__, '_heapq') + self.assertEqual(getattr(c_heapq, fname).__module__, '_heapq', fname) def load_tests(loader, tests, ignore): diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index b58f303bb15012..b924502e85c0a0 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -480,6 +480,30 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return siftdown_max(heap, startpos, pos); } +/*[clinic input] +_heapq.heappush_max + + heap: object(subclass_of='&PyList_Type') + item: object + / + +Push item onto max heap, maintaining the heap invariant. 
+[clinic start generated code]*/ + +static PyObject * +_heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) +/*[clinic end generated code: output=c869d5f9deb08277 input=4743d7db137b6e2b]*/ +{ + if (PyList_Append(heap, item)) { + return NULL; + } + + if (siftdown_max((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) { + return NULL; + } + + Py_RETURN_NONE; +} /*[clinic input] _heapq.heappop_max @@ -585,10 +609,13 @@ static PyMethodDef heapq_methods[] = { _HEAPQ_HEAPPOP_METHODDEF _HEAPQ_HEAPREPLACE_METHODDEF _HEAPQ_HEAPIFY_METHODDEF + + _HEAPQ_HEAPPUSH_MAX_METHODDEF + _HEAPQ_HEAPPUSHPOP_MAX_METHODDEF _HEAPQ_HEAPPOP_MAX_METHODDEF - _HEAPQ_HEAPIFY_MAX_METHODDEF _HEAPQ_HEAPREPLACE_MAX_METHODDEF - _HEAPQ_HEAPPUSHPOP_MAX_METHODDEF + _HEAPQ_HEAPIFY_MAX_METHODDEF + {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 56632a94d9e9d6..81d108627265ab 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -175,6 +175,40 @@ _heapq_heapify(PyObject *module, PyObject *arg) return return_value; } +PyDoc_STRVAR(_heapq_heappush_max__doc__, +"heappush_max($module, heap, item, /)\n" +"--\n" +"\n" +"Push item onto max heap, maintaining the heap invariant."); + +#define _HEAPQ_HEAPPUSH_MAX_METHODDEF \ + {"heappush_max", _PyCFunction_CAST(_heapq_heappush_max), METH_FASTCALL, _heapq_heappush_max__doc__}, + +static PyObject * +_heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item); + +static PyObject * +_heapq_heappush_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *heap; + PyObject *item; + + if (!_PyArg_CheckPositional("heappush_max", nargs, 2, 2)) { + goto exit; + } + if (!PyList_Check(args[0])) { + _PyArg_BadArgument("heappush_max", "argument 1", "list", args[0]); + goto exit; + } + heap = args[0]; + item = args[1]; + return_value = _heapq_heappush_max_impl(module, heap, item); + +exit: + 
return return_value; +} + PyDoc_STRVAR(_heapq_heappop_max__doc__, "heappop_max($module, heap, /)\n" "--\n" @@ -303,4 +337,4 @@ _heapq_heappushpop_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs exit: return return_value; } -/*[clinic end generated code: output=0404176bef8091d2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f55d8595ce150c76 input=a9049054013a1b77]*/ From 38cbf137f9facf9abd1d25a23f581fa6b9d6aa4b Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Fri, 2 May 2025 16:21:59 +0100 Subject: [PATCH 17/25] Petr's list suggestion --- Lib/test/test_heapq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index ee555ccbdda369..baa0a9fb4877fb 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -13,9 +13,9 @@ # _heapq.nlargest/nsmallest are saved in heapq._nlargest/_smallest when # _heapq is imported, so check them there -func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace', - 'heapify_max', 'heappop_max', 'heappush_max', 'heappushpop_max', - 'heapreplace_max'] +func_names = ['heapify', 'heappop', 'heappush', 'heappushpop', 'heapreplace'] +# Add max-heap variants +func_names += [func + '_max' for func in func_names] class TestModules(TestCase): def test_py_functions(self): From b6f4db47b1999976ccbd2a3e7a437904313d2382 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Fri, 2 May 2025 17:21:03 +0100 Subject: [PATCH 18/25] heappushpop_max fixup --- Lib/heapq.py | 2 +- Lib/test/test_heapq.py | 11 +++++++---- Modules/_heapqmodule.c | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index 54487f1df567fc..da8e2fd9349124 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -202,7 +202,7 @@ def heappush_max(heap, item): def heappushpop_max(heap, item): """Maxheap fast version of a heappush followed by a heappop.""" - if heap and heap[0] < item: + if heap and heap[0] > item: item, heap[0] = 
heap[0], item _siftup_max(heap, 0) return item diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index baa0a9fb4877fb..2872b207157b3e 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -208,11 +208,11 @@ def test_heappushpop_max(self): h = [10] x = self.module.heappushpop_max(h, 11) - self.assertTupleEqual((h, x), ([11], 10)) + self.assertTupleEqual((h, x), ([10], 11)) h = [10] x = self.module.heappushpop_max(h, 9) - self.assertTupleEqual((h, x), ([10], 9)) + self.assertTupleEqual((h, x), ([9], 10)) def test_heappop_max(self): # heapop_max has an optimization for one-item lists which isn't @@ -515,13 +515,16 @@ class EvilClass(int): def __lt__(self, o): heap.clear() return NotImplemented + def __gt__(self, o): + heap.clear() + return NotImplemented heap = [] self.module.heappush(heap, EvilClass(0)) self.assertRaises(IndexError, self.module.heappushpop, heap, 1) heap = [] - self.module.heappush_max(heap, EvilClass(0)) - self.assertRaises(IndexError, self.module.heappushpop_max, heap, 1) + self.module.heappush_max(heap, EvilClass(1)) + self.assertRaises(IndexError, self.module.heappushpop_max, heap, 0) def test_comparison_operator_modifiying_heap_two_heaps(self): diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index b924502e85c0a0..9f37fa290c7c76 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -580,7 +580,7 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) PyObject *top = PyList_GET_ITEM(heap, 0); Py_INCREF(top); - cmp = PyObject_RichCompareBool(top, item, Py_LT); + cmp = PyObject_RichCompareBool(top, item, Py_GT); Py_DECREF(top); if (cmp < 0) { return NULL; From 61c92852e2147376ea761f74cafd3f8caa9db511 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 4 May 2025 18:26:11 +0100 Subject: [PATCH 19/25] Improve test --- Lib/test/test_heapq.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 
2872b207157b3e..342e989717dc74 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -508,7 +508,7 @@ def test_heappop_mutating_heap(self): with self.assertRaises((IndexError, RuntimeError)): self.module.heappop_max(heap) - def test_comparison_operator_modifiying_heap(self): + def test_comparison_operator_modifying_heap(self): # See bpo-39421: Strong references need to be taken # when comparing objects as they can alter the heap class EvilClass(int): @@ -526,7 +526,7 @@ def __gt__(self, o): self.module.heappush_max(heap, EvilClass(1)) self.assertRaises(IndexError, self.module.heappushpop_max, heap, 0) - def test_comparison_operator_modifiying_heap_two_heaps(self): + def test_comparison_operator_modifying_heap_two_heaps(self): class h(int): def __lt__(self, o): @@ -553,8 +553,8 @@ def __lt__(self, o): self.module.heappush_max(list1, g(1)) self.module.heappush_max(list2, h(1)) - TestHeap.check_max_invariant(self, list1) - TestHeap.check_max_invariant(self, list2) + self.assertRaises((IndexError, RuntimeError), self.module.heappush_max, list1, g(1)) + self.assertRaises((IndexError, RuntimeError), self.module.heappush_max, list2, h(1)) class TestErrorHandlingPython(TestErrorHandling, TestCase): From ebe00dcc71f9d4464a73dd3ec37136d2e78de46a Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 5 May 2025 08:47:06 +0100 Subject: [PATCH 20/25] Switch to < --- Lib/heapq.py | 2 +- Modules/_heapqmodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/heapq.py b/Lib/heapq.py index da8e2fd9349124..6ceb211f1ca2ae 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -202,7 +202,7 @@ def heappush_max(heap, item): def heappushpop_max(heap, item): """Maxheap fast version of a heappush followed by a heappop.""" - if heap and heap[0] > item: + if heap and item < heap[0]: item, heap[0] = heap[0], item _siftup_max(heap, 0) return item diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 9f37fa290c7c76..095866eec7d75a 100644 --- 
a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -580,7 +580,7 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) PyObject *top = PyList_GET_ITEM(heap, 0); Py_INCREF(top); - cmp = PyObject_RichCompareBool(top, item, Py_GT); + cmp = PyObject_RichCompareBool(item, top, Py_LT); Py_DECREF(top); if (cmp < 0) { return NULL; From bc0dd6631eac7eafcc92ae69994c0b7bb182fdbb Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 5 May 2025 08:55:33 +0100 Subject: [PATCH 21/25] Clean up test --- Lib/test/test_heapq.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index 342e989717dc74..d372890d8045d9 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -515,16 +515,10 @@ class EvilClass(int): def __lt__(self, o): heap.clear() return NotImplemented - def __gt__(self, o): - heap.clear() - return NotImplemented heap = [] self.module.heappush(heap, EvilClass(0)) self.assertRaises(IndexError, self.module.heappushpop, heap, 1) - heap = [] - self.module.heappush_max(heap, EvilClass(1)) - self.assertRaises(IndexError, self.module.heappushpop_max, heap, 0) def test_comparison_operator_modifying_heap_two_heaps(self): From 988b2d38210322c983f339719c66b5523f1bd113 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 5 May 2025 11:13:24 +0200 Subject: [PATCH 22/25] Reword the docs --- Doc/library/heapq.rst | 78 ++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index da393adbb7fff6..a5eaf0d672fa59 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -16,42 +16,56 @@ This module provides an implementation of the heap queue algorithm, also known as the priority queue algorithm. -Min-heaps (resp. max-heaps) are binary trees for which every parent node -has a value less than (resp. greater than) or equal to any of its children. 
-We refer to this condition as the heap invariant. Unless stated otherwise, -*heaps* refer to min-heaps. - -This implementation uses arrays for which -``heap[k] <= heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]`` for all *k*, counting -elements from zero. For the sake of comparison, non-existing elements are -considered to be infinite. The interesting property of a heap is that its -smallest element is always the root, ``heap[0]``. - -The API below differs from textbook heap algorithms in two aspects: (a) We use -zero-based indexing. This makes the relationship between the index for a node -and the indexes for its children slightly less obvious, but is more suitable -since Python uses zero-based indexing. (b) Our pop method returns the smallest -item, not the largest (called a "min heap" in textbooks; a "max heap" is more -common in texts because of its suitability for in-place sorting). +Min-heaps are binary trees for which every parent node has a value less than +or equal to any of its children. +We refer to this condition as the heap invariant. + +For min-heaps, this implementation uses lists for which +``heap[k] <= heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]`` for all *k* for which +the compared elements exist. Elements are counted from zero. The interesting +property of a min-heap is that its smallest element is always the root, +``heap[0]``. These two make it possible to view the heap as a regular Python list without surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` maintains the heap invariant! +Max-heaps satisfy the reverse invariant: every parent node node has a value +*greater* than any of its children. These are implemented as lists for which +``maxheap[2*k+1] <= maxheap[k]`` and ``maxheap[2*k+2] <= maxheap[k]`` for all +*k* for which the compared elements exist. +The root, ``maxheap[0]``, contains the *largest* element; +``heap.sort(reverse=True)`` maintains the max-heap invariant. 
+ +The :mod:`!heapq` API differs from textbook heap algorithms in two aspects: (a) +We use zero-based indexing. This makes the relationship between the index for +a node and the indexes for its children slightly less obvious, but is more +suitable since Python uses zero-based indexing. (b) Textbooks often focus on +max-heaps, due to their suitability for in-place sorting. Our implementation +favors min-heaps as they better correspond to Python lists: :meth:`list.sort` +maintains the *min*-heap invariant. + +Like :meth:`list.sort`, this implementation uses only the ``<`` operator +for comparisons, for both min-heaps and max-heaps. + +In the API below, and in this documentation, the unqalified term *heap* +generally refers to a min-heap. +API for max-heaps is named using a ``_max`` suffix. + To create a heap, use a list initialized to ``[]``, or you can transform a populated list into a heap via function :func:`heapify`. -The following functions are provided: +The following functions are provided for min-heaps: .. function:: heappush(heap, item) - Push the value *item* onto the *heap*, maintaining the heap invariant. + Push the value *item* onto the *heap*, maintaining the min-heap invariant. .. function:: heappop(heap) - Pop and return the smallest item from the *heap*, maintaining the heap + Pop and return the smallest item from the *heap*, maintaining the min-heap invariant. If the heap is empty, :exc:`IndexError` is raised. To access the smallest item without popping it, use ``heap[0]``. @@ -65,7 +79,7 @@ The following functions are provided: .. function:: heapify(x) - Transform list *x* into a heap, in-place, in linear time. + Transform list *x* into a min-heap, in-place, in linear time. .. function:: heapreplace(heap, item) @@ -96,23 +110,25 @@ For max-heaps, the following functions are provided: .. function:: heappush_max(heap, item) - Push the value *item* onto the max-heap *heap*, maintaining the heap invariant. 
+ Push the value *item* onto the max-heap *heap*, maintaining the max-heap + invariant. .. versionadded:: next .. function:: heappop_max(heap) - Pop and return the largest item from the max-heap *heap*, maintaining the heap - invariant. If the max-heap is empty, :exc:`IndexError` is raised. To access the - largest item without popping it, use ``heap[0]``. + Pop and return the largest item from the max-heap *heap*, maintaining the + max-heap invariant. If the max-heap is empty, :exc:`IndexError` is raised. + To access the largest item without popping it, use ``maxheap[0]``. .. versionadded:: next .. function:: heappushpop_max(heap, item) - Push *item* on the max-heap *heap*, then pop and return the largest item from *heap*. + Push *item* on the max-heap *heap*, then pop and return the largest item + from *heap*. The combined action runs more efficiently than :func:`heappush_max` followed by a separate call to :func:`heappop_max`. @@ -121,11 +137,13 @@ For max-heaps, the following functions are provided: .. function:: heapreplace_max(heap, item) - Pop and return the largest item from the max-heap *heap* and also push the new *item*. - The max-heap size doesn't change. If the max-heap is empty, :exc:`IndexError` is raised. + Pop and return the largest item from the max-heap *heap* and also push the + new *item*. + The max-heap size doesn't change. If the max-heap is empty, + :exc:`IndexError` is raised. - The value returned may be smaller than the *item* added. Refer to the analogous - function :func:`heapreplace` for detailed usage notes. + The value returned may be smaller than the *item* added. Refer to the + analogous function :func:`heapreplace` for detailed usage notes. .. 
versionadded:: next From 6efd70c6549e3d10922ad6047f8a9cf10a63ba61 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 5 May 2025 13:19:23 +0200 Subject: [PATCH 23/25] Add max-heap variants for the other tests --- Lib/test/test_heapq.py | 86 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index d372890d8045d9..bb38459ca2e61b 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -151,10 +151,7 @@ def heapiter(self, heap): def test_nbest(self): # Less-naive "N-best" algorithm, much faster (if len(data) is big - # enough ) than sorting all of data. However, if we had a max - # heap instead of a min heap, it could go faster still via - # heapify'ing all of data (linear time), then doing 10 heappops - # (10 log-time steps). + # enough ) than sorting all of data. data = [random.randrange(2000) for i in range(1000)] heap = data[:10] self.module.heapify(heap) @@ -167,6 +164,17 @@ def test_nbest(self): self.assertRaises(TypeError, self.module.heapreplace, None, None) self.assertRaises(IndexError, self.module.heapreplace, [], None) + def test_nbest_maxheap(self): + # With a max heap instead of a min heap, the "N-best" algorithm can + # go even faster still via heapify'ing all of data (linear time), then + # doing 10 heappops (10 log-time steps). 
+ data = [random.randrange(2000) for i in range(1000)] + heap = data[:] + self.module.heapify_max(heap) + result = [self.module.heappop_max(heap) for _ in range(10)] + result.reverse() + self.assertEqual(result, sorted(data)[-10:]) + def test_nbest_with_pushpop(self): data = [random.randrange(2000) for i in range(1000)] heap = data[:10] @@ -176,6 +184,62 @@ def test_nbest_with_pushpop(self): self.assertEqual(list(self.heapiter(heap)), sorted(data)[-10:]) self.assertEqual(self.module.heappushpop([], 'x'), 'x') + def test_naive_nworst(self): + # Max-heap variant of "test_naive_nbest" + data = [random.randrange(2000) for i in range(1000)] + heap = [] + for item in data: + self.module.heappush_max(heap, item) + if len(heap) > 10: + self.module.heappop_max(heap) + heap.sort() + expected = sorted(data)[:10] + self.assertEqual(heap, expected) + + def heapiter_max(self, heap): + # An iterator returning a max-heap's elements, largest-first. + try: + while 1: + yield self.module.heappop_max(heap) + except IndexError: + pass + + def test_nworst(self): + # Max-heap variant of "test_nbest" + data = [random.randrange(2000) for i in range(1000)] + heap = data[:10] + self.module.heapify_max(heap) + for item in data[10:]: + if item < heap[0]: # this gets rarer the longer we run + self.module.heapreplace_max(heap, item) + expected = sorted(data, reverse=True)[-10:] + self.assertEqual(list(self.heapiter_max(heap)), expected) + + self.assertRaises(TypeError, self.module.heapreplace_max, None) + self.assertRaises(TypeError, self.module.heapreplace_max, None, None) + self.assertRaises(IndexError, self.module.heapreplace_max, [], None) + + def test_nworst_minheap(self): + # Min-heap variant of "test_nbest_maxheap" + data = [random.randrange(2000) for i in range(1000)] + heap = data[:] + self.module.heapify(heap) + result = [self.module.heappop(heap) for _ in range(10)] + result.reverse() + expected = sorted(data, reverse=True)[-10:] + self.assertEqual(result, expected) + + def 
test_nworst_with_pushpop(self): + # Max-heap variant of "test_nbest_with_pushpop" + data = [random.randrange(2000) for i in range(1000)] + heap = data[:10] + self.module.heapify_max(heap) + for item in data[10:]: + self.module.heappushpop_max(heap, item) + expected = sorted(data, reverse=True)[-10:] + self.assertEqual(list(self.heapiter_max(heap)), expected) + self.assertEqual(self.module.heappushpop_max([], 'x'), 'x') + def test_heappushpop(self): h = [] x = self.module.heappushpop(h, 10) @@ -236,6 +300,20 @@ def test_heapsort(self): heap_sorted = [self.module.heappop(heap) for i in range(size)] self.assertEqual(heap_sorted, sorted(data)) + def test_heapsort_max(self): + for trial in range(100): + size = random.randrange(50) + data = [random.randrange(25) for i in range(size)] + if trial & 1: # Half of the time, use heapify + heap = data[:] + self.module.heapify_max(heap) + else: # The rest of the time, use heappush + heap = [] + for item in data: + self.module.heappush_max(heap, item) + heap_sorted = [self.module.heappop_max(heap) for i in range(size)] + self.assertEqual(heap_sorted, sorted(data, reverse=True)) + def test_merge(self): inputs = [] for i in range(random.randrange(25)): From 4ba533bb0a9d77cf3c97caa6b4e86b64a091cba0 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 5 May 2025 14:27:09 +0200 Subject: [PATCH 24/25] Apply suggestions from code review Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Doc/library/heapq.rst | 2 +- Lib/test/test_heapq.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index a5eaf0d672fa59..3b7829c18abba5 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -50,7 +50,7 @@ for comparisons, for both min-heaps and max-heaps. In the API below, and in this documentation, the unqalified term *heap* generally refers to a min-heap. -API for max-heaps is named using a ``_max`` suffix. 
+The API for max-heaps is named using a ``_max`` suffix. To create a heap, use a list initialized to ``[]``, or you can transform a populated list into a heap via function :func:`heapify`. diff --git a/Lib/test/test_heapq.py b/Lib/test/test_heapq.py index bb38459ca2e61b..d6623fee9bb2b4 100644 --- a/Lib/test/test_heapq.py +++ b/Lib/test/test_heapq.py @@ -304,10 +304,10 @@ def test_heapsort_max(self): for trial in range(100): size = random.randrange(50) data = [random.randrange(25) for i in range(size)] - if trial & 1: # Half of the time, use heapify + if trial & 1: # Half of the time, use heapify_max heap = data[:] self.module.heapify_max(heap) - else: # The rest of the time, use heappush + else: # The rest of the time, use heappush_max heap = [] for item in data: self.module.heappush_max(heap, item) From 742c46cbb0fac25757ef7080ad83b3bdd8d4d413 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Mon, 5 May 2025 15:41:41 +0100 Subject: [PATCH 25/25] final touchups --- Doc/library/heapq.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 3b7829c18abba5..2bd0162a982778 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -26,11 +26,7 @@ the compared elements exist. Elements are counted from zero. The interesting property of a min-heap is that its smallest element is always the root, ``heap[0]``. -These two make it possible to view the heap as a regular Python list without -surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` maintains the -heap invariant! - -Max-heaps satisfy the reverse invariant: every parent node node has a value +Max-heaps satisfy the reverse invariant: every parent node has a value *greater* than any of its children. These are implemented as lists for which ``maxheap[2*k+1] <= maxheap[k]`` and ``maxheap[2*k+2] <= maxheap[k]`` for all *k* for which the compared elements exist. @@ -42,18 +38,22 @@ We use zero-based indexing. 
This makes the relationship between the index for a node and the indexes for its children slightly less obvious, but is more suitable since Python uses zero-based indexing. (b) Textbooks often focus on max-heaps, due to their suitability for in-place sorting. Our implementation -favors min-heaps as they better correspond to Python lists: :meth:`list.sort` -maintains the *min*-heap invariant. +favors min-heaps as they better correspond to Python :class:`lists <list>`. + +These two aspects make it possible to view the heap as a regular Python list +without surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` +maintains the heap invariant! Like :meth:`list.sort`, this implementation uses only the ``<`` operator for comparisons, for both min-heaps and max-heaps. -In the API below, and in this documentation, the unqalified term *heap* +In the API below, and in this documentation, the unqualified term *heap* generally refers to a min-heap. The API for max-heaps is named using a ``_max`` suffix. -To create a heap, use a list initialized to ``[]``, or you can transform a -populated list into a heap via function :func:`heapify`. +To create a heap, use a list initialized as ``[]``, or transform an existing list +into a min-heap or max-heap using the :func:`heapify` or :func:`heapify_max` +functions, respectively. The following functions are provided for min-heaps: