From d045bb600081690eb40bdf4288071af418f0829d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 10 Aug 2021 14:36:29 -0700 Subject: [PATCH 1/3] Integrate GC untrack into trashcan begin. For subtle reasons, PyObject_GC_UnTrack() function must be called before Py_TRASHCAN_BEGIN(). There have been a number of bugs over the years related to not doing this particular dance just right. Integrating the PyObject_GC_UnTrack() call makes it harder to do things incorrectly. That avoids some hard to find bugs (e.g. only triggered when object nesting gets deep enough). Extensions that still call PyObject_GC_UnTrack() explictly will still work correctly but the call is unneeded after this change. It would still be needed for the extension to work correctly with older versions of Python. --- Include/cpython/object.h | 1 - .../2021-08-10-14-46-10.bpo-44881.M1bcMW.rst | 9 +++++ Modules/_elementtree.c | 2 - Objects/descrobject.c | 1 - Objects/dictobject.c | 2 - Objects/frameobject.c | 4 -- Objects/listobject.c | 1 - Objects/object.c | 8 ++++ Objects/odictobject.c | 1 - Objects/setobject.c | 2 - Objects/tupleobject.c | 1 - Objects/typeobject.c | 40 ------------------- Python/hamt.c | 2 - Python/traceback.c | 1 - 14 files changed, 17 insertions(+), 58 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-08-10-14-46-10.bpo-44881.M1bcMW.rst diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 75cd0f9002215b..15a7c0a922eca4 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -471,7 +471,6 @@ mytype_dealloc(mytype *p) { ... declarations go here ... - PyObject_GC_UnTrack(p); // must untrack first Py_TRASHCAN_BEGIN(p, mytype_dealloc) ... The body of the deallocator goes here, including all calls ... ... to Py_DECREF on contained objects. ... diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-10-14-46-10.bpo-44881.M1bcMW.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-10-14-46-10.bpo-44881.M1bcMW.rst new file mode 100644 index 00000000000000..1d7f63baded27e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-10-14-46-10.bpo-44881.M1bcMW.rst @@ -0,0 +1,9 @@ +Change `Py_TRASHCAN_BEGIN()` to automatically call `PyObject_GC_UnTrack()`. +Previously, the untrack function must be explicitly called before +`Py_TRASHCAN_BEGIN()`. There have been a number of bugs over the years +related to not doing this particular dance just right. This change makes it +harder to do things incorrectly. That avoids some hard to find bugs (e.g. +only triggered when object nesting gets deep enough). Extensions that still +call `PyObject_GC_UnTrack()` explictly will still work correctly but the +call is redundant after this change. It would still be needed for the +extension to work correctly with both older and newer versions of Python. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b4528a90b3e095..6533edb2c1e620 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -664,8 +664,6 @@ element_gc_clear(ElementObject *self) static void element_dealloc(ElementObject* self) { - /* bpo-31095: UnTrack is needed before calling any callbacks */ - PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, element_dealloc) if (self->weakreflist != NULL) diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 0565992bdb79f7..3b83be834065f4 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1265,7 +1265,6 @@ typedef struct { static void wrapper_dealloc(wrapperobject *wp) { - PyObject_GC_UnTrack(wp); Py_TRASHCAN_BEGIN(wp, wrapper_dealloc) Py_XDECREF(wp->descr); Py_XDECREF(wp->self); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c78cbafe583bdc..9b06563235f9d8 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1928,8 +1928,6 @@ dict_dealloc(PyDictObject *mp) PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; - /* bpo-31095: UnTrack is needed before calling any callbacks */ - PyObject_GC_UnTrack(mp); Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { if (values != empty_values) { diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 2af69597c396ee..ad4ceed1c48e85 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -615,10 +615,6 @@ static PyGetSetDef frame_getsetlist[] = { static void _Py_HOT_FUNCTION frame_dealloc(PyFrameObject *f) { - if (_PyObject_GC_IS_TRACKED(f)) { - _PyObject_GC_UNTRACK(f); - } - Py_TRASHCAN_BEGIN(f, frame_dealloc); PyCodeObject *co = NULL; diff --git a/Objects/listobject.c b/Objects/listobject.c index 898cbc20c5f81c..7fe86a1463ccb1 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -331,7 +331,6 @@ static void list_dealloc(PyListObject *op) { Py_ssize_t i; - PyObject_GC_UnTrack(op); Py_TRASHCAN_BEGIN(op, list_dealloc) if (op->ob_item != NULL) { /* Do it backwards, for Christian Tismer. diff --git a/Objects/object.c b/Objects/object.c index 446c974f8e614b..467380383cab68 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2101,6 +2101,11 @@ _PyTrash_thread_deposit_object(PyObject *op) { PyThreadState *tstate = _PyThreadState_GET(); _PyObject_ASSERT(op, _PyObject_IS_GC(op)); + /* untrack is done by _PyTrash_begin() so the following assert must be + * true. Previously _PyTrash_begin() did not untrack itself and it was the + * responsibility of the users of the trashcan mechanism to ensure that + * untrack was called first. + */ _PyObject_ASSERT(op, !_PyObject_GC_IS_TRACKED(op)); _PyObject_ASSERT(op, Py_REFCNT(op) == 0); _PyGCHead_SET_PREV(_Py_AS_GC(op), tstate->trash_delete_later); @@ -2150,6 +2155,9 @@ _PyTrash_thread_destroy_chain(void) int _PyTrash_begin(PyThreadState *tstate, PyObject *op) { + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_GC_UNTRACK(op); + } if (tstate->trash_delete_nesting >= _PyTrash_UNWIND_LEVEL) { /* Store the object (to be deallocated later) and jump past * Py_TRASHCAN_END, skipping the body of the deallocator */ diff --git a/Objects/odictobject.c b/Objects/odictobject.c index e5361da6dcdedb..38c196ab45ec58 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1365,7 +1365,6 @@ static PyGetSetDef odict_getset[] = { static void odict_dealloc(PyODictObject *self) { - PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, odict_dealloc) Py_XDECREF(self->od_inst_dict); diff --git a/Objects/setobject.c b/Objects/setobject.c index caff85c9e38939..ab911cce547d1c 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -485,8 +485,6 @@ set_dealloc(PySetObject *so) setentry *entry; Py_ssize_t used = so->used; - /* bpo-31095: UnTrack is needed before calling any callbacks */ - PyObject_GC_UnTrack(so); Py_TRASHCAN_BEGIN(so, set_dealloc) if (so->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) so); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index b7fd421196ddaa..a1a8777c43c4ab 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -263,7 +263,6 @@ static void tupledealloc(PyTupleObject *op) { Py_ssize_t len = Py_SIZE(op); - PyObject_GC_UnTrack(op); Py_TRASHCAN_BEGIN(op, tupledealloc) if (len > 0) { Py_ssize_t i = len; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7ae50c453ed2f8..9690a933adb4f0 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1381,9 +1381,6 @@ subtype_dealloc(PyObject *self) /* We get here only if the type has GC */ - /* UnTrack and re-Track around the trashcan macro, alas */ - /* See explanation at end of function for full disclosure */ - PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, subtype_dealloc); /* Find the nearest base with a different tp_dealloc */ @@ -1487,43 +1484,6 @@ subtype_dealloc(PyObject *self) endlabel: Py_TRASHCAN_END - - /* Explanation of the weirdness around the trashcan macros: - - Q. What do the trashcan macros do? - - A. Read the comment titled "Trashcan mechanism" in object.h. - For one, this explains why there must be a call to GC-untrack - before the trashcan begin macro. Without understanding the - trashcan code, the answers to the following questions don't make - sense. - - Q. Why do we GC-untrack before the trashcan and then immediately - GC-track again afterward? - - A. In the case that the base class is GC-aware, the base class - probably GC-untracks the object. If it does that using the - UNTRACK macro, this will crash when the object is already - untracked. Because we don't know what the base class does, the - only safe thing is to make sure the object is tracked when we - call the base class dealloc. But... The trashcan begin macro - requires that the object is *untracked* before it is called. So - the dance becomes: - - GC untrack - trashcan begin - GC track - - Q. Why did the last question say "immediately GC-track again"? - It's nowhere near immediately. - - A. Because the code *used* to re-track immediately. Bad Idea. - self has a refcount of 0, and if gc ever gets its hands on it - (which can happen if any weakref callback gets invoked), it - looks like trash to gc too, and gc also tries to delete self - then. But we're already deleting self. Double deallocation is - a subtle disaster. - */ } static PyTypeObject *solid_base(PyTypeObject *type); diff --git a/Python/hamt.c b/Python/hamt.c index e272e8808fd956..efc52347f12ca9 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1155,7 +1155,6 @@ hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self) Py_ssize_t len = Py_SIZE(self); Py_ssize_t i; - PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc) if (len > 0) { @@ -1563,7 +1562,6 @@ hamt_node_collision_dealloc(PyHamtNode_Collision *self) Py_ssize_t len = Py_SIZE(self); - PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc) if (len > 0) { diff --git a/Python/traceback.c b/Python/traceback.c index 204121ba66d96e..128adfe115fcc9 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -168,7 +168,6 @@ static PyGetSetDef tb_getsetters[] = { static void tb_dealloc(PyTracebackObject *tb) { - PyObject_GC_UnTrack(tb); Py_TRASHCAN_BEGIN(tb, tb_dealloc) Py_XDECREF(tb->tb_next); Py_XDECREF(tb->tb_frame); From 42a10b4c503a7df4be0ee093428e1e6bfc0094a8 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 10 Aug 2021 17:02:47 -0700 Subject: [PATCH 2/3] small cleanup, move declarations after It seems slightly cleaner to have the BEGIN/END macros at the start and end of the dealloc function body. --- Include/cpython/object.h | 2 -- Objects/dictobject.c | 2 +- Objects/listobject.c | 2 +- Objects/setobject.c | 2 +- Objects/tupleobject.c | 2 +- Python/hamt.c | 3 +-- 6 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 15a7c0a922eca4..6447c61c70d228 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -469,8 +469,6 @@ function with a pair of macros: static void mytype_dealloc(mytype *p) { - ... declarations go here ... - Py_TRASHCAN_BEGIN(p, mytype_dealloc) ... The body of the deallocator goes here, including all calls ... ... to Py_DECREF on contained objects. ... diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9b06563235f9d8..f5836811778eea 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1924,11 +1924,11 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) static void dict_dealloc(PyDictObject *mp) { + Py_TRASHCAN_BEGIN(mp, dict_dealloc) PyObject **values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; - Py_TRASHCAN_BEGIN(mp, dict_dealloc) if (values != NULL) { if (values != empty_values) { for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) { diff --git a/Objects/listobject.c b/Objects/listobject.c index 7fe86a1463ccb1..2c88e2e00a8ba1 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -330,8 +330,8 @@ PyList_Append(PyObject *op, PyObject *newitem) static void list_dealloc(PyListObject *op) { - Py_ssize_t i; Py_TRASHCAN_BEGIN(op, list_dealloc) + Py_ssize_t i; if (op->ob_item != NULL) { /* Do it backwards, for Christian Tismer. There's a simple test case where somehow this reduces diff --git a/Objects/setobject.c b/Objects/setobject.c index ab911cce547d1c..16575a19d253b0 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -482,10 +482,10 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) static void set_dealloc(PySetObject *so) { + Py_TRASHCAN_BEGIN(so, set_dealloc) setentry *entry; Py_ssize_t used = so->used; - Py_TRASHCAN_BEGIN(so, set_dealloc) if (so->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) so); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index a1a8777c43c4ab..c06c95f454724d 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -262,8 +262,8 @@ PyTuple_Pack(Py_ssize_t n, ...) static void tupledealloc(PyTupleObject *op) { - Py_ssize_t len = Py_SIZE(op); Py_TRASHCAN_BEGIN(op, tupledealloc) + Py_ssize_t len = Py_SIZE(op); if (len > 0) { Py_ssize_t i = len; while (--i >= 0) { diff --git a/Python/hamt.c b/Python/hamt.c index efc52347f12ca9..b602afada61e52 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1558,12 +1558,11 @@ hamt_node_collision_traverse(PyHamtNode_Collision *self, static void hamt_node_collision_dealloc(PyHamtNode_Collision *self) { + Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc) /* Collision's tp_dealloc */ Py_ssize_t len = Py_SIZE(self); - Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc) - if (len > 0) { while (--len >= 0) { From 7539849fb808c8c228d5f0044954b2ddd44ea8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 16 Aug 2021 12:55:04 +0200 Subject: [PATCH 3/3] Add check for GC protocol per @vstinner request --- Objects/object.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/object.c b/Objects/object.c index 467380383cab68..c0fb5f9c06798c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2155,6 +2155,7 @@ _PyTrash_thread_destroy_chain(void) int _PyTrash_begin(PyThreadState *tstate, PyObject *op) { + _PyObject_ASSERT(op, _PyObject_IS_GC(op)); if (_PyObject_GC_IS_TRACKED(op)) { _PyObject_GC_UNTRACK(op); }