From d7ca0ca5770877fc15048dfd530c417de0c9ebb0 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 10 Oct 2024 18:06:08 -0700 Subject: [PATCH 1/3] gh-125286: Add test for single-phase init and shared objects. --- Lib/test/support/singlephase_helper.py | 32 ++++++++++++++++++++ Programs/_testembed.c | 41 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 Lib/test/support/singlephase_helper.py diff --git a/Lib/test/support/singlephase_helper.py b/Lib/test/support/singlephase_helper.py new file mode 100644 index 00000000000000..7f827237cd8c8b --- /dev/null +++ b/Lib/test/support/singlephase_helper.py @@ -0,0 +1,32 @@ +"""Helper module for _testembed.c test_subinterpreter_finalize test. +""" + +import sys + +PREFIX = "shared_string_" +SUFFIX = "DByGMRJRSEDp29PkiZQNHA" + +def init_sub1(): + import _testsinglephase + # Create global object to be shared when imported a second time. + _testsinglephase._shared_list = [] + # Create a new interned string, to be shared with the main interpreter. + _testsinglephase._shared_string = sys.intern(PREFIX + SUFFIX) + + +def init_sub2(): + # This sub-interpreter will share a reference to _shared_list with the + # first interpreter, since importing _testsinglephase will not initialize + # the module a second time but will just copy the global dict. This + # situtation used to trigger a bug like gh-125286 if TraceRefs was enabled + # for the build. + import _testsinglephase + + +def init_main(): + global shared_str + # The first sub-interpreter has already interned this string value. The + # return value from intern() will be the same string object created in + # sub-interpreter 1. Assign it to a global so it lives until the main + # interpreter is shutdown. + shared_string = sys.intern(PREFIX + SUFFIX) diff --git a/Programs/_testembed.c b/Programs/_testembed.c index ab619e32429d63..e4c385720de6f2 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -2072,6 +2072,46 @@ static void configure_init_main(PyConfig *config) } +static int test_subinterpreter_finalize(void) +{ + // This test is done by creating two subinterpreters and then sharing + // objects between them by importing a basic single-phase init extension + // (m_size == -1). Then we finalize the interpreters in the reverse order + // so that the interpreter that created the shared objects gets finalized + // first. + _testembed_Py_InitializeFromConfig(); + + PyThreadState *tstate1 = Py_NewInterpreter(); + PyThreadState_Swap(tstate1); + PyRun_SimpleString( + "import test.support.singlephase_helper\n" + "test.support.singlephase_helper.init_sub1\n" + ); + + PyThreadState *tstate2 = Py_NewInterpreter(); + PyThreadState_Swap(tstate2); + PyRun_SimpleString( + "import test.support.singlephase_helper\n" + "test.support.singlephase_helper.init_sub2\n" + ); + + PyThreadState *main_tstate = _PyRuntime.main_tstate; + PyThreadState_Swap(main_tstate); + PyRun_SimpleString( + "import test.support.singlephase_helper\n" + "test.support.singlephase_helper.init_main\n" + ); + + PyThreadState_Swap(tstate1); + Py_EndInterpreter(tstate1); + PyThreadState_Swap(tstate2); + Py_EndInterpreter(tstate2); + Py_Finalize(); + + return 0; +} + + static int test_init_run_main(void) { PyConfig config; @@ -2480,6 +2520,7 @@ static struct TestCase TestCases[] = { {"test_initconfig_get_api", test_initconfig_get_api}, {"test_initconfig_exit", test_initconfig_exit}, {"test_initconfig_module", test_initconfig_module}, + {"test_subinterpreter_finalize", test_subinterpreter_finalize}, {"test_run_main", test_run_main}, {"test_run_main_loop", test_run_main_loop}, {"test_get_argc_argv", test_get_argc_argv}, From e2e9a69304d3b56a8aaafc83157b8a4f171f248b Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 11 Oct 2024 02:01:57 -0700 Subject: [PATCH 2/3] gh-125286: Bug fix for trace-refs logic and shared objects. --- ...-10-11-02-26-39.gh-issue-125286.2ii5_E.rst | 3 ++ Objects/object.c | 52 ++++++++++--------- Objects/unicodeobject.c | 10 ++-- 3 files changed, 35 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst new file mode 100644 index 00000000000000..5bba4483586caf --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst @@ -0,0 +1,3 @@ +Fix the TraceRefs build to handle objects that are shared between +interpreters (interned strings and globals of single-phase init extension +modules). diff --git a/Objects/object.c b/Objects/object.c index 4a4c5bf7d7f08a..036a80ee9cd10f 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2500,38 +2500,21 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS -/* Make sure the ref is associated with the right interpreter. - * This only needs special attention for heap-allocated objects - * that have been immortalized, and only when the object might - * outlive the interpreter where it was created. That means the - * object was necessarily created using a global allocator - * (i.e. from the main interpreter). Thus in that specific case - * we move the object over to the main interpreter's refchain. - * - * This was added for the sake of the immortal interned strings, - * where legacy subinterpreters share the main interpreter's - * interned dict (and allocator), and therefore the strings can - * outlive the subinterpreter. - * - * It may make sense to fold this into _Py_SetImmortalUntracked(), - * but that requires further investigation. In the meantime, it is - * up to the caller to know if this is needed. There should be - * very few cases. +/* Make sure the ref is traced in the main interpreter. This is used only by + * _PyUnicode_ClearInterned() to ensure interned strings are traced. Since + * interned strings can be shared between interpreters, the interpreter that + * created the string might not be the main interpreter. We trace it here so + * that _Py_ForgetReference() will handle it without error. */ void -_Py_NormalizeImmortalReference(PyObject *op) +_Py_NormalizeReference(PyObject *op) { - assert(_Py_IsImmortal(op)); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyRefchain_IsTraced(interp, op)) { + if (_PyRefchain_IsTraced(interp, op)) { return; } PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp - && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) - { - assert(!_PyRefchain_IsTraced(main_interp, op)); - _PyRefchain_Remove(interp, op); + if (interp == main_interp) { _PyRefchain_Trace(main_interp, op); } } @@ -2545,6 +2528,25 @@ _Py_ForgetReference(PyObject *op) PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyRefchain_IsTraced(interp, op)) { + if (PyUnicode_CHECK_INTERNED(op)) { + // interned strings can be shared between the main interpreter and + // between sub-interpreters due to the shared interp dict. See + // init_interned_dict(). In this case, the string was created and + // traced by a different sub-interpreter. + return; + } + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + if (interp != main_interp && + interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + // objects stored in the globals of basic single-phase init + // (m_size == -1) extension modules can be shared between + // sub-interpreters. In this case, the object was created + // and traced by a different sub-interpreter. + return; + } + } + #ifdef SLOW_UNREF_CHECK if (!_PyRefchain_Get(interp, op)) { /* Not found */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b94a74c2c688a9..e556e4e8ab85a8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15445,7 +15445,7 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) } #ifdef Py_TRACE_REFS -extern void _Py_NormalizeImmortalReference(PyObject *); +extern void _Py_NormalizeReference(PyObject *); #endif static void @@ -15463,10 +15463,6 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); -#ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. */ - _Py_NormalizeImmortalReference(s); -#endif } static /* non-null */ PyObject* @@ -15678,6 +15674,10 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) while (PyDict_Next(interned, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); int shared = 0; +#ifdef Py_TRACE_REFS + /* Make sure the ref is associated with the right interpreter. */ + _Py_NormalizeReference(s); +#endif switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: /* Make immortal interned strings mortal again. */ From f9b1db010afea8973717f2873887dd36a95c3b67 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 12 Oct 2024 14:32:47 -0700 Subject: [PATCH 3/3] Slightly cleaner fix, more comments. The case with shared objects (other than interned strings) can't seem to happen so comment that code out. Small code cleanups. Simplify the _testembed.c test, don't need a separate helper module. --- Lib/test/support/singlephase_helper.py | 32 -------- ...-10-11-02-26-39.gh-issue-125286.2ii5_E.rst | 5 +- Objects/object.c | 82 +++++++++++++++---- Objects/unicodeobject.c | 4 +- Programs/_testembed.c | 25 ++---- 5 files changed, 78 insertions(+), 70 deletions(-) delete mode 100644 Lib/test/support/singlephase_helper.py diff --git a/Lib/test/support/singlephase_helper.py b/Lib/test/support/singlephase_helper.py deleted file mode 100644 index 7f827237cd8c8b..00000000000000 --- a/Lib/test/support/singlephase_helper.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Helper module for _testembed.c test_subinterpreter_finalize test. -""" - -import sys - -PREFIX = "shared_string_" -SUFFIX = "DByGMRJRSEDp29PkiZQNHA" - -def init_sub1(): - import _testsinglephase - # Create global object to be shared when imported a second time. - _testsinglephase._shared_list = [] - # Create a new interned string, to be shared with the main interpreter. - _testsinglephase._shared_string = sys.intern(PREFIX + SUFFIX) - - -def init_sub2(): - # This sub-interpreter will share a reference to _shared_list with the - # first interpreter, since importing _testsinglephase will not initialize - # the module a second time but will just copy the global dict. This - # situtation used to trigger a bug like gh-125286 if TraceRefs was enabled - # for the build. - import _testsinglephase - - -def init_main(): - global shared_str - # The first sub-interpreter has already interned this string value. The - # return value from intern() will be the same string object created in - # sub-interpreter 1. Assign it to a global so it lives until the main - # interpreter is shutdown. - shared_string = sys.intern(PREFIX + SUFFIX) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst index 5bba4483586caf..65cd2fa6159aa5 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-11-02-26-39.gh-issue-125286.2ii5_E.rst @@ -1,3 +1,2 @@ -Fix the TraceRefs build to handle objects that are shared between -interpreters (interned strings and globals of single-phase init extension -modules). +Fix the TraceRefs build to handle interned strings shared between +interpreters. diff --git a/Objects/object.c b/Objects/object.c index 036a80ee9cd10f..4711a1babc0a88 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2513,10 +2513,34 @@ _Py_NormalizeReference(PyObject *op) if (_PyRefchain_IsTraced(interp, op)) { return; } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp == main_interp) { - _PyRefchain_Trace(main_interp, op); + if (_Py_IsMainInterpreter(interp)) { + _PyRefchain_Trace(interp, op); + } +} + +/* Find the interpreter that is tracing 'op' and trace it in 'this_interp' + * instead. This is only used in the case that non-isolated sub-interpreters + * are used and in that case objects can be shared between interpreters. + */ +void +_PyRefchain_FindAndTake(PyInterpreterState *this_interp, PyObject *op) +{ + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyInterpreterState *interp = runtime->interpreters.head; + for (; interp != NULL; interp = interp->next) { + if (_PyRefchain_IsTraced(interp, op)) { + _PyRefchain_Remove(interp, op); + break; + } } + // It's possible the loop above didn't find the interpreter tracing + // the object. That can happen if the interpreter was finalized + // before the object (i.e. the shared object outlived the interpreter + // that created it). We assume that's the case and trace it in this + // interpreter. + HEAD_UNLOCK(runtime); + _PyRefchain_Trace(this_interp, op); } void @@ -2529,22 +2553,52 @@ _Py_ForgetReference(PyObject *op) PyInterpreterState *interp = _PyInterpreterState_GET(); if (!_PyRefchain_IsTraced(interp, op)) { + // If the object is not traced, it might be because the object + // was created in a different interpreter and then shared to + // this one. Then, the interpreter that created it dropped all + // references to it (possibly the whole interpreter finalized + // but not necessarily) and this interpreter is the one that will + // dealloc it. There are two ways the object sharing can happen + // and we check those two cases below. This is unfortunately not a + // 100% bulletproof check. An untraced object could also occur + // because there is some bug that caused the object to not be + // traced and that bug could be masked here. + if (PyUnicode_CHECK_INTERNED(op)) { - // interned strings can be shared between the main interpreter and - // between sub-interpreters due to the shared interp dict. See - // init_interned_dict(). In this case, the string was created and - // traced by a different sub-interpreter. - return; + // interned strings can be shared between the main interpreter + // and between sub-interpreters due to the shared interp dict. + // See init_interned_dict(). In this case, the string was + // likely created and traced by a different interpreter. This + // block handles mortal interned strings, the immortal case + // is handled by _Py_NormalizeReference(). For immortal interned + // strings, they are always freed by the main interpreter. For + // mortal interned strings, the last reference could be dropped + // from any interpreter that shares the interned string (could be + // the main or a sub-interpreter). We don't know up-front which + // interpreter will be the last one holding a reference so we + // can't trace in the right one and we have to fix it here. + _PyRefchain_FindAndTake(interp, op); } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp && - interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { +#if 0 + // This case seems possible in theory but it seems it can't + // actually happen in practice. Therefore it's disabled with + // the "#if 0". When module dicts are copied for single-phase + // init extensions, a copy of the module dict is stored in the + // Python runtime, using _PyRuntime.imports.extensions. Those + // extra reference counts (from the m_copy dict) ensure that + // sub-interpreters sharing those objects are never the one to drop + // the last reference to the shared object. + if (!_Py_IsMainInterpreter(interp) && + (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)) { // objects stored in the globals of basic single-phase init // (m_size == -1) extension modules can be shared between - // sub-interpreters. In this case, the object was created - // and traced by a different sub-interpreter. - return; + // sub-interpreters. In this case, the object was created and + // traced by a different sub-interpreter. That sub-interpreter + // dropped all references to it already and now it's being + // deallocated in a different sub-interpreter. + _PyRefchain_FindAndTake(interp, op); } +#endif } #ifdef SLOW_UNREF_CHECK diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e556e4e8ab85a8..eaf0763f152474 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15675,7 +15675,9 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) assert(PyUnicode_IS_READY(s)); int shared = 0; #ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. */ + /* Make sure the ref is associated with the right interpreter. + * _Py_ForgetReference() will fail if the string is traced by the + * different interpreter. */ _Py_NormalizeReference(s); #endif switch (PyUnicode_CHECK_INTERNED(s)) { diff --git a/Programs/_testembed.c b/Programs/_testembed.c index e4c385720de6f2..91c055ceac226b 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -2074,38 +2074,23 @@ static void configure_init_main(PyConfig *config) static int test_subinterpreter_finalize(void) { - // This test is done by creating two subinterpreters and then sharing - // objects between them by importing a basic single-phase init extension - // (m_size == -1). Then we finalize the interpreters in the reverse order - // so that the interpreter that created the shared objects gets finalized - // first. + // Create a legacy subinterpreter (with the interned dict shared + // with the main interpreter). This checks that interned strings are + // freed correctly. _testembed_Py_InitializeFromConfig(); PyThreadState *tstate1 = Py_NewInterpreter(); PyThreadState_Swap(tstate1); PyRun_SimpleString( - "import test.support.singlephase_helper\n" - "test.support.singlephase_helper.init_sub1\n" - ); - - PyThreadState *tstate2 = Py_NewInterpreter(); - PyThreadState_Swap(tstate2); - PyRun_SimpleString( - "import test.support.singlephase_helper\n" - "test.support.singlephase_helper.init_sub2\n" + "import test.support.import_helper\n" ); PyThreadState *main_tstate = _PyRuntime.main_tstate; PyThreadState_Swap(main_tstate); PyRun_SimpleString( - "import test.support.singlephase_helper\n" - "test.support.singlephase_helper.init_main\n" + "import test.support.import_helper\n" ); - PyThreadState_Swap(tstate1); - Py_EndInterpreter(tstate1); - PyThreadState_Swap(tstate2); - Py_EndInterpreter(tstate2); Py_Finalize(); return 0;