diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 04d7a6a615e370..c689ae41f0bf4f 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -166,7 +166,17 @@ struct _is { struct _warnings_runtime_state warnings; struct atexit_state atexit; - struct _obmalloc_state obmalloc; + // Per-interpreter state for the obmalloc allocator. For the main + // interpreter and for all interpreters that don't have their + // own obmalloc state, this points to the static structure in + // obmalloc.c obmalloc_state_main. For other interpreters, it is + // heap allocated by _PyMem_init_obmalloc() and freed when the + // interpreter structure is freed. In the case of a heap allocated + // obmalloc state, it is not safe to hold on to or use memory after + // the interpreter is freed. The obmalloc state corresponding to + // that allocated memory is gone. See free_obmalloc_arenas() for + // more comments. + struct _obmalloc_state *obmalloc; PyObject *audit_hooks; PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS]; diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h index 17572dba65487d..9140d8f08f0af1 100644 --- a/Include/internal/pycore_obmalloc.h +++ b/Include/internal/pycore_obmalloc.h @@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void); _Py_GetGlobalAllocatedBlocks() extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *); extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *); +extern int _PyMem_init_obmalloc(PyInterpreterState *interp); +extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp); #ifdef WITH_PYMALLOC diff --git a/Include/internal/pycore_obmalloc_init.h b/Include/internal/pycore_obmalloc_init.h index 8ee72ff2d4126f..e6811b7aeca73c 100644 --- a/Include/internal/pycore_obmalloc_init.h +++ b/Include/internal/pycore_obmalloc_init.h @@ -59,13 +59,6 @@ extern "C" { .dump_debug_stats = -1, \ } 
-#define _obmalloc_state_INIT(obmalloc) \ - { \ - .pools = { \ - .used = _obmalloc_pools_INIT(obmalloc.pools), \ - }, \ - } - #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index d324a94278839c..c3b8b02a5aa358 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -151,7 +151,6 @@ extern PyTypeObject _PyExc_MemoryError; { \ .id_refcount = -1, \ .imports = IMPORTS_INIT, \ - .obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \ .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ }, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst new file mode 100644 index 00000000000000..90f49272218c96 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst @@ -0,0 +1,5 @@ +Make interp->obmalloc a pointer. For interpreters that share state with the +main interpreter, this points to the same static memory structure. For +interpreters with their own obmalloc state, it is heap allocated. Add +free_obmalloc_arenas() which will free the obmalloc arenas and radix tree +structures for interpreters with their own obmalloc state. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 99c95d90658b08..c2eecd95d46c16 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -7,6 +7,7 @@ #include "pycore_pyerrors.h" // _Py_FatalErrorFormat() #include "pycore_pymem.h" #include "pycore_pystate.h" // _PyInterpreterState_GET +#include "pycore_obmalloc_init.h" #include // malloc() #include @@ -967,6 +968,13 @@ static int running_on_valgrind = -1; typedef struct _obmalloc_state OMState; +/* obmalloc state for main interpreter and shared by all interpreters without + * their own obmalloc state. 
By not explicitly initializing this structure, it + * will be allocated in the BSS which is a small performance win. The radix + * tree arrays are fairly large but are sparsely used. */ +static struct _obmalloc_state obmalloc_state_main; +static bool obmalloc_state_initialized; + static inline int has_own_state(PyInterpreterState *interp) { @@ -979,10 +987,8 @@ static inline OMState * get_state(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!has_own_state(interp)) { - interp = _PyInterpreterState_Main(); - } - return &interp->obmalloc; + assert(interp->obmalloc != NULL); // otherwise not initialized or freed + return interp->obmalloc; } // These macros all rely on a local "state" variable. @@ -1030,7 +1036,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) "the interpreter doesn't have its own allocator"); } #endif - OMState *state = &interp->obmalloc; + OMState *state = interp->obmalloc; + + if (state == NULL) { + return 0; + } Py_ssize_t n = raw_allocated_blocks; /* add up allocated blocks for used pools */ @@ -1052,6 +1062,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) return n; } +static void free_obmalloc_arenas(PyInterpreterState *interp); + void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) { @@ -1060,10 +1072,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) return; } #endif - if (has_own_state(interp)) { + if (has_own_state(interp) && interp->obmalloc != NULL) { Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp); assert(has_own_state(interp) || leaked == 0); interp->runtime->obmalloc.interpreter_leaks += leaked; + if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) { + // free the obmalloc arenas and radix tree nodes. If leaked > 0 + // then some of the memory allocated by obmalloc has not been + // freed.
It might be safe to free the arenas in that case but + // it's possible that extension modules are still using that + // memory. So, it is safer to not free and to leak. Perhaps there + // should be a warning when this happens. It should be possible to + // use a tool like "-fsanitize=address" to track down these leaks. + free_obmalloc_arenas(interp); + } } } @@ -2663,9 +2685,96 @@ _PyDebugAllocatorStats(FILE *out, (void)printone(out, buf2, num_blocks * sizeof_block); } +// Return true if the obmalloc state structure is heap allocated, +// by PyMem_RawCalloc(). For the main interpreter, this structure is +// allocated in the BSS. Allocating that way gives some memory savings +// and a small performance win (at least on a demand paged OS). On +// 64-bit platforms, the obmalloc structure is 256 kB. Most of that +// memory is for the arena_map_top array. Since normally only one entry +// of that array is used, only one page of resident memory is actually +// used, rather than the full 256 kB. +bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp) +{ +#if WITH_PYMALLOC + return interp->obmalloc && interp->obmalloc != &obmalloc_state_main; +#else + return false; +#endif +} + +#ifdef WITH_PYMALLOC +static void +init_obmalloc_pools(PyInterpreterState *interp) +{ + // initialize the obmalloc->pools structure. This must be done + // before the obmalloc alloc/free functions can be called. + poolp temp[OBMALLOC_USED_POOLS_SIZE] = + _obmalloc_pools_INIT(interp->obmalloc->pools); + memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp)); +} +#endif /* WITH_PYMALLOC */ + +int _PyMem_init_obmalloc(PyInterpreterState *interp) +{ +#ifdef WITH_PYMALLOC + /* Point interp->obmalloc at the shared static state, or allocate a + private state for interpreters that have their own allocator.
*/ + if (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_HasFeature(interp, + Py_RTFLAGS_USE_MAIN_OBMALLOC)) { + interp->obmalloc = &obmalloc_state_main; + if (!obmalloc_state_initialized) { + init_obmalloc_pools(interp); + obmalloc_state_initialized = true; + } + } else { + interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state)); + if (interp->obmalloc == NULL) { + return -1; + } + init_obmalloc_pools(interp); + } +#endif /* WITH_PYMALLOC */ + return 0; // success +} + #ifdef WITH_PYMALLOC +static void +free_obmalloc_arenas(PyInterpreterState *interp) +{ + OMState *state = interp->obmalloc; + for (uint i = 0; i < maxarenas; ++i) { + // free each obmalloc memory arena + struct arena_object *ao = &allarenas[i]; + _PyObject_Arena.free(_PyObject_Arena.ctx, + (void *)ao->address, ARENA_SIZE); + } + // free the array containing pointers to all arenas + PyMem_RawFree(allarenas); +#if WITH_PYMALLOC_RADIX_TREE +#ifdef USE_INTERIOR_NODES + // Free the middle and bottom nodes of the radix tree. These are allocated + // by arena_map_mark_used() but not freed when arenas are freed. + for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) { + arena_map_mid_t *mid = arena_map_root.ptrs[i1]; + if (mid == NULL) { + continue; + } + for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) { + arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2]; + if (bot == NULL) { + continue; + } + PyMem_RawFree(bot); + } + PyMem_RawFree(mid); + } +#endif +#endif +} + #ifdef Py_DEBUG /* Is target in the list? The list is traversed via the nextpool pointers. * The list may be NULL-terminated, or circular. 
Return 1 if target is in diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0ec29846b0850b..31d3d6c906c441 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -32,6 +32,7 @@ #include "pycore_typevarobject.h" // _Py_clear_generic_types() #include "pycore_unicodeobject.h" // _PyUnicode_InitTypes() #include "pycore_weakref.h" // _PyWeakref_GET_REF() +#include "pycore_obmalloc.h" // _PyMem_init_obmalloc() #include "opcode.h" @@ -639,6 +640,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. + if (_PyMem_init_obmalloc(interp) < 0) { + return _PyStatus_NO_MEMORY(); + } + PyThreadState *tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { @@ -2121,6 +2129,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) goto error; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. 
+ if (_PyMem_init_obmalloc(interp) < 0) { + status = _PyStatus_NO_MEMORY(); + goto error; + } + tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { status = _PyStatus_NO_MEMORY(); diff --git a/Python/pystate.c b/Python/pystate.c index 632a119ea6d4f8..873803535798f9 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -17,6 +17,7 @@ #include "pycore_pystate.h" #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() /* -------------------------------------------------------------------------- CAUTION @@ -547,6 +548,11 @@ free_interpreter(PyInterpreterState *interp) // The main interpreter is statically allocated so // should not be freed. if (interp != &_PyRuntime._main_interpreter) { + if (_PyMem_obmalloc_state_on_heap(interp)) { + // interpreter has its own obmalloc state, free it + PyMem_RawFree(interp->obmalloc); + interp->obmalloc = NULL; + } PyMem_RawFree(interp); } } @@ -589,14 +595,6 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - /* Initialize obmalloc, but only for subinterpreters, - since the main interpreter is initialized statically. 
*/ - if (interp != &runtime->_main_interpreter) { - poolp temp[OBMALLOC_USED_POOLS_SIZE] = \ - _obmalloc_pools_INIT(interp->obmalloc.pools); - memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); - } - PyStatus status = _PyObject_InitState(interp); if (_PyStatus_EXCEPTION(status)) { return status; diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2f9e80d6ab6737..c75aff8c1723c1 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -325,7 +325,8 @@ Objects/obmalloc.c - _PyMem_Debug - Objects/obmalloc.c - _PyMem_Raw - Objects/obmalloc.c - _PyObject - Objects/obmalloc.c - last_final_leaks - -Objects/obmalloc.c - usedpools - +Objects/obmalloc.c - obmalloc_state_main - +Objects/obmalloc.c - obmalloc_state_initialized - Objects/typeobject.c - name_op - Objects/typeobject.c - slotdefs - Objects/unicodeobject.c - stripfuncnames -