From 6651884fd3c77d62f6d3f87bd84791b59acad681 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Wed, 10 Jan 2024 09:39:25 -0800 Subject: [PATCH 1/5] Make type cache thread safe with sequence lock --- Include/internal/pycore_lock.h | 1 + Include/internal/pycore_typeobject.h | 5 +- Objects/typeobject.c | 119 +++++++++++++++++++++++++-- Python/lock.c | 2 +- Python/pystate.c | 2 + 5 files changed, 118 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 18a8896d97a548..63c7376f9027bd 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -251,6 +251,7 @@ PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex); +void _Py_yield(void); #ifdef __cplusplus } diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c03c3d766bef61..d2f5d4077f33a7 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -28,6 +28,9 @@ struct _types_runtime_state { // see _PyType_Lookup(). struct type_cache_entry { unsigned int version; // initialized from type->tp_version_tag +#ifdef Py_GIL_DISABLED + int sequence; +#endif PyObject *name; // reference to exactly a str or None PyObject *value; // borrowed reference or NULL }; @@ -74,7 +77,7 @@ struct types_state { extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); extern void _PyTypes_FiniTypes(PyInterpreterState *); extern void _PyTypes_Fini(PyInterpreterState *); - +void _PyTypes_AfterFork(void); /* other API */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c65d0ec2acae52..bcdace54cf1f33 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4765,6 +4765,82 @@ is_dunder_name(PyObject *name) return 0; } +static void +update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value) +{ + entry->version = version_tag; + entry->value = value; /* borrowed */ + assert(_PyASCIIObject_CAST(name)->hash != -1); + OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); + Py_SETREF(entry->name, Py_NewRef(name)); +} + +#if Py_GIL_DISABLED + +#define TYPE_CACHE_IS_UPDATING(sequence) (sequence & 0x01) + +static void +update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, + unsigned int version_tag, PyObject *value) +{ + // Similar to linux seqlock: https://en.wikipedia.org/wiki/Seqlock + // We use a sequence number to lock the writer, an even sequence means we're unlocked, an odd + // sequence means we're locked. + // Differs a little bit in that we use CAS on sequence as the lock, instead of a seperate spin lock. + // If our writer detects that another thread has already done the same write we'll just bail + // and restore the previous sequence number without doing any updates. 
+ + // lock the entry by setting by moving to an odd sequence number + int prev = entry->sequence; + while (1) { + if (TYPE_CACHE_IS_UPDATING(prev)) { + // Someone else is currently updating the cache + _Py_yield(); + prev = _Py_atomic_load_int32_relaxed(&entry->sequence); + } else if(_Py_atomic_compare_exchange_int32(&entry->sequence, &prev, prev + 1)) { + // We've locked the cache + break; + } else { + _Py_yield(); + } + } + + // update the entry + if (entry->name == name && + entry->value == value && + entry->version == version_tag) { + // We reaced with another update, bail and restore previous sequence. + _Py_atomic_exchange_int32(&entry->sequence, prev); + return; + } + + update_cache(entry, name, version_tag, value); + + // Then update sequence to the next valid value + int new_sequence = prev + 2; + assert(!TYPE_CACHE_IS_UPDATING(new_sequence)); + _Py_atomic_exchange_int32(&entry->sequence, new_sequence); +} + +#endif + +void _PyTypes_AfterFork() { +#ifdef Py_GIL_DISABLED + struct type_cache *cache = get_type_cache(); + for (int i = 0; i < 1 << MCACHE_SIZE_EXP; i++) { + struct type_cache_entry *entry = &cache->hashtable[i]; + int sequence = _Py_atomic_load_int_acquire(&entry->sequence); + if (TYPE_CACHE_IS_UPDATING(sequence)) { + // Entry was in the process of updating while forking, clear it... + entry->sequence = 0; + entry->value = NULL; + entry->name = NULL; + entry->version = 0; + } + } +#endif +} + /* Internal API to look for a name through the MRO. This returns a borrowed reference, and doesn't set an exception! */ PyObject * @@ -4777,13 +4853,40 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) unsigned int h = MCACHE_HASH_METHOD(type, name); struct type_cache *cache = get_type_cache(); struct type_cache_entry *entry = &cache->hashtable[h]; +#ifdef Py_GIL_DISABLED + // synchronize-with other writing threads by doing an acquire load on the sequence + while (1) { + int sequence = _Py_atomic_load_int_acquire(&entry->sequence); + if (!TYPE_CACHE_IS_UPDATING(sequence)) { + if (_Py_atomic_load_uint32_relaxed(&entry->version) == type->tp_version_tag && + _Py_atomic_load_ptr_relaxed(&entry->name) == name) { + assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + PyObject *value = _Py_atomic_load_ptr_relaxed(&entry->value); + + // Synchronize again and validate that the entry hasn't been updated + // while we were readying the values. 
+ if (_Py_atomic_load_int_acquire(&entry->sequence) == sequence) { + return value; + } + } else { + // Cache miss + break; + } + } + // We are in progress of updating the cache, retry + _Py_yield(); + } +#else if (entry->version == type->tp_version_tag && entry->name == name) { assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - return entry->value; + return entry->value; } +#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); @@ -4808,15 +4911,13 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) } if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(interp, type)) { - h = MCACHE_HASH_METHOD(type, name); - struct type_cache_entry *entry = &cache->hashtable[h]; - entry->version = type->tp_version_tag; - entry->value = res; /* borrowed */ - assert(_PyASCIIObject_CAST(name)->hash != -1); - OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); +#if Py_GIL_DISABLED + update_cache_gil_disabled(entry, name, type->tp_version_tag, res); +#else + update_cache(entry, name, type->tp_version_tag, res); +#endif assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); - Py_SETREF(entry->name, Py_NewRef(name)); - } +} return res; } diff --git a/Python/lock.c b/Python/lock.c index f0ff1176941da8..9d73ec3fa2217d 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -36,7 +36,7 @@ struct mutex_entry { int handed_off; }; -static void +void _Py_yield(void) { #ifdef MS_WINDOWS diff --git a/Python/pystate.c b/Python/pystate.c index 08ec586963ce11..580b2f0ae3277f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -499,6 +499,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyMutex_at_fork_reinit(locks[i]); } + _PyTypes_AfterFork(); + /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does * not force the default allocator. */ if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { From b961d515c71d2493cc7d129b589cafb213fb91ae Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Wed, 10 Jan 2024 12:14:55 -0800 Subject: [PATCH 2/5] Make bases/mro accesses thread safe --- Include/internal/pycore_typeobject.h | 2 + Modules/_abc.c | 15 +- Objects/typeobject.c | 323 ++++++++++++++++++++++----- Python/pystate.c | 1 + 4 files changed, 276 insertions(+), 65 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index d2f5d4077f33a7..ce7e08385aa6bd 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_moduleobject.h" // PyModuleObject +#include "pycore_lock.h" // PyMutex /* state */ @@ -21,6 +22,7 @@ struct _types_runtime_state { // bpo-42745: next_version_tag remains shared by all interpreters // because of static types. 
unsigned int next_version_tag; + PyMutex type_mutex; }; diff --git a/Modules/_abc.c b/Modules/_abc.c index 9473905243d438..399ecbbd6a2172 100644 --- a/Modules/_abc.c +++ b/Modules/_abc.c @@ -8,7 +8,6 @@ #include "pycore_object.h" // _PyType_GetSubclasses() #include "pycore_runtime.h" // _Py_ID() #include "pycore_setobject.h" // _PySet_NextEntry() -#include "pycore_typeobject.h" // _PyType_GetMRO() #include "pycore_weakref.h" // _PyWeakref_GET_REF() #include "clinic/_abc.c.h" @@ -744,18 +743,12 @@ _abc__abc_subclasscheck_impl(PyObject *module, PyObject *self, Py_DECREF(ok); /* 4. Check if it's a direct subclass. */ - PyObject *mro = _PyType_GetMRO((PyTypeObject *)subclass); - assert(PyTuple_Check(mro)); - for (pos = 0; pos < PyTuple_GET_SIZE(mro); pos++) { - PyObject *mro_item = PyTuple_GET_ITEM(mro, pos); - assert(mro_item != NULL); - if ((PyObject *)self == mro_item) { - if (_add_to_weak_set(&impl->_abc_cache, subclass) < 0) { - goto end; - } - result = Py_True; + if (PyType_IsSubtype((PyTypeObject *)subclass, (PyTypeObject *)self)) { + if (_add_to_weak_set(&impl->_abc_cache, subclass) < 0) { goto end; } + result = Py_True; + goto end; } /* 5. Check if it's a subclass of a registered class (recursive). */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bcdace54cf1f33..def1c6da1e8948 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -52,6 +52,34 @@ class object "PyObject *" "&PyBaseObject_Type" #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag +#ifdef Py_GIL_DISABLED + +// There's a global lock for mutation of types. This avoids having to take +// additonal locks while doing various subclass processing which may result +// in odd behaviors w.r.t. running with the GIL as the outer type lock could +// be released and reacquired during a subclass update if there's contention +// on the subclass lock. +#define BEGIN_TYPE_LOCK() \ + { \ + _PyCriticalSection _cs; \ + _PyCriticalSection_Begin(&_cs, &_PyRuntime.types.type_mutex); \ + +#define END_TYPE_LOCK() \ + _PyCriticalSection_End(&_cs); \ + } + +#define ASSERT_TYPE_LOCK_HELD() \ + _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyRuntime.types.type_mutex) + +#else + +#define BEGIN_TYPE_LOCK() +#define END_TYPE_LOCK() +#define ASSERT_TYPE_LOCK_HELD() + +#endif + + typedef struct PySlot_Offset { short subslot_offset; short slot_offset; @@ -279,8 +307,14 @@ lookup_tp_bases(PyTypeObject *self) PyObject * _PyType_GetBases(PyTypeObject *self) { - /* It returns a borrowed reference. */ - return lookup_tp_bases(self); + PyObject *res; + + BEGIN_TYPE_LOCK(); + res = lookup_tp_bases(self); + Py_INCREF(res); + END_TYPE_LOCK() + + return res; } static inline void @@ -330,14 +364,19 @@ clear_tp_bases(PyTypeObject *self) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { + ASSERT_TYPE_LOCK_HELD(); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { - /* It returns a borrowed reference. 
*/ - return lookup_tp_mro(self); + PyObject *mro; + BEGIN_TYPE_LOCK(); + mro = lookup_tp_mro(self); + Py_INCREF(mro); + END_TYPE_LOCK() + return mro; } static inline void @@ -760,8 +799,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification + BEGIN_TYPE_LOCK() assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); + END_TYPE_LOCK() return 0; } @@ -781,8 +822,8 @@ PyType_Unwatch(int watcher_id, PyObject* obj) return 0; } -void -PyType_Modified(PyTypeObject *type) +static void +type_modified_unlocked(PyTypeObject *type) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -848,6 +889,22 @@ PyType_Modified(PyTypeObject *type) } } +void +PyType_Modified(PyTypeObject *type) +{ + // Quick check without the lock held + if (!_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { + return; + } + + BEGIN_TYPE_LOCK() + type_modified_unlocked(type); + END_TYPE_LOCK() +} + +static int +is_subtype_unlocked(PyTypeObject *a, PyTypeObject *b); + static void type_mro_modified(PyTypeObject *type, PyObject *bases) { /* @@ -866,6 +923,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { int custom = !Py_IS_TYPE(type, &PyType_Type); int unbound; + ASSERT_TYPE_LOCK_HELD(); if (custom) { PyObject *mro_meth, *type_mro_meth; mro_meth = lookup_maybe_method( @@ -891,7 +949,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { PyObject *b = PyTuple_GET_ITEM(bases, i); PyTypeObject *cls = _PyType_CAST(b); - if (!PyType_IsSubtype(type, cls)) { + if (!is_subtype_unlocked(type, cls)) { goto clear; } } @@ -913,6 +971,8 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { static int assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + /* Ensure that the tp_version_tag is valid and set Py_TPFLAGS_VALID_VERSION_TAG. To respect the invariant, this must first be done on all super classes. 
Return 0 if this @@ -961,7 +1021,11 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { PyInterpreterState *interp = _PyInterpreterState_GET(); - return assign_version_tag(interp, type); + int assigned; + BEGIN_TYPE_LOCK() + assigned = assign_version_tag(interp, type); + END_TYPE_LOCK() + return assigned; } @@ -1183,21 +1247,28 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context) static PyObject * type_get_bases(PyTypeObject *type, void *context) { - PyObject *bases = lookup_tp_bases(type); + PyObject *bases = _PyType_GetBases(type); if (bases == NULL) { Py_RETURN_NONE; } - return Py_NewRef(bases); + return bases; } static PyObject * type_get_mro(PyTypeObject *type, void *context) { - PyObject *mro = lookup_tp_mro(type); + PyObject *mro; + + BEGIN_TYPE_LOCK() + mro = lookup_tp_mro(type); if (mro == NULL) { - Py_RETURN_NONE; + mro = Py_None; + } else { + Py_INCREF(mro); } - return Py_NewRef(mro); + + END_TYPE_LOCK() + return mro; } static PyTypeObject *best_base(PyObject *); @@ -1219,6 +1290,8 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *old_mro; int res = mro_internal(type, &old_mro); if (res <= 0) { @@ -1282,7 +1355,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) { // Check arguments if (!check_set_special_type_attr(type, new_bases, "__bases__")) { @@ -1313,7 +1386,7 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) } PyTypeObject *base = (PyTypeObject*)ob; - if (PyType_IsSubtype(base, type) || + if (is_subtype_unlocked(base, type) || /* In case of reentering here again through a custom mro() the above check is not enough since it relies on base->tp_mro which would gonna be updated inside @@ -1418,6 +1491,16 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) return -1; } +static int +type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) +{ + int res; + BEGIN_TYPE_LOCK(); + res = type_set_bases_unlocked(type, new_bases, context); + END_TYPE_LOCK(); + return res; +} + static PyObject * type_dict(PyTypeObject *type, void *context) { @@ -2156,11 +2239,12 @@ type_is_subtype_base_chain(PyTypeObject *a, PyTypeObject *b) return (b == &PyBaseObject_Type); } -int -PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) +static int +is_subtype_unlocked(PyTypeObject *a, PyTypeObject *b) { PyObject *mro; + ASSERT_TYPE_LOCK_HELD(); mro = lookup_tp_mro(a); if (mro != NULL) { /* Deal with multiple inheritance without recursion @@ -2179,6 +2263,16 @@ PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) return type_is_subtype_base_chain(a, b); } +int +PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) +{ + int res; + BEGIN_TYPE_LOCK(); + res = is_subtype_unlocked(a, b); + END_TYPE_LOCK() + return res; +} + /* Routines to do a method lookup in the type without looking in the instance dictionary (so we can't use PyObject_GetAttr) but still binding it to the instance. 
@@ -2538,8 +2632,10 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } static PyObject * -mro_implementation(PyTypeObject *type) +mro_implementation_unlocked(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + if (!_PyType_IsReady(type)) { if (PyType_Ready(type) < 0) return NULL; @@ -2619,6 +2715,16 @@ mro_implementation(PyTypeObject *type) return result; } +static PyObject * +mro_implementation(PyTypeObject *type) +{ + PyObject *mro; + BEGIN_TYPE_LOCK() + mro = mro_implementation_unlocked(type); + END_TYPE_LOCK() + return mro; +} + /*[clinic input] type.mro @@ -2657,7 +2763,7 @@ mro_check(PyTypeObject *type, PyObject *mro) } PyTypeObject *base = (PyTypeObject*)obj; - if (!PyType_IsSubtype(solid, solid_base(base))) { + if (!is_subtype_unlocked(solid, solid_base(base))) { PyErr_Format( PyExc_TypeError, "mro() returned base with unsuitable layout ('%.500s')", @@ -2688,6 +2794,9 @@ mro_invoke(PyTypeObject *type) { PyObject *mro_result; PyObject *new_mro; + + ASSERT_TYPE_LOCK_HELD(); + const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { @@ -2700,7 +2809,7 @@ mro_invoke(PyTypeObject *type) Py_DECREF(mro_meth); } else { - mro_result = mro_implementation(type); + mro_result = mro_implementation_unlocked(type); } if (mro_result == NULL) return NULL; @@ -2747,8 +2856,10 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) +mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *new_mro, *old_mro; int reent; @@ -2793,6 +2904,16 @@ mro_internal(PyTypeObject *type, PyObject **p_old_mro) return 1; } +static int +mro_internal(PyTypeObject *type, PyObject **p_old_mro) +{ + int res; + BEGIN_TYPE_LOCK() + res = mro_internal_unlocked(type, p_old_mro); + END_TYPE_LOCK() + return res; +} + /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". 
*/ @@ -4631,6 +4752,9 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) { assert(PyType_Check(type)); + PyObject *res = NULL; + BEGIN_TYPE_LOCK() + PyObject *mro = lookup_tp_mro(type); // The type must be ready assert(mro != NULL); @@ -4650,15 +4774,19 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) PyHeapTypeObject *ht = (PyHeapTypeObject*)super; PyObject *module = ht->ht_module; if (module && _PyModule_GetDef(module) == def) { - return module; + res = module; + break; } } + END_TYPE_LOCK() - PyErr_Format( - PyExc_TypeError, - "PyType_GetModuleByDef: No superclass of '%s' has the given module", - type->tp_name); - return NULL; + if (res == NULL) { + PyErr_Format( + PyExc_TypeError, + "PyType_GetModuleByDef: No superclass of '%s' has the given module", + type->tp_name); + } + return res; } void * @@ -4696,6 +4824,8 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { + ASSERT_TYPE_LOCK_HELD(); + Py_hash_t hash; if (!PyUnicode_CheckExact(name) || (hash = _PyASCIIObject_CAST(name)->hash) == -1) @@ -4884,7 +5014,7 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - return entry->value; + return entry->value; } #endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); @@ -4893,7 +5023,19 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); + // We need to atomically do the lookup and capture the version before + // anyone else can modify our mro or mutate the type. + + int has_version = 0; + int version = 0; + BEGIN_TYPE_LOCK() res = find_name_in_mro(type, name, &error); + if (MCACHE_CACHEABLE_NAME(name)) { + has_version = assign_version_tag(interp, type); + version = type->tp_version_tag; + } + END_TYPE_LOCK() + /* Only put NULL results into cache if there was no error. */ if (error) { /* It's not ideal to clear the error condition, @@ -4910,11 +5052,11 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return NULL; } - if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(interp, type)) { + if (has_version) { #if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, type->tp_version_tag, res); + update_cache_gil_disabled(entry, name, version, res); #else - update_cache(entry, name, type->tp_version_tag, res); + update_cache(entry, name, version, res); #endif assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); } @@ -5079,6 +5221,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) /* Will fail in _PyObject_GenericSetAttrWithDict. */ Py_INCREF(name); } + + BEGIN_TYPE_LOCK() res = _PyObject_GenericSetAttrWithDict((PyObject *)type, name, value, NULL); if (res == 0) { /* Clear the VALID_VERSION flag of 'type' and all its @@ -5086,13 +5230,15 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) update_subclasses() recursion in update_slot(), but carefully: they each have their own conditions on which to stop recursing into subclasses. 
*/ - PyType_Modified(type); + type_modified_unlocked(type); if (is_dunder_name(name)) { res = update_slot(type, name); } assert(_PyType_CheckConsistency(type)); } + END_TYPE_LOCK() + Py_DECREF(name); return res; } @@ -6897,28 +7043,28 @@ inherit_special(PyTypeObject *type, PyTypeObject *base) #undef COPYVAL /* Setup fast subclass flags */ - if (PyType_IsSubtype(base, (PyTypeObject*)PyExc_BaseException)) { + if (is_subtype_unlocked(base, (PyTypeObject*)PyExc_BaseException)) { type->tp_flags |= Py_TPFLAGS_BASE_EXC_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyType_Type)) { + else if (is_subtype_unlocked(base, &PyType_Type)) { type->tp_flags |= Py_TPFLAGS_TYPE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyLong_Type)) { + else if (is_subtype_unlocked(base, &PyLong_Type)) { type->tp_flags |= Py_TPFLAGS_LONG_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyBytes_Type)) { + else if (is_subtype_unlocked(base, &PyBytes_Type)) { type->tp_flags |= Py_TPFLAGS_BYTES_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyUnicode_Type)) { + else if (is_subtype_unlocked(base, &PyUnicode_Type)) { type->tp_flags |= Py_TPFLAGS_UNICODE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyTuple_Type)) { + else if (is_subtype_unlocked(base, &PyTuple_Type)) { type->tp_flags |= Py_TPFLAGS_TUPLE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyList_Type)) { + else if (is_subtype_unlocked(base, &PyList_Type)) { type->tp_flags |= Py_TPFLAGS_LIST_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyDict_Type)) { + else if (is_subtype_unlocked(base, &PyDict_Type)) { type->tp_flags |= Py_TPFLAGS_DICT_SUBCLASS; } @@ -7357,6 +7503,8 @@ type_ready_preheader(PyTypeObject *type) static int type_ready_mro(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { assert(lookup_tp_mro(type) != NULL); @@ -7366,7 +7514,7 @@ type_ready_mro(PyTypeObject *type) } /* Calculate method resolution order */ - if (mro_internal(type, NULL) < 0) { + if (mro_internal_unlocked(type, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -7430,6 +7578,8 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { static int type_ready_inherit(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; if (base != NULL) { @@ -7622,6 +7772,8 @@ type_ready_post_checks(PyTypeObject *type) static int type_ready(PyTypeObject *type, int rerunbuiltin) { + ASSERT_TYPE_LOCK_HELD(); + _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -7709,7 +7861,16 @@ PyType_Ready(PyTypeObject *type) type->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; } - return type_ready(type, 0); + int res; + BEGIN_TYPE_LOCK() + if (!(type->tp_flags & Py_TPFLAGS_READY)) { + res = type_ready(type, 0); + } else { + res = 0; + assert(_PyType_CheckConsistency(type)); + } + END_TYPE_LOCK() + return res; } int @@ -7739,7 +7900,10 @@ _PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) static_builtin_state_init(interp, self); - int res = type_ready(self, !ismain); + int res; + BEGIN_TYPE_LOCK(); + res = type_ready(self, !ismain); + END_TYPE_LOCK() if (res < 0) { static_builtin_state_clear(interp, self); } @@ -8141,9 +8305,12 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped) https://mail.python.org/pipermail/python-dev/2003-April/034535.html */ static int -hackcheck(PyObject *self, setattrofunc func, const char *what) +hackcheck_unlocked(PyObject *self, 
setattrofunc func, const char *what) { PyTypeObject *type = Py_TYPE(self); + + ASSERT_TYPE_LOCK_HELD(); + PyObject *mro = lookup_tp_mro(type); if (!mro) { /* Probably ok not to check the call in this case. */ @@ -8185,6 +8352,20 @@ hackcheck(PyObject *self, setattrofunc func, const char *what) return 1; } +static int +hackcheck(PyObject *self, setattrofunc func, const char *what) +{ + if (!PyType_Check(self)) { + return 1; + } + + int res; + BEGIN_TYPE_LOCK(); + res = hackcheck_unlocked(self, func, what); + END_TYPE_LOCK() + return res; +} + static PyObject * wrap_setattr(PyObject *self, PyObject *args, void *wrapped) { @@ -9354,7 +9535,7 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags) } static int -releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) +releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer, releasebufferproc *base_releasebuffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); @@ -9375,7 +9556,7 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) if (i >= n) return -1; - releasebufferproc base_releasebuffer = NULL; + *base_releasebuffer = NULL; for (; i < n; i++) { PyObject *obj = PyTuple_GET_ITEM(mro, i); if (!PyType_Check(obj)) { @@ -9385,15 +9566,28 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) if (base_type->tp_as_buffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != slot_bf_releasebuffer) { - base_releasebuffer = base_type->tp_as_buffer->bf_releasebuffer; + *base_releasebuffer = base_type->tp_as_buffer->bf_releasebuffer; break; } } - if (base_releasebuffer != NULL) { + return 0; +} + +static int +releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) +{ + int res; + releasebufferproc base_releasebuffer; + + BEGIN_TYPE_LOCK(); + res = releasebuffer_maybe_call_super_unlocked(self, buffer, &base_releasebuffer); + END_TYPE_LOCK(); + + if (res == 0) { base_releasebuffer(self, buffer); } - return 0; + return res; } static void @@ -9929,6 +10123,8 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *descr; PyWrapperDescrObject *d; @@ -9977,7 +10173,7 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) d = (PyWrapperDescrObject *)descr; if ((specific == NULL || specific == d->d_wrapped) && d->d_base->wrapper == p->wrapper && - PyType_IsSubtype(type, PyDescr_TYPE(d))) + is_subtype_unlocked(type, PyDescr_TYPE(d))) { specific = d->d_wrapped; } @@ -10042,6 +10238,8 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { + ASSERT_TYPE_LOCK_HELD(); + pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { update_one_slot(type, *pp); @@ -10058,6 +10256,7 @@ update_slot(PyTypeObject *type, PyObject *name) pytype_slotdef **pp; int offset; + ASSERT_TYPE_LOCK_HELD(); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -10091,10 +10290,17 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { + // This lock isn't strictly necessary because the type has not been + // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro + // where we'd like to assert that the tyep is locked. 
+ BEGIN_TYPE_LOCK() + assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } + + END_TYPE_LOCK() } static void @@ -10102,6 +10308,8 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; + ASSERT_TYPE_LOCK_HELD(); + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ PyType_Modified(type); @@ -10374,7 +10582,15 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * PyObject *mro, *res; Py_ssize_t i, n; + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(su_obj_type); + /* keep a strong reference to mro because su_obj_type->tp_mro can be + replaced during PyDict_GetItemRef(dict, name, &res) and because + another thread can modify it after we end the critical section + below */ + Py_XINCREF(mro); + END_TYPE_LOCK() + if (mro == NULL) return NULL; @@ -10387,12 +10603,11 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * break; } i++; /* skip su->type (if any) */ - if (i >= n) + if (i >= n) { + Py_DECREF(mro); return NULL; + } - /* keep a strong reference to mro because su_obj_type->tp_mro can be - replaced during PyDict_GetItemRef(dict, name, &res) */ - Py_INCREF(mro); do { PyObject *obj = PyTuple_GET_ITEM(mro, i); PyObject *dict = lookup_tp_dict(_PyType_CAST(obj)); diff --git a/Python/pystate.c b/Python/pystate.c index 580b2f0ae3277f..b1d1a085d629b4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -395,6 +395,7 @@ _Py_COMP_DIAG_POP &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ + &(runtime)->types.type_mutex, \ } static void From 3799cbebd42ac2a0f7f729a2a647a8eeabeeb213 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 16 Jan 2024 12:02:37 -0800 Subject: [PATCH 3/5] Move seqlock implementation to lock.c --- Include/internal/pycore_lock.h | 34 ++++++++++++- Include/internal/pycore_typeobject.h | 2 +- Objects/typeobject.c | 70 ++++++++------------------- Python/lock.c | 71 +++++++++++++++++++++++++++- 4 files changed, 125 insertions(+), 52 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 63c7376f9027bd..30d0ac0ae8f0be 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -251,7 +251,39 @@ PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex); -void _Py_yield(void); +// Similar to linux seqlock: https://en.wikipedia.org/wiki/Seqlock +// We use a sequence number to lock the writer, an even sequence means we're unlocked, an odd +// sequence means we're locked. Readers will read the sequence before attempting to read the +// underlying data and then read the sequence number again after reading the data. If the +// sequence has not changed the data is valid. +// +// Differs a little bit in that we use CAS on sequence as the lock, instead of a seperate spin lock. +// The writer can also detect that the undelering data has not changed and abandon the write +// and restore the previous sequence. +typedef struct { + int sequence; +} _PySeqLock; + +// Lock the sequence lock for the writer +PyAPI_FUNC(void) _PySeqLock_LockWrite(_PySeqLock *seqlock); + +// Unlock the sequence lock and move to the next sequence number. +PyAPI_FUNC(void) _PySeqLock_UnlockWrite(_PySeqLock *seqlock); + +// Abandon the current update indicating that no mutations have occured +// and restore the previous sequence value. 
+PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock); + +// Begin a read operation and return the current sequence number. +PyAPI_FUNC(int) _PySeqLock_BeginRead(_PySeqLock *seqlock); + +// End the read operation and confirm that the sequence number has not changed. +// Returns 1 if the read was successful or 0 if the read should be re-tried. +PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, int previous); + +// Check if the lock was held during a fork and clear the lock. Returns 1 +// if the lock was held and any associated datat should be cleared. +PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock); #ifdef __cplusplus } diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index ce7e08385aa6bd..115855f2a8e24c 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -31,7 +31,7 @@ struct _types_runtime_state { struct type_cache_entry { unsigned int version; // initialized from type->tp_version_tag #ifdef Py_GIL_DISABLED - int sequence; + _PySeqLock sequence; #endif PyObject *name; // reference to exactly a str or None PyObject *value; // borrowed reference or NULL diff --git a/Objects/typeobject.c b/Objects/typeobject.c index def1c6da1e8948..abbcf7d2b32f32 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6,6 +6,7 @@ #include "pycore_code.h" // CO_FAST_FREE #include "pycore_dict.h" // _PyDict_KeysSize() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_lock.h" // _PySeqLock_* #include "pycore_long.h" // _PyLong_IsNegative() #include "pycore_memoryobject.h" // _PyMemoryView_FromBufferProc() #include "pycore_modsupport.h" // _PyArg_NoKwnames() @@ -4913,43 +4914,21 @@ static void update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value) { - // Similar to linux seqlock: https://en.wikipedia.org/wiki/Seqlock - // We use a sequence number to lock the writer, an even sequence means we're unlocked, an odd - // sequence means we're locked. - // Differs a little bit in that we use CAS on sequence as the lock, instead of a seperate spin lock. - // If our writer detects that another thread has already done the same write we'll just bail - // and restore the previous sequence number without doing any updates. - - // lock the entry by setting by moving to an odd sequence number - int prev = entry->sequence; - while (1) { - if (TYPE_CACHE_IS_UPDATING(prev)) { - // Someone else is currently updating the cache - _Py_yield(); - prev = _Py_atomic_load_int32_relaxed(&entry->sequence); - } else if(_Py_atomic_compare_exchange_int32(&entry->sequence, &prev, prev + 1)) { - // We've locked the cache - break; - } else { - _Py_yield(); - } - } + _PySeqLock_LockWrite(&entry->sequence); // update the entry if (entry->name == name && entry->value == value && entry->version == version_tag) { - // We reaced with another update, bail and restore previous sequence. - _Py_atomic_exchange_int32(&entry->sequence, prev); + // We raced with another update, bail and restore previous sequence. 
+ _PySeqLock_AbandonWrite(&entry->sequence); return; } update_cache(entry, name, version_tag, value); // Then update sequence to the next valid value - int new_sequence = prev + 2; - assert(!TYPE_CACHE_IS_UPDATING(new_sequence)); - _Py_atomic_exchange_int32(&entry->sequence, new_sequence); + _PySeqLock_UnlockWrite(&entry->sequence); } #endif @@ -4959,10 +4938,8 @@ void _PyTypes_AfterFork() { struct type_cache *cache = get_type_cache(); for (int i = 0; i < 1 << MCACHE_SIZE_EXP; i++) { struct type_cache_entry *entry = &cache->hashtable[i]; - int sequence = _Py_atomic_load_int_acquire(&entry->sequence); - if (TYPE_CACHE_IS_UPDATING(sequence)) { + if (_PySeqLock_AfterFork(&entry->sequence)) { // Entry was in the process of updating while forking, clear it... - entry->sequence = 0; entry->value = NULL; entry->name = NULL; entry->version = 0; @@ -4986,27 +4963,22 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) #ifdef Py_GIL_DISABLED // synchronize-with other writing threads by doing an acquire load on the sequence while (1) { - int sequence = _Py_atomic_load_int_acquire(&entry->sequence); - if (!TYPE_CACHE_IS_UPDATING(sequence)) { - if (_Py_atomic_load_uint32_relaxed(&entry->version) == type->tp_version_tag && - _Py_atomic_load_ptr_relaxed(&entry->name) == name) { - assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - PyObject *value = _Py_atomic_load_ptr_relaxed(&entry->value); - - // Synchronize again and validate that the entry hasn't been updated - // while we were readying the values. - if (_Py_atomic_load_int_acquire(&entry->sequence) == sequence) { - return value; - } - } else { - // Cache miss - break; - } + int sequence = _PySeqLock_BeginRead(&entry->sequence); + if (_Py_atomic_load_uint32_relaxed(&entry->version) == type->tp_version_tag && + _Py_atomic_load_ptr_relaxed(&entry->name) == name) { + assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + PyObject *value = _Py_atomic_load_ptr_relaxed(&entry->value); + + // If the sequence is still valid then we're done + if (_PySeqLock_EndRead(&entry->sequence, sequence)) { + return value; + } + } else { + // cache miss + break; } - // We are in progress of updating the cache, retry - _Py_yield(); } #else if (entry->version == type->tp_version_tag && diff --git a/Python/lock.c b/Python/lock.c index 9d73ec3fa2217d..87444c75db9c0a 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -36,7 +36,7 @@ struct mutex_entry { int handed_off; }; -void +static void _Py_yield(void) { #ifdef MS_WINDOWS @@ -459,3 +459,72 @@ _PyRWMutex_Unlock(_PyRWMutex *rwmutex) _PyParkingLot_UnparkAll(&rwmutex->bits); } } + +#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01) + +void _PySeqLock_LockWrite(_PySeqLock *seqlock) +{ + // lock the entry by setting by moving to an odd sequence number + int prev = seqlock->sequence; + while (1) { + if (SEQLOCK_IS_UPDATING(prev)) { + // Someone else is currently updating the cache + _Py_yield(); + prev = _Py_atomic_load_int32_relaxed(&seqlock->sequence); + } else if (_Py_atomic_compare_exchange_int32(&seqlock->sequence, &prev, prev + 1)) { + // We've locked the cache + break; + } else { + _Py_yield(); + } + } +} + +void _PySeqLock_AbandonWrite(_PySeqLock *seqlock) +{ + int new_seq = seqlock->sequence - 1; + 
assert(!SEQLOCK_IS_UPDATING(new_seq));
+    _Py_atomic_exchange_int32(&seqlock->sequence, new_seq);
+}
+
+void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
+{
+    int new_seq = seqlock->sequence + 1;
+    assert(!SEQLOCK_IS_UPDATING(new_seq));
+    _Py_atomic_exchange_int32(&seqlock->sequence, new_seq);
+}
+
+int _PySeqLock_BeginRead(_PySeqLock *seqlock)
+{
+    int sequence = _Py_atomic_load_int_acquire(&seqlock->sequence);
+    while (SEQLOCK_IS_UPDATING(sequence)) {
+        _Py_yield();
+        sequence = _Py_atomic_load_int_acquire(&seqlock->sequence);
+    }
+
+    return sequence;
+}
+
+int _PySeqLock_EndRead(_PySeqLock *seqlock, int previous)
+{
+    // Synchronize again and validate that the entry hasn't been updated
+    // while we were reading the values.
+    if (_Py_atomic_load_int_acquire(&seqlock->sequence) == previous) {
+        return 1;
+    }
+
+    _Py_yield();
+    return 0;
+}
+
+int _PySeqLock_AfterFork(_PySeqLock *seqlock)
+{
+    // Check if the lock was held during a fork; if so, reset it and report
+    // that any associated data should be cleared.
+    if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
+        seqlock->sequence = 0;
+        return 1;
+    }
+
+    return 0;
+}

From 51a4b93d27fa3b5a57842489de346e7be772ec84 Mon Sep 17 00:00:00 2001
From: Dino Viehland
Date: Tue, 16 Jan 2024 12:34:13 -0800
Subject: [PATCH 4/5] Review feedback

---
 Include/internal/pycore_critical_section.h |  3 +-
 Include/internal/pycore_typeobject.h       |  2 +-
 Objects/typeobject.c                       | 48 +++++++++++-----------
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
index 30820a24f5bb64..9163b5cf0f2e8a 100644
--- a/Include/internal/pycore_critical_section.h
+++ b/Include/internal/pycore_critical_section.h
@@ -293,7 +293,8 @@ _PyCriticalSection_SuspendAll(PyThreadState *tstate);
 #ifdef Py_GIL_DISABLED
 
 static inline void
-_PyCriticalSection_AssertHeld(PyMutex *mutex) {
+_PyCriticalSection_AssertHeld(PyMutex *mutex)
+{
 #ifdef Py_DEBUG
     PyThreadState *tstate = _PyThreadState_GET();
     uintptr_t prev = tstate->critical_section;
diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h
index 115855f2a8e24c..664f6fb212a57d 100644
--- a/Include/internal/pycore_typeobject.h
+++ b/Include/internal/pycore_typeobject.h
@@ -79,7 +79,7 @@ struct types_state {
 extern PyStatus _PyTypes_InitTypes(PyInterpreterState *);
 extern void _PyTypes_FiniTypes(PyInterpreterState *);
 extern void _PyTypes_Fini(PyInterpreterState *);
-void _PyTypes_AfterFork(void);
+extern void _PyTypes_AfterFork(void);
 
 /* other API */
 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index abbcf7d2b32f32..892206be189915 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -686,9 +686,15 @@ type_cache_clear(struct type_cache *cache, PyObject *value)
 {
     for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) {
         struct type_cache_entry *entry = &cache->hashtable[i];
+#ifdef Py_GIL_DISABLED
+        _PySeqLock_LockWrite(&entry->sequence);
+#endif
         entry->version = 0;
         Py_XSETREF(entry->name, _Py_XNewRef(value));
         entry->value = NULL;
+#ifdef Py_GIL_DISABLED
+        _PySeqLock_UnlockWrite(&entry->sequence);
+#endif
     }
 }
 
@@ -4903,6 +4909,8 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio
     entry->value = value; /* borrowed */
     assert(_PyASCIIObject_CAST(name)->hash != -1);
     OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name);
+    // We're releasing this under the lock for simplicity's sake because it's always an
+    // exact unicode object or Py_None, so it's
safe to do so. Py_SETREF(entry->name, Py_NewRef(name)); } @@ -4933,15 +4941,17 @@ update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, #endif -void _PyTypes_AfterFork() { +void +_PyTypes_AfterFork() +{ #ifdef Py_GIL_DISABLED struct type_cache *cache = get_type_cache(); - for (int i = 0; i < 1 << MCACHE_SIZE_EXP; i++) { + for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { struct type_cache_entry *entry = &cache->hashtable[i]; if (_PySeqLock_AfterFork(&entry->sequence)) { // Entry was in the process of updating while forking, clear it... entry->value = NULL; - entry->name = NULL; + Py_SETREF(entry->name, Py_None); entry->version = 0; } } @@ -5005,6 +5015,7 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) if (MCACHE_CACHEABLE_NAME(name)) { has_version = assign_version_tag(interp, type); version = type->tp_version_tag; + assert(!has_version || _PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); } END_TYPE_LOCK() @@ -5030,8 +5041,7 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) #else update_cache(entry, name, version, res); #endif - assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); -} + } return res; } @@ -9506,13 +9516,13 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags) return -1; } -static int -releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer, releasebufferproc *base_releasebuffer) +static releasebufferproc +releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); if (mro == NULL) { - return -1; + return NULL; } assert(PyTuple_Check(mro)); @@ -9526,9 +9536,8 @@ releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer, relea } i++; /* skip self_type */ if (i >= n) - return -1; + return NULL; - *base_releasebuffer = NULL; for (; i < n; i++) { PyObject *obj = PyTuple_GET_ITEM(mro, i); if (!PyType_Check(obj)) { @@ -9538,28 +9547,25 @@ releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer, relea if (base_type->tp_as_buffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != slot_bf_releasebuffer) { - *base_releasebuffer = base_type->tp_as_buffer->bf_releasebuffer; - break; + return base_type->tp_as_buffer->bf_releasebuffer; } } - return 0; + return NULL; } -static int +static void releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { - int res; releasebufferproc base_releasebuffer; BEGIN_TYPE_LOCK(); - res = releasebuffer_maybe_call_super_unlocked(self, buffer, &base_releasebuffer); + base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); END_TYPE_LOCK(); - if (res == 0) { + if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); } - return res; } static void @@ -9636,11 +9642,7 @@ static void slot_bf_releasebuffer(PyObject *self, Py_buffer *buffer) { releasebuffer_call_python(self, buffer); - if (releasebuffer_maybe_call_super(self, buffer) < 0) { - if (PyErr_Occurred()) { - PyErr_WriteUnraisable(self); - } - } + releasebuffer_maybe_call_super(self, buffer); } static PyObject * From 66ce5f582b1568fd8a4fa86cd422cfd804a800c8 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Wed, 17 Jan 2024 14:03:25 -0800 Subject: [PATCH 5/5] Use int32_t for sequence lock Use atomic storage instead of exchange Use relaxed load when starting sequence lock for write Fix some formatting --- Include/cpython/pyatomic.h | 3 +++ Include/cpython/pyatomic_gcc.h | 3 +++ 
Include/cpython/pyatomic_msc.h | 11 +++++++++++ Include/cpython/pyatomic_std.h | 7 +++++++ Include/internal/pycore_lock.h | 8 ++++---- Objects/typeobject.c | 12 ++++++------ Python/lock.c | 30 ++++++++++++++++-------------- 7 files changed, 50 insertions(+), 24 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 5314a70436bfc3..e10d48285367cf 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -469,6 +469,9 @@ _Py_atomic_store_int_release(int *obj, int value); static inline int _Py_atomic_load_int_acquire(const int *obj); +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj); + // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index 70f2b7e1b5706a..4095e1873d8b07 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -495,6 +495,9 @@ static inline int _Py_atomic_load_int_acquire(const int *obj) { return __atomic_load_n(obj, __ATOMIC_ACQUIRE); } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); } // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 601a0cf65afc1c..b5c1ec94112562 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -938,6 +938,17 @@ _Py_atomic_load_int_acquire(const int *obj) #endif } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uint32_t volatile *)obj; +#elif defined(_M_ARM64) + return (int)__ldar32((uint32_t volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint32_acquire" +#endif +} // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index a05bfaec47e89d..6c934a2c5e7b64 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -870,6 +870,13 @@ _Py_atomic_load_int_acquire(const int *obj) memory_order_acquire); } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint32_t)*)obj, + memory_order_acquire); +} // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 30d0ac0ae8f0be..674a1d170fec10 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -261,7 +261,7 @@ PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex); // The writer can also detect that the undelering data has not changed and abandon the write // and restore the previous sequence. typedef struct { - int sequence; + uint32_t sequence; } _PySeqLock; // Lock the sequence lock for the writer @@ -275,15 +275,15 @@ PyAPI_FUNC(void) _PySeqLock_UnlockWrite(_PySeqLock *seqlock); PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock); // Begin a read operation and return the current sequence number. -PyAPI_FUNC(int) _PySeqLock_BeginRead(_PySeqLock *seqlock); +PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock); // End the read operation and confirm that the sequence number has not changed. // Returns 1 if the read was successful or 0 if the read should be re-tried. 
-PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, int previous);
+PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
 
 // Check if the lock was held during a fork and clear the lock. Returns 1
 // if the lock was held and any associated data should be cleared.
-PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
+PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);
 
 #ifdef __cplusplus
 }
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 892206be189915..e0711dfe8545b7 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -60,13 +60,13 @@ class object "PyObject *" "&PyBaseObject_Type"
 // in odd behaviors w.r.t. running with the GIL as the outer type lock could
 // be released and reacquired during a subclass update if there's contention
 // on the subclass lock.
-#define BEGIN_TYPE_LOCK() \
-    { \
-        _PyCriticalSection _cs; \
-        _PyCriticalSection_Begin(&_cs, &_PyRuntime.types.type_mutex); \
+#define BEGIN_TYPE_LOCK()                                               \
+    {                                                                   \
+        _PyCriticalSection _cs;                                         \
+        _PyCriticalSection_Begin(&_cs, &_PyRuntime.types.type_mutex);   \
 
-#define END_TYPE_LOCK() \
-    _PyCriticalSection_End(&_cs); \
+#define END_TYPE_LOCK()                                                 \
+    _PyCriticalSection_End(&_cs);                                       \
     }
 
 #define ASSERT_TYPE_LOCK_HELD() \
diff --git a/Python/lock.c b/Python/lock.c
index 87444c75db9c0a..bf0143654bd692 100644
--- a/Python/lock.c
+++ b/Python/lock.c
@@ -465,16 +465,18 @@ _PyRWMutex_Unlock(_PyRWMutex *rwmutex)
 void _PySeqLock_LockWrite(_PySeqLock *seqlock)
 {
     // lock the entry by moving to an odd sequence number
-    int prev = seqlock->sequence;
+    uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
     while (1) {
         if (SEQLOCK_IS_UPDATING(prev)) {
             // Someone else is currently updating the cache
             _Py_yield();
-            prev = _Py_atomic_load_int32_relaxed(&seqlock->sequence);
-        } else if (_Py_atomic_compare_exchange_int32(&seqlock->sequence, &prev, prev + 1)) {
+            prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
+        }
+        else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
             // We've locked the cache
             break;
-        } else {
+        }
+        else {
             _Py_yield();
         }
     }
@@ -482,34 +484,34 @@ void _PySeqLock_LockWrite(_PySeqLock *seqlock)
 
 void _PySeqLock_AbandonWrite(_PySeqLock *seqlock)
 {
-    int new_seq = seqlock->sequence - 1;
+    uint32_t new_seq = seqlock->sequence - 1;
     assert(!SEQLOCK_IS_UPDATING(new_seq));
-    _Py_atomic_exchange_int32(&seqlock->sequence, new_seq);
+    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
 }
 
 void _PySeqLock_UnlockWrite(_PySeqLock *seqlock)
 {
-    int new_seq = seqlock->sequence + 1;
+    uint32_t new_seq = seqlock->sequence + 1;
     assert(!SEQLOCK_IS_UPDATING(new_seq));
-    _Py_atomic_exchange_int32(&seqlock->sequence, new_seq);
+    _Py_atomic_store_uint32(&seqlock->sequence, new_seq);
 }
 
-int _PySeqLock_BeginRead(_PySeqLock *seqlock)
+uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
 {
-    int sequence = _Py_atomic_load_int_acquire(&seqlock->sequence);
+    uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
     while (SEQLOCK_IS_UPDATING(sequence)) {
         _Py_yield();
-        sequence = _Py_atomic_load_int_acquire(&seqlock->sequence);
+        sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence);
     }
 
     return sequence;
 }
 
-int _PySeqLock_EndRead(_PySeqLock *seqlock, int previous)
+uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
 {
     // Synchronize again and validate that the entry hasn't been updated
     // while we were reading the values.
-    if (_Py_atomic_load_int_acquire(&seqlock->sequence) == previous) {
+    if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) {
         return 1;
     }
 
@@ -517,7 +519,7 @@ int _PySeqLock_EndRead(_PySeqLock *seqlock, int previous)
     return 0;
 }
 
-int _PySeqLock_AfterFork(_PySeqLock *seqlock)
+uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock)
 {
     // Check if the lock was held during a fork; if so, reset it and report
    // that any associated data should be cleared.
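
To make the protocol behind _PySeqLock concrete, the following is a minimal
standalone sketch of the same reader/writer scheme. It is illustrative only:
it uses C11 <stdatomic.h> and POSIX sched_yield() in place of CPython's
internal _Py_atomic_* and _Py_yield helpers, and the demo_* names are not
part of the patches above.

    #include <sched.h>       // sched_yield (POSIX)
    #include <stdatomic.h>
    #include <stdio.h>

    /* Illustrative stand-in for _PySeqLock: an even sequence means "no
     * writer", an odd sequence means an update is in progress. */
    typedef struct {
        atomic_uint sequence;
    } demo_seqlock;

    #define DEMO_IS_UPDATING(seq) ((seq) & 0x01)

    static void
    demo_lock_write(demo_seqlock *lock)
    {
        unsigned int prev = atomic_load_explicit(&lock->sequence,
                                                 memory_order_relaxed);
        for (;;) {
            if (DEMO_IS_UPDATING(prev)) {
                sched_yield();  /* another writer is active; spin politely */
                prev = atomic_load_explicit(&lock->sequence,
                                            memory_order_relaxed);
            }
            else if (atomic_compare_exchange_weak(&lock->sequence, &prev,
                                                  prev + 1)) {
                return;  /* sequence is now odd: we own the write side */
            }
        }
    }

    static void
    demo_unlock_write(demo_seqlock *lock)
    {
        /* Move to the next even value, publishing the update to readers. */
        atomic_fetch_add_explicit(&lock->sequence, 1, memory_order_release);
    }

    static unsigned int
    demo_begin_read(demo_seqlock *lock)
    {
        unsigned int seq = atomic_load_explicit(&lock->sequence,
                                                memory_order_acquire);
        while (DEMO_IS_UPDATING(seq)) {
            sched_yield();
            seq = atomic_load_explicit(&lock->sequence, memory_order_acquire);
        }
        return seq;
    }

    static int
    demo_end_read(demo_seqlock *lock, unsigned int previous)
    {
        /* The read is only valid if no writer ran in between. */
        return atomic_load_explicit(&lock->sequence,
                                    memory_order_acquire) == previous;
    }

    int
    main(void)
    {
        demo_seqlock lock = { 0 };
        int shared = 0;

        demo_lock_write(&lock);
        shared = 42;                 /* mutate the protected data */
        demo_unlock_write(&lock);

        int value;
        unsigned int seq;
        do {                         /* readers retry instead of blocking */
            seq = demo_begin_read(&lock);
            value = shared;
        } while (!demo_end_read(&lock, seq));

        printf("read %d at sequence %u\n", value, seq);
        return 0;
    }

As in the patches, readers never block: a torn read is detected by the final
sequence comparison and simply retried, while writers serialize among
themselves through the CAS on the sequence word. The sketch omits the
equivalent of _PySeqLock_AbandonWrite(), which steps the sequence back down
when a writer discovers the cache entry already holds the values it was
about to store.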