From bc26f92e59c6fa8b89a1b932d06b58b37c7df956 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 3 Dec 2024 13:24:20 -0800 Subject: [PATCH 1/3] Use an atomic store to set type flags. The `PyType_HasFeature()` function reads the flags with a relaxed atomic load and without holding the type lock. To avoid data races, use atomic stores if `PyType_Ready()` has already been called. --- Objects/typeobject.c | 123 ++++++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 42 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..388a49a49574dc 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -13,6 +13,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetDef() #include "pycore_object.h" // _PyType_HasFeature() #include "pycore_object_alloc.h" // _PyObject_MallocWithType() +#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_symtable.h" // _Py_Mangle() @@ -343,6 +344,49 @@ _PyStaticType_GetBuiltins(void) /* end static builtin helpers */ +static void +type_set_flags(PyTypeObject *tp, unsigned long flags) +{ +#ifdef Py_GIL_DISABLED + if (tp->tp_flags & Py_TPFLAGS_READY) { + // Since PyType_HasFeature() reads the flags without holding the type + // lock, we need an atomic store here. + FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); + } + else { + // This is a small optimization to avoid the atomic stores in + // cases when new types are being initialized. If type is not + // yet ready, a non-atomic store is okay. When the type is + // finally marked ready then an atomic store will be done near + // the end of PyType_Read() (the READY flag is set and then the + // READYING flag is cleared after that). That ensures any other + // thread reading the type flags will read the correct value. + tp->tp_flags = flags; + } +#else + tp->tp_flags = flags; +#endif +} + +static void +type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) +{ + ASSERT_TYPE_LOCK_HELD(); + unsigned long new_flags = (tp->tp_flags & ~mask) | flags; + type_set_flags(tp, new_flags); +} + +static void +type_add_flags(PyTypeObject *tp, unsigned long flag) +{ + type_set_flags(tp, tp->tp_flags | flag); +} + +static void +type_clear_flags(PyTypeObject *tp, unsigned long flag) +{ + type_set_flags(tp, tp->tp_flags & ~flag); +} static inline void start_readying(PyTypeObject *type) @@ -356,7 +400,7 @@ start_readying(PyTypeObject *type) return; } assert((type->tp_flags & Py_TPFLAGS_READYING) == 0); - type->tp_flags |= Py_TPFLAGS_READYING; + type_add_flags(type, Py_TPFLAGS_READYING); } static inline void @@ -371,7 +415,7 @@ stop_readying(PyTypeObject *type) return; } assert(type->tp_flags & Py_TPFLAGS_READYING); - type->tp_flags &= ~Py_TPFLAGS_READYING; + type_clear_flags(type, Py_TPFLAGS_READYING); } static inline int @@ -1530,9 +1574,9 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context) PyType_Modified(type); if (abstract) - type->tp_flags |= Py_TPFLAGS_IS_ABSTRACT; + type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); else - type->tp_flags &= ~Py_TPFLAGS_IS_ABSTRACT; + type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); return 0; } @@ -3906,8 +3950,8 @@ type_new_alloc(type_new_ctx *ctx) // Initialize tp_flags. // All heap types need GC, since we can create a reference cycle by storing // an instance on one of its parents. - type->tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC); + type_set_flags(type, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC); // Initialize essential fields type->tp_as_async = &et->as_async; @@ -4140,12 +4184,12 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) if (ctx->add_weak) { assert((type->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF) == 0); - type->tp_flags |= Py_TPFLAGS_MANAGED_WEAKREF; + type_add_flags(type, Py_TPFLAGS_MANAGED_WEAKREF); type->tp_weaklistoffset = MANAGED_WEAKREF_OFFSET; } if (ctx->add_dict) { assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); - type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; + type_add_flags(type, Py_TPFLAGS_MANAGED_DICT); type->tp_dictoffset = -1; } @@ -4943,7 +4987,7 @@ PyType_FromMetaclass( type = &res->ht_type; /* The flags must be initialized early, before the GC traverses us */ - type->tp_flags = spec->flags | Py_TPFLAGS_HEAPTYPE; + type_set_flags(type, spec->flags | Py_TPFLAGS_HEAPTYPE); res->ht_module = Py_XNewRef(module); @@ -5679,18 +5723,11 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return can_cache; } -static void -set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) -{ - ASSERT_TYPE_LOCK_HELD(); - self->tp_flags = (self->tp_flags & ~mask) | flags; -} - void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { BEGIN_TYPE_LOCK(); - set_flags(self, mask, flags); + type_set_flags_with_mask(self, mask, flags); END_TYPE_LOCK(); } @@ -5721,7 +5758,7 @@ set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) return; } - set_flags(self, mask, flags); + type_set_flags_with_mask(self, mask, flags); PyObject *children = _PyType_GetSubclasses(self); if (children == NULL) { @@ -6055,7 +6092,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { - type->tp_flags &= ~Py_TPFLAGS_READY; + type_clear_flags(type, Py_TPFLAGS_READY); _PyType_SetVersion(type, 0); } @@ -7804,13 +7841,13 @@ inherit_special(PyTypeObject *type, PyTypeObject *base) if (!(type->tp_flags & Py_TPFLAGS_HAVE_GC) && (base->tp_flags & Py_TPFLAGS_HAVE_GC) && (!type->tp_traverse && !type->tp_clear)) { - type->tp_flags |= Py_TPFLAGS_HAVE_GC; + type_add_flags(type, Py_TPFLAGS_HAVE_GC); if (type->tp_traverse == NULL) type->tp_traverse = base->tp_traverse; if (type->tp_clear == NULL) type->tp_clear = base->tp_clear; } - type->tp_flags |= (base->tp_flags & Py_TPFLAGS_PREHEADER); + type_add_flags(type, base->tp_flags & Py_TPFLAGS_PREHEADER); if (type->tp_basicsize == 0) type->tp_basicsize = base->tp_basicsize; @@ -7828,38 +7865,40 @@ inherit_special(PyTypeObject *type, PyTypeObject *base) /* Setup fast subclass flags */ PyObject *mro = lookup_tp_mro(base); + unsigned long flags = 0; if (is_subtype_with_mro(mro, base, (PyTypeObject*)PyExc_BaseException)) { - type->tp_flags |= Py_TPFLAGS_BASE_EXC_SUBCLASS; + flags |= Py_TPFLAGS_BASE_EXC_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyType_Type)) { - type->tp_flags |= Py_TPFLAGS_TYPE_SUBCLASS; + flags |= Py_TPFLAGS_TYPE_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyLong_Type)) { - type->tp_flags |= Py_TPFLAGS_LONG_SUBCLASS; + flags |= Py_TPFLAGS_LONG_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyBytes_Type)) { - type->tp_flags |= Py_TPFLAGS_BYTES_SUBCLASS; + flags |= Py_TPFLAGS_BYTES_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyUnicode_Type)) { - type->tp_flags |= Py_TPFLAGS_UNICODE_SUBCLASS; + flags |= Py_TPFLAGS_UNICODE_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyTuple_Type)) { - type->tp_flags |= Py_TPFLAGS_TUPLE_SUBCLASS; + flags |= Py_TPFLAGS_TUPLE_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyList_Type)) { - type->tp_flags |= Py_TPFLAGS_LIST_SUBCLASS; + flags |= Py_TPFLAGS_LIST_SUBCLASS; } else if (is_subtype_with_mro(mro, base, &PyDict_Type)) { - type->tp_flags |= Py_TPFLAGS_DICT_SUBCLASS; + flags |= Py_TPFLAGS_DICT_SUBCLASS; } /* Setup some inheritable flags */ if (PyType_HasFeature(base, _Py_TPFLAGS_MATCH_SELF)) { - type->tp_flags |= _Py_TPFLAGS_MATCH_SELF; + flags |= _Py_TPFLAGS_MATCH_SELF; } if (PyType_HasFeature(base, Py_TPFLAGS_ITEMS_AT_END)) { - type->tp_flags |= Py_TPFLAGS_ITEMS_AT_END; + flags |= Py_TPFLAGS_ITEMS_AT_END; } + type_add_flags(type, flags); } static int @@ -8007,7 +8046,7 @@ inherit_slots(PyTypeObject *type, PyTypeObject *base) if (!type->tp_call && _PyType_HasFeature(base, Py_TPFLAGS_HAVE_VECTORCALL)) { - type->tp_flags |= Py_TPFLAGS_HAVE_VECTORCALL; + type_add_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); } COPYSLOT(tp_call); } @@ -8041,7 +8080,7 @@ inherit_slots(PyTypeObject *type, PyTypeObject *base) _PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) && _PyType_HasFeature(base, Py_TPFLAGS_METHOD_DESCRIPTOR)) { - type->tp_flags |= Py_TPFLAGS_METHOD_DESCRIPTOR; + type_add_flags(type, Py_TPFLAGS_METHOD_DESCRIPTOR); } COPYSLOT(tp_descr_set); COPYSLOT(tp_dictoffset); @@ -8356,7 +8395,7 @@ type_ready_inherit_as_structs(PyTypeObject *type, PyTypeObject *base) static void inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { if ((type->tp_flags & COLLECTION_FLAGS) == 0) { - type->tp_flags |= base->tp_flags & COLLECTION_FLAGS; + type_add_flags(type, base->tp_flags & COLLECTION_FLAGS); } } @@ -8471,7 +8510,7 @@ type_ready_set_new(PyTypeObject *type, int initial) && base == &PyBaseObject_Type && !(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { - type->tp_flags |= Py_TPFLAGS_DISALLOW_INSTANTIATION; + type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); } if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { @@ -8518,7 +8557,7 @@ type_ready_managed_dict(PyTypeObject *type) } } if (type->tp_itemsize == 0) { - type->tp_flags |= Py_TPFLAGS_INLINE_VALUES; + type_add_flags(type, Py_TPFLAGS_INLINE_VALUES); } return 0; } @@ -8624,7 +8663,7 @@ type_ready(PyTypeObject *type, int initial) } /* All done -- set the ready flag */ - type->tp_flags |= Py_TPFLAGS_READY; + type_add_flags(type, Py_TPFLAGS_READY); stop_readying(type); assert(_PyType_CheckConsistency(type)); @@ -8646,7 +8685,7 @@ PyType_Ready(PyTypeObject *type) /* Historically, all static types were immutable. See bpo-43908 */ if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { - type->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; + type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); /* Static types must be immortal */ _Py_SetImmortalUntracked((PyObject *)type); } @@ -8676,8 +8715,8 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { assert(initial); - self->tp_flags |= _Py_TPFLAGS_STATIC_BUILTIN; - self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; + type_add_flags(self, _Py_TPFLAGS_STATIC_BUILTIN); + type_add_flags(self, Py_TPFLAGS_IMMUTABLETYPE); assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); if (self->tp_version_tag == 0) { @@ -11060,7 +11099,7 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) generic = p->function; if (p->function == slot_tp_call) { /* A generic __call__ is incompatible with vectorcall */ - type->tp_flags &= ~Py_TPFLAGS_HAVE_VECTORCALL; + type_clear_flags(type, Py_TPFLAGS_HAVE_VECTORCALL); } } Py_DECREF(descr); @@ -11426,7 +11465,7 @@ PyType_Freeze(PyTypeObject *type) return -1; } - type->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; + type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); PyType_Modified(type); return 0; From 3c67c79456c8c560f3667b891a8b6564a75c3d8f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 4 Dec 2024 11:23:21 -0800 Subject: [PATCH 2/3] Simplify `type_set_flags()` function. Just use FT_ATOMIC_STORE_ULONG_RELAXED() always, not worth the extra code complexity. --- Objects/typeobject.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 388a49a49574dc..90dcbfcc84b22b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -347,25 +347,9 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { -#ifdef Py_GIL_DISABLED - if (tp->tp_flags & Py_TPFLAGS_READY) { - // Since PyType_HasFeature() reads the flags without holding the type - // lock, we need an atomic store here. - FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); - } - else { - // This is a small optimization to avoid the atomic stores in - // cases when new types are being initialized. If type is not - // yet ready, a non-atomic store is okay. When the type is - // finally marked ready then an atomic store will be done near - // the end of PyType_Read() (the READY flag is set and then the - // READYING flag is cleared after that). That ensures any other - // thread reading the type flags will read the correct value. - tp->tp_flags = flags; - } -#else - tp->tp_flags = flags; -#endif + // Since PyType_HasFeature() reads the flags without holding the type + // lock, we need an atomic store here. + FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); } static void From 0fe7d78c14ffa479054f3771e5fc828f835b8a3f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 4 Dec 2024 11:24:29 -0800 Subject: [PATCH 3/3] Add type lock held assert to `type_set_flags()`. This helps ensure that type flag setting is done in a free-threading safe way. Fix a few cases were flags were set without holding the type lock. Use `type_modified_unlocked()` in a couple places where the lock is already head. --- Objects/typeobject.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 90dcbfcc84b22b..c0a87a5a5b3e8d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -347,6 +347,12 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { + if (tp->tp_flags & Py_TPFLAGS_READY) { + // It's possible the type object has been exposed to other threads + // if it's been marked ready. In that case, the type lock should be + // held when flags are modified. + ASSERT_TYPE_LOCK_HELD(); + } // Since PyType_HasFeature() reads the flags without holding the type // lock, we need an atomic store here. FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); @@ -1556,11 +1562,14 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context) return -1; } - PyType_Modified(type); + BEGIN_TYPE_LOCK(); + type_modified_unlocked(type); if (abstract) type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); else type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); + END_TYPE_LOCK(); + return 0; } @@ -3328,7 +3337,7 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - PyType_Modified(type); + type_modified_unlocked(type); } else { /* For static builtin types, this is only called during init @@ -6076,8 +6085,10 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { + BEGIN_TYPE_LOCK(); type_clear_flags(type, Py_TPFLAGS_READY); - _PyType_SetVersion(type, 0); + set_version_unlocked(type, 0); + END_TYPE_LOCK(); } _PyStaticType_ClearWeakRefs(interp, type); @@ -11173,7 +11184,7 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - PyType_Modified(type); + type_modified_unlocked(type); for (p = slotdefs; p->name; p++) { /* update_slot returns int but can't actually fail */ @@ -11449,8 +11460,10 @@ PyType_Freeze(PyTypeObject *type) return -1; } + BEGIN_TYPE_LOCK(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); - PyType_Modified(type); + type_modified_unlocked(type); + END_TYPE_LOCK(); return 0; }