diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h
index 2a0c11e7b3ad66..b4d62fb0ae60cb 100644
--- a/Include/cpython/pyatomic.h
+++ b/Include/cpython/pyatomic.h
@@ -545,6 +545,59 @@ static inline Py_ssize_t
 _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj);
 
 
+// --- _Py_atomic_memcpy / _Py_atomic_memmove ------------
+
+static inline void *
+_Py_atomic_memcpy_ptr_store_relaxed(void *dest, void *src, size_t n)
+{
+    assert(_Py_IS_ALIGNED(dest, sizeof(void *)));
+    assert(_Py_IS_ALIGNED(src, sizeof(void *)));
+    assert(n % sizeof(void *) == 0);
+
+    if (dest != src) {
+        void **dest_ = (void **)dest;
+        void **src_ = (void **)src;
+        void **end = dest_ + n / sizeof(void *);
+
+        for (; dest_ != end; dest_++, src_++) {
+            _Py_atomic_store_ptr_relaxed(dest_, *src_);
+        }
+    }
+
+    return dest;
+}
+
+static inline void *
+_Py_atomic_memmove_ptr_store_relaxed(void *dest, void *src, size_t n)
+{
+    assert(_Py_IS_ALIGNED(dest, sizeof(void *)));
+    assert(_Py_IS_ALIGNED(src, sizeof(void *)));
+    assert(n % sizeof(void *) == 0);
+
+    if (dest < src || dest >= (void *)((char *)src + n)) {
+        void **dest_ = (void **)dest;
+        void **src_ = (void **)src;
+        void **end = dest_ + n / sizeof(void *);
+
+        for (; dest_ != end; dest_++, src_++) {
+            _Py_atomic_store_ptr_relaxed(dest_, *src_);
+        }
+    }
+    else if (dest > src) {
+        n = n / sizeof(void *) - 1;
+        void **dest_ = (void **)dest + n;
+        void **src_ = (void **)src + n;
+        void **end = (void **)dest - 1;
+
+        for (; dest_ != end; dest_--, src_--) {
+            _Py_atomic_store_ptr_relaxed(dest_, *src_);
+        }
+    }
+
+    return dest;
+}
+
+
 // --- _Py_atomic_fence ------------------------------------------------------
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index ffbcebdb7dfb50..3bda1b6beb3f08 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -12,6 +12,8 @@ extern "C" {
 #include "pycore_stackref.h"
 #endif
 
+#include "pycore_pyatomic_ft_wrappers.h"
+
 PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
 PyAPI_FUNC(PyObject*) _PyList_SliceSubscript(PyObject*, PyObject*);
 extern void _PyList_DebugMallocStats(FILE *out);
@@ -51,15 +53,17 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem)
     return _PyList_AppendTakeRefListResize(self, newitem);
 }
 
-// Repeat the bytes of a buffer in place
+// Repeat the bytes of a buffer of pointers in place
 static inline void
-_Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src)
+_Py_memory_ptrs_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src)
 {
     assert(len_src > 0);
+    assert(len_src % sizeof(void *) == 0);
+    assert(_Py_IS_ALIGNED(dest, sizeof(void *)));
     Py_ssize_t copied = len_src;
     while (copied < len_dest) {
         Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
-        memcpy(dest + copied, dest, (size_t)bytes_to_copy);
+        FT_ATOMIC_MEMCPY_PTR_STORE_RELAXED(dest + copied, dest, (size_t)bytes_to_copy);
         copied += bytes_to_copy;
     }
 }
diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h
index 3e41e2fd1569ca..aca0778a23ad23 100644
--- a/Include/internal/pycore_pyatomic_ft_wrappers.h
+++ b/Include/internal/pycore_pyatomic_ft_wrappers.h
@@ -112,6 +112,12 @@ extern "C" {
 #define FT_ATOMIC_ADD_SSIZE(value, new_value) \
     (void)_Py_atomic_add_ssize(&value, new_value)
 
+#define FT_ATOMIC_MEMCPY_PTR_STORE_RELAXED(dest, src, n) \
+    _Py_atomic_memcpy_ptr_store_relaxed(dest, src, (size_t)(n))
+#define FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED(dest, src, n) \
+    _Py_atomic_memmove_ptr_store_relaxed(dest, src, (size_t)(n))
+
+
 #else
 #define FT_ATOMIC_LOAD_PTR(value) value
 #define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value
@@ -160,6 +166,9 @@ extern "C" {
 #define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_ADD_SSIZE(value, new_value) (void)(value += new_value)
 
+#define FT_ATOMIC_MEMCPY_PTR_STORE_RELAXED(dest, src, n) memcpy(dest, src, n)
+#define FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED(dest, src, n) memmove(dest, src, n)
+
 #endif
 
 #ifdef __cplusplus
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-29-20-12-28.gh-issue-129069.QwbbWV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-29-20-12-28.gh-issue-129069.QwbbWV.rst
new file mode 100644
index 00000000000000..1cb09541dfa36b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-29-20-12-28.gh-issue-129069.QwbbWV.rst
@@ -0,0 +1 @@
+Fix data race and avoid jagged writes in ``list.list_ass_slice``.
diff --git a/Modules/_testcapi/pyatomic.c b/Modules/_testcapi/pyatomic.c
index 850de6f9c3366b..ff835adc49ad28 100644
--- a/Modules/_testcapi/pyatomic.c
+++ b/Modules/_testcapi/pyatomic.c
@@ -5,6 +5,7 @@
  */
 
 #include "parts.h"
+#include "pyconfig.h"   // SIZEOF_VOID_P
 
 // We define atomic bitwise operations on these types
 #define FOR_BITWISE_TYPES(V) \
@@ -156,6 +157,78 @@ test_atomic_load_store_int_release_acquire(PyObject *self, PyObject *obj) { \
     Py_RETURN_NONE;
 }
 
+static PyObject *
+test_atomic_memcpy_ptr_store_relaxed(PyObject *self, PyObject *obj) {
+#if SIZEOF_VOID_P == 8
+#define p0 (void *)0x5555555555555555
+#define p1 (void *)0xaaaaaaaaaaaaaaaa
+#define p2 (void *)0xfedcba9876543210
+#define p3 (void *)0x0123456789abcdef
+#else
+#if SIZEOF_VOID_P == 4
+#define p0 (void *)0x55555555
+#define p1 (void *)0xaaaaaaaa
+#define p2 (void *)0x76543210
+#define p3 (void *)0x01234567
+#else
+#error "unexpected sizeof(void *), expecting 8 or 4"
+#endif
+#endif
+
+    void *src[4] = { (void *)0, p2, p3, (void *)0 };
+    void *dst[4] = { p0, (void *)0, (void *)0, p1 };
+    assert(_Py_atomic_memcpy_ptr_store_relaxed(&dst[1], &src[1], SIZEOF_VOID_P * 2) == &dst[1]);
+    assert(dst[0] == p0);
+    assert(dst[1] == p2);
+    assert(dst[2] == p3);
+    assert(dst[3] == p1);
+    Py_RETURN_NONE;
+#undef p3
+#undef p2
+#undef p1
+#undef p0
+}
+
+static PyObject *
+test_atomic_memmove_ptr_store_relaxed(PyObject *self, PyObject *obj) {
+#if SIZEOF_VOID_P == 8
+#define p0 (void *)0x5555555555555555
+#define p1 (void *)0xaaaaaaaaaaaaaaaa
+#define p2 (void *)0xfedcba9876543210
+#define p3 (void *)0x0123456789abcdef
+#define p4 (void *)0x0f2d4b6987a5c3e1
+#else
+#if SIZEOF_VOID_P == 4
+#define p0 (void *)0x55555555
+#define p1 (void *)0xaaaaaaaa
+#define p2 (void *)0x76543210
+#define p3 (void *)0x01234567
+#define p4 (void *)0x07254361
+#else
+#error "unexpected sizeof(void *), expecting 8 or 4"
+#endif
+#endif
+
+    void *back[5] = { p0, p2, p3, p4, p1 };
+    assert(_Py_atomic_memmove_ptr_store_relaxed(&back[1], &back[2], SIZEOF_VOID_P * 2) == &back[1]);
+    assert(back[0] == p0);
+    assert(back[1] == p3);
+    assert(back[2] == p4);
+    assert(back[3] == p4);
+    assert(back[4] == p1);
+    void *fwd[5] = { p0, p2, p3, p4, p1 };
+    assert(_Py_atomic_memmove_ptr_store_relaxed(&fwd[2], &fwd[1], SIZEOF_VOID_P * 2) == &fwd[2]);
+    assert(fwd[0] == p0);
+    assert(fwd[1] == p2);
+    assert(fwd[2] == p2);
+    assert(fwd[3] == p3);
+    assert(fwd[4] == p1);
+    Py_RETURN_NONE;
+#undef p4
+#undef p3
+#undef p2
+#undef p1
+#undef p0
+}
+
 // NOTE: all tests should start with "test_atomic_" to be included
 // in test_pyatomic.py
@@ -179,6 +252,8 @@ static PyMethodDef test_methods[] = {
     {"test_atomic_fences", test_atomic_fences, METH_NOARGS},
     {"test_atomic_release_acquire", test_atomic_release_acquire, METH_NOARGS},
     {"test_atomic_load_store_int_release_acquire", test_atomic_load_store_int_release_acquire, METH_NOARGS},
+    {"test_atomic_memcpy_ptr_store_relaxed", test_atomic_memcpy_ptr_store_relaxed, METH_NOARGS},
+    {"test_atomic_memmove_ptr_store_relaxed", test_atomic_memmove_ptr_store_relaxed, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 1b36f4c25abf4d..456b8d3f0f16bc 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -818,10 +818,8 @@ list_repeat_lock_held(PyListObject *a, Py_ssize_t n)
             _Py_RefcntAdd(*src, n);
             *dest++ = *src++;
         }
-        // TODO: _Py_memory_repeat calls are not safe for shared lists in
-        // GIL_DISABLED builds. (See issue #129069)
-        _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
-                          sizeof(PyObject *)*input_size);
+        _Py_memory_ptrs_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
+                               sizeof(PyObject *)*input_size);
     }
     Py_SET_SIZE(np, output_size);
@@ -954,12 +952,10 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO
     if (d < 0) { /* Delete -d items */
         Py_ssize_t tail;
         tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
-        // TODO: these memmove/memcpy calls are not safe for shared lists in
-        // GIL_DISABLED builds. (See issue #129069)
-        memmove(&item[ihigh+d], &item[ihigh], tail);
+        FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED(&item[ihigh+d], &item[ihigh], tail);
         if (list_resize(a, Py_SIZE(a) + d) < 0) {
-            memmove(&item[ihigh], &item[ihigh+d], tail);
-            memcpy(&item[ilow], recycle, s);
+            FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED(&item[ihigh], &item[ihigh+d], tail);
+            FT_ATOMIC_MEMCPY_PTR_STORE_RELAXED(&item[ilow], recycle, s);
             goto Error;
         }
         item = a->ob_item;
@@ -969,10 +965,8 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO
         if (list_resize(a, k+d) < 0)
             goto Error;
         item = a->ob_item;
-        // TODO: these memmove/memcpy calls are not safe for shared lists in
-        // GIL_DISABLED builds. (See issue #129069)
-        memmove(&item[ihigh+d], &item[ihigh],
-            (k - ihigh)*sizeof(PyObject *));
+        FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED(&item[ihigh+d], &item[ihigh],
+            (k - ihigh)*sizeof(PyObject *));
     }
     for (k = 0; k < n; k++, ilow++) {
         PyObject *w = vitem[k];
@@ -1056,10 +1050,8 @@ list_inplace_repeat_lock_held(PyListObject *self, Py_ssize_t n)
     for (Py_ssize_t j = 0; j < input_size; j++) {
         _Py_RefcntAdd(items[j], n-1);
     }
-    // TODO: _Py_memory_repeat calls are not safe for shared lists in
-    // GIL_DISABLED builds. (See issue #129069)
-    _Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size,
-                      sizeof(PyObject *)*input_size);
+    _Py_memory_ptrs_repeat((char *)items, sizeof(PyObject *)*output_size,
+                           sizeof(PyObject *)*input_size);
     return 0;
 }
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 9b31758485ca5e..77b49c66d4f8b5 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -5,7 +5,7 @@
 #include "pycore_ceval.h"         // _PyEval_GetBuiltin()
 #include "pycore_freelist.h"      // _Py_FREELIST_PUSH()
 #include "pycore_gc.h"            // _PyObject_GC_IS_TRACKED()
-#include "pycore_list.h"          // _Py_memory_repeat()
+#include "pycore_list.h"          // _Py_memory_ptrs_repeat()
 #include "pycore_modsupport.h"    // _PyArg_NoKwnames()
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include "pycore_stackref.h"      // PyStackRef_AsPyObjectSteal()
@@ -540,8 +540,8 @@ tuple_repeat(PyObject *self, Py_ssize_t n)
             *dest++ = *src++;
         }
 
-        _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
-                          sizeof(PyObject *)*input_size);
+        _Py_memory_ptrs_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
+                               sizeof(PyObject *)*input_size);
     }
     _PyObject_GC_TRACK(np);
     return (PyObject *) np;
diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt
index 52d7c25a5bb37a..e69106a4d4635e 100644
--- a/Tools/tsan/suppressions_free_threading.txt
+++ b/Tools/tsan/suppressions_free_threading.txt
@@ -32,12 +32,6 @@ thread:pthread_create
 # Range iteration is not thread-safe yet (issue #129068)
 race_top:rangeiter_next
 
-# List resizing happens through different paths ending in memcpy or memmove
-# (for efficiency), which will probably need to rewritten as explicit loops
-# of ptr-sized copies to be thread-safe. (Issue #129069)
-race:list_ass_slice_lock_held
-race:list_inplace_repeat_lock_held
-
 # PyObject_Realloc internally does memcpy which isn't atomic so can race
 # with non-locking reads. See #132070
 race:PyObject_Realloc
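
Background sketch (not part of the diff above): the patch replaces raw memcpy/memmove over shared PyObject * arrays with one relaxed atomic store per pointer, so a lock-free reader racing with a resize may observe a mix of old and new elements but never a torn pointer value. The following minimal, standalone C11 program illustrates the same idea with standard atomics instead of CPython's _Py_atomic_store_ptr_relaxed(); the names copy_ptrs_relaxed, src and dst are hypothetical and exist only for this example.

// Hypothetical illustration of per-pointer relaxed-store copying (C11).
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

// Copy nptrs pointers using one relaxed atomic store each; concurrent readers
// of dest can see stale pointers, but never a half-written (torn) pointer.
static void
copy_ptrs_relaxed(_Atomic(void *) *dest, void *const *src, size_t nptrs)
{
    for (size_t i = 0; i < nptrs; i++) {
        atomic_store_explicit(&dest[i], src[i], memory_order_relaxed);
    }
}

int main(void)
{
    void *src[2] = { (void *)0x1234, (void *)0x5678 };
    _Atomic(void *) dst[2] = { NULL, NULL };

    copy_ptrs_relaxed(dst, src, 2);
    printf("%p %p\n",
           atomic_load_explicit(&dst[0], memory_order_relaxed),
           atomic_load_explicit(&dst[1], memory_order_relaxed));
    return 0;
}

On non-free-threaded builds the FT_ATOMIC_MEMCPY_PTR_STORE_RELAXED / FT_ATOMIC_MEMMOVE_PTR_STORE_RELAXED wrappers fall back to plain memcpy/memmove, so the per-pointer loop is only paid for under GIL_DISABLED.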