From eb7674b042d47a4b61903d1350e137bb86ee1519 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 10:07:09 +0900 Subject: [PATCH 1/8] gh-120496: Use CAS approach for rangeiter_next --- Objects/rangeobject.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 7da6162744ffd6..2166d28ffea1fe 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -813,16 +813,24 @@ PyTypeObject PyRange_Type = { in the normal case, but possible for any numeric value. */ + +#include "pycore_pyatomic_ft_wrappers.h" + static PyObject * rangeiter_next(_PyRangeIterObject *r) { - if (r->len > 0) { - long result = r->start; - r->start = result + r->step; - r->len--; - return PyLong_FromLong(result); - } - return NULL; + do { + long len = _Py_atomic_load_int64_relaxed(&r->len); + if (len <= 0) { + return NULL; + } + long result = _Py_atomic_load_int64_relaxed(&r->start); + long step = _Py_atomic_load_int64_relaxed(&r->step); + if (_Py_atomic_compare_exchange_int64(&r->start, &result, result + step)) { + _Py_atomic_add_int64(&r->len, -1); + return PyLong_FromLong(result); + } + } while (1); } static PyObject * From 2d9ac70916dae67b1b72f7709a0de579b83265b4 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 10:18:32 +0900 Subject: [PATCH 2/8] nit --- Objects/rangeobject.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 2166d28ffea1fe..aa6cc9a855378c 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -7,6 +7,7 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_range.h" #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_pyatomic_ft_wrappers.h" /* Support objects whose length is > PY_SSIZE_T_MAX. @@ -813,24 +814,31 @@ PyTypeObject PyRange_Type = { in the normal case, but possible for any numeric value. */ - -#include "pycore_pyatomic_ft_wrappers.h" - static PyObject * rangeiter_next(_PyRangeIterObject *r) { +#ifdef Py_GIL_DISABLED + uint64_t step = _Py_atomic_load_int64_relaxed(&r->step); do { - long len = _Py_atomic_load_int64_relaxed(&r->len); + uint64_t len = _Py_atomic_load_int64_relaxed(&r->len); if (len <= 0) { return NULL; } - long result = _Py_atomic_load_int64_relaxed(&r->start); - long step = _Py_atomic_load_int64_relaxed(&r->step); + uint64_t result = _Py_atomic_load_int64_relaxed(&r->start); if (_Py_atomic_compare_exchange_int64(&r->start, &result, result + step)) { _Py_atomic_add_int64(&r->len, -1); return PyLong_FromLong(result); } } while (1); +#else + if (r->len > 0) { + long result = r->start; + r->start = result + r->step; + r->len--; + return PyLong_FromLong(result); + } + return NULL; +#endif } static PyObject * From a547e9d4be1676df3b13e88dcf52b4285fde209e Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 10:21:40 +0900 Subject: [PATCH 3/8] nit --- Objects/rangeobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index aa6cc9a855378c..ba6275bc3d45eb 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -7,7 +7,6 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_range.h" #include "pycore_tuple.h" // _PyTuple_ITEMS() -#include "pycore_pyatomic_ft_wrappers.h" /* Support objects whose length is > PY_SSIZE_T_MAX. From 6ee317a61c83da650d9a8c202ede693e139e495a Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 10:23:02 +0900 Subject: [PATCH 4/8] fix --- Objects/rangeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index ba6275bc3d45eb..df81b82c0eb4b6 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -817,13 +817,13 @@ static PyObject * rangeiter_next(_PyRangeIterObject *r) { #ifdef Py_GIL_DISABLED - uint64_t step = _Py_atomic_load_int64_relaxed(&r->step); + int64_t step = _Py_atomic_load_int64_relaxed(&r->step); do { - uint64_t len = _Py_atomic_load_int64_relaxed(&r->len); + int64_t len = _Py_atomic_load_int64_relaxed(&r->len); if (len <= 0) { return NULL; } - uint64_t result = _Py_atomic_load_int64_relaxed(&r->start); + int64_t result = _Py_atomic_load_int64_relaxed(&r->start); if (_Py_atomic_compare_exchange_int64(&r->start, &result, result + step)) { _Py_atomic_add_int64(&r->len, -1); return PyLong_FromLong(result); From fd95922203452223b5f76b58dad40b541abcb7f7 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 11:43:12 +0900 Subject: [PATCH 5/8] More optimization --- Objects/rangeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index df81b82c0eb4b6..acb3cf3370b8f6 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -817,7 +817,8 @@ static PyObject * rangeiter_next(_PyRangeIterObject *r) { #ifdef Py_GIL_DISABLED - int64_t step = _Py_atomic_load_int64_relaxed(&r->step); + // r->step is readonly attribute, so we can assume it is not changed. + int64_t step = (int64_t)r->step; do { int64_t len = _Py_atomic_load_int64_relaxed(&r->len); if (len <= 0) { From 901ea197672d904a8e652ed8cbad0664484d496a Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 11:57:57 +0900 Subject: [PATCH 6/8] Update atomic package --- Include/cpython/pyatomic.h | 9 ++++++++ Include/cpython/pyatomic_gcc.h | 13 ++++++++++++ Include/cpython/pyatomic_msc.h | 38 ++++++++++++++++++++++++++++++++++ Include/cpython/pyatomic_std.h | 24 +++++++++++++++++++++ Objects/rangeobject.c | 6 +++--- 5 files changed, 87 insertions(+), 3 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 55a139bb9158db..5c5613237c3c77 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -105,6 +105,9 @@ _Py_atomic_add_int32(int32_t *obj, int32_t value); static inline int64_t _Py_atomic_add_int64(int64_t *obj, int64_t value); +static inline long +_Py_atomic_add_long(long *obj, long value); + static inline intptr_t _Py_atomic_add_intptr(intptr_t *obj, intptr_t value); @@ -155,6 +158,9 @@ _Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t desir static inline int _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired); +static inline int +_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired); + static inline int _Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired); @@ -348,6 +354,9 @@ _Py_atomic_load_uint32_relaxed(const uint32_t *obj); static inline uint64_t _Py_atomic_load_uint64_relaxed(const uint64_t *obj); +static inline long +_Py_atomic_load_long_relaxed(const long *obj); + static inline uintptr_t _Py_atomic_load_uintptr_relaxed(const uintptr_t *obj); diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index c0f3747be45758..99e9ef1b88be4f 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -30,6 +30,10 @@ static inline int64_t _Py_atomic_add_int64(int64_t *obj, int64_t value) { return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } +static inline long +_Py_atomic_add_long(long *obj, long value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + static inline intptr_t _Py_atomic_add_intptr(intptr_t *obj, intptr_t value) { return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } @@ -90,6 +94,11 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desir { return __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } +static inline int +_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + static inline int _Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired) { return __atomic_compare_exchange_n(obj, expected, desired, 0, @@ -342,6 +351,10 @@ static inline uint64_t _Py_atomic_load_uint64_relaxed(const uint64_t *obj) { return __atomic_load_n(obj, __ATOMIC_RELAXED); } +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + static inline uintptr_t _Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) { return __atomic_load_n(obj, __ATOMIC_RELAXED); } diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index f32995c1f578ac..e7626cbb9b4ac1 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -59,6 +59,23 @@ _Py_atomic_add_int64(int64_t *obj, int64_t value) #endif } +static inline long +_Py_atomic_add_long(long *obj, long value) +{ +#if defined(_M_X64) || defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(long); + return (int64_t)_InterlockedExchangeAdd((volatile __int64 *)obj, (__int64)value); +#else + long old_value = _Py_atomic_load_long_relaxed(obj); + for (;;) { + long new_value = old_value + value; + if (_Py_atomic_compare_exchange_long(obj, &old_value, new_value)) { + return old_value; + } + } +#endif +} + static inline uint8_t _Py_atomic_add_uint8(uint8_t *obj, uint8_t value) @@ -187,6 +204,21 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t value return 0; } +static inline int +_Py_atomic_compare_exchange_long(long *obj, long *expected, long value) +{ + _Py_atomic_ASSERT_ARG_TYPE(long); + long initial = (long)_InterlockedCompareExchange( + (volatile long *)obj, + (long)value, + (long)*expected); + if (initial == *expected) { + return 1; + } + *expected = initial; + return 0; +} + static inline int _Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *value) { @@ -688,6 +720,12 @@ _Py_atomic_load_uint64_relaxed(const uint64_t *obj) return *(volatile uint64_t *)obj; } +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ + return *(volatile long *)obj; +} + static inline uintptr_t _Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) { diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 0cdce4e6dd39f0..031493eb563d9b 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -55,6 +55,13 @@ _Py_atomic_add_int64(int64_t *obj, int64_t value) return atomic_fetch_add((_Atomic(int64_t)*)obj, value); } +static inline long +_Py_atomic_add_long(long *obj, long value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(long)*)obj, value); +} + static inline intptr_t _Py_atomic_add_intptr(intptr_t *obj, intptr_t value) { @@ -154,6 +161,14 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desir expected, desired); } +static inline int +_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(long)*)obj, + expected, desired); +} + static inline int _Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired) { @@ -587,6 +602,15 @@ _Py_atomic_load_uint64_relaxed(const uint64_t *obj) memory_order_relaxed); } +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(long)*)obj, + memory_order_relaxed); +} + + static inline uintptr_t _Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) { diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index acb3cf3370b8f6..c761d7fb6290e1 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -818,13 +818,13 @@ rangeiter_next(_PyRangeIterObject *r) { #ifdef Py_GIL_DISABLED // r->step is readonly attribute, so we can assume it is not changed. - int64_t step = (int64_t)r->step; + long step = r->step; do { - int64_t len = _Py_atomic_load_int64_relaxed(&r->len); + long len = _Py_atomic_load_long_relaxed(&r->len); if (len <= 0) { return NULL; } - int64_t result = _Py_atomic_load_int64_relaxed(&r->start); + long result = _Py_atomic_load_long_relaxed(&r->start); if (_Py_atomic_compare_exchange_int64(&r->start, &result, result + step)) { _Py_atomic_add_int64(&r->len, -1); return PyLong_FromLong(result); From 827d38b6f864f10084173d2b3845ecbce749d549 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 12:00:54 +0900 Subject: [PATCH 7/8] fix --- Include/cpython/pyatomic_msc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index e7626cbb9b4ac1..66bc939235807e 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -64,7 +64,7 @@ _Py_atomic_add_long(long *obj, long value) { #if defined(_M_X64) || defined(_M_ARM64) _Py_atomic_ASSERT_ARG_TYPE(long); - return (int64_t)_InterlockedExchangeAdd((volatile __int64 *)obj, (__int64)value); + return (long)_InterlockedExchangeAdd((volatile long *)obj, (long)value); #else long old_value = _Py_atomic_load_long_relaxed(obj); for (;;) { @@ -210,8 +210,8 @@ _Py_atomic_compare_exchange_long(long *obj, long *expected, long value) _Py_atomic_ASSERT_ARG_TYPE(long); long initial = (long)_InterlockedCompareExchange( (volatile long *)obj, - (long)value, - (long)*expected); + (long)value, + (long)*expected); if (initial == *expected) { return 1; } From 805a90e201d1846ab806ebd8389d5e94db80e4d7 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 15 Jun 2024 12:05:27 +0900 Subject: [PATCH 8/8] fix --- Objects/rangeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index c761d7fb6290e1..9adeaf92bf6cbb 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -825,8 +825,8 @@ rangeiter_next(_PyRangeIterObject *r) return NULL; } long result = _Py_atomic_load_long_relaxed(&r->start); - if (_Py_atomic_compare_exchange_int64(&r->start, &result, result + step)) { - _Py_atomic_add_int64(&r->len, -1); + if (_Py_atomic_compare_exchange_long(&r->start, &result, result + step)) { + _Py_atomic_add_long(&r->len, -1); return PyLong_FromLong(result); } } while (1);