From 758df56f0333c49b16043bf85c6ee0c7465c18e2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Dec 2024 09:31:27 +0100 Subject: [PATCH 1/2] gh-128013: Convert unicodeobject.c macros to functions Convert unicodeobject.c macros to static inline functions. * Add _PyUnicode_SET_UTF8() and _PyUnicode_SET_UTF8_LENGTH() macros. * Add PyUnicode_HASH() and PyUnicode_SET_HASH() macros. * Remove unused _PyUnicode_KIND() and _PyUnicode_GET_LENGTH() macros. --- Objects/unicodeobject.c | 116 ++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b7aeb06d32bcec..65b9813ed7dea5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -112,20 +112,39 @@ NOTE: In the interpreter's initialization phase, some globals are currently # define _PyUnicode_CHECK(op) PyUnicode_Check(op) #endif -#define _PyUnicode_UTF8(op) \ - (_PyCompactUnicodeObject_CAST(op)->utf8) -#define PyUnicode_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((char*)(_PyASCIIObject_CAST(op) + 1)) : \ - _PyUnicode_UTF8(op)) -#define _PyUnicode_UTF8_LENGTH(op) \ - (_PyCompactUnicodeObject_CAST(op)->utf8_length) -#define PyUnicode_UTF8_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - _PyASCIIObject_CAST(op)->length : \ - _PyUnicode_UTF8_LENGTH(op)) +static inline char* _PyUnicode_UTF8(PyObject *op) +{ + return (_PyCompactUnicodeObject_CAST(op)->utf8); +} + +static inline char* PyUnicode_UTF8(PyObject *op) { + assert(_PyUnicode_CHECK(op)); + if (PyUnicode_IS_COMPACT_ASCII(op)) { + return ((char*)(_PyASCIIObject_CAST(op) + 1)); + } + else { + return _PyUnicode_UTF8(op); + } +} + +static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8) +{ + _PyCompactUnicodeObject_CAST(op)->utf8 = utf8; +} + +static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) { + assert(_PyUnicode_CHECK(op)); + if (PyUnicode_IS_COMPACT_ASCII(op)) { + return _PyASCIIObject_CAST(op)->length; + } + else { + return _PyCompactUnicodeObject_CAST(op)->utf8_length; + } +} + +static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) { + _PyCompactUnicodeObject_CAST(op)->utf8_length = length; +} #define _PyUnicode_LENGTH(op) \ (_PyASCIIObject_CAST(op)->length) @@ -133,26 +152,33 @@ NOTE: In the interpreter's initialization phase, some globals are currently (_PyASCIIObject_CAST(op)->state) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -#define _PyUnicode_KIND(op) \ - (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->state.kind) -#define _PyUnicode_GET_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->length) + +static inline Py_hash_t PyUnicode_HASH(PyObject *op) { + assert(_PyUnicode_CHECK(op)); + return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); +} + +static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { + FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash); +} + #define _PyUnicode_DATA_ANY(op) \ (_PyUnicodeObject_CAST(op)->data.any) -#define _PyUnicode_SHARE_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ - (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) +static inline int _PyUnicode_SHARE_UTF8(PyObject *op) { + assert(_PyUnicode_CHECK(op)); + assert(!PyUnicode_IS_COMPACT_ASCII(op)); + return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)); +} /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ -#define _PyUnicode_HAS_UTF8_MEMORY(op) \ - ((!PyUnicode_IS_COMPACT_ASCII(op) \ - && _PyUnicode_UTF8(op) \ - && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) +static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op) { + return (!PyUnicode_IS_COMPACT_ASCII(op) + && _PyUnicode_UTF8(op) != NULL + && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)); +} + /* Generic helper macro to convert characters of different types. from_type and to_type have to be valid type names, begin and end @@ -1123,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; + PyUnicode_SET_UTF8(unicode, NULL); + PyUnicode_SET_UTF8_LENGTH(unicode, 0); } #ifdef Py_TRACE_REFS _Py_ForgetReference(unicode); @@ -1177,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; + PyUnicode_SET_UTF8(unicode, NULL); + PyUnicode_SET_UTF8_LENGTH(unicode, 0); } data = (PyObject *)PyObject_Realloc(data, new_size); @@ -1188,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } _PyUnicode_DATA_ANY(unicode) = data; if (share_utf8) { - _PyUnicode_UTF8(unicode) = data; - _PyUnicode_UTF8_LENGTH(unicode) = length; + PyUnicode_SET_UTF8(unicode, data); + PyUnicode_SET_UTF8_LENGTH(unicode, length); } _PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); @@ -1769,7 +1795,7 @@ unicode_modifiable(PyObject *unicode) assert(_PyUnicode_CHECK(unicode)); if (Py_REFCNT(unicode) != 1) return 0; - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1) + if (PyUnicode_HASH(unicode) != -1) return 0; if (PyUnicode_CHECK_INTERNED(unicode)) return 0; @@ -5862,8 +5888,8 @@ unicode_fill_utf8(PyObject *unicode) PyErr_NoMemory(); return -1; } - _PyUnicode_UTF8(unicode) = cache; - _PyUnicode_UTF8_LENGTH(unicode) = len; + PyUnicode_SET_UTF8(unicode, cache); + PyUnicode_SET_UTF8_LENGTH(unicode, len); memcpy(cache, start, len); cache[len] = '\0'; _PyBytesWriter_Dealloc(&writer); @@ -11434,9 +11460,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) return 0; } - Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni)); + Py_hash_t right_hash = PyUnicode_HASH(right_uni); assert(right_hash != -1); - Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left)); + Py_hash_t hash = PyUnicode_HASH(left); if (hash != -1 && hash != right_hash) { return 0; } @@ -11916,14 +11942,14 @@ unicode_hash(PyObject *self) #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); #endif - Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self)); + Py_hash_t hash = PyUnicode_HASH(self); if (hash != -1) { return hash; } x = Py_HashBuffer(PyUnicode_DATA(self), PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); - FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x); + PyUnicode_SET_HASH(self, x); return x; } @@ -15427,8 +15453,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).statically_allocated = 0; - _PyUnicode_UTF8_LENGTH(self) = 0; - _PyUnicode_UTF8(self) = NULL; + PyUnicode_SET_UTF8_LENGTH(self, 0); + PyUnicode_SET_UTF8(self, NULL); _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; @@ -15458,8 +15484,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_DATA_ANY(self) = data; if (share_utf8) { - _PyUnicode_UTF8_LENGTH(self) = length; - _PyUnicode_UTF8(self) = data; + PyUnicode_SET_UTF8_LENGTH(self, length); + PyUnicode_SET_UTF8(self, data); } memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); From 8bb16ce0884ee4c928a4c12aff6479ba5b5fb3ef Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Dec 2024 13:56:14 +0100 Subject: [PATCH 2/2] PEP 7 --- Objects/unicodeobject.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 65b9813ed7dea5..53be6f5b9017a3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -117,7 +117,8 @@ static inline char* _PyUnicode_UTF8(PyObject *op) return (_PyCompactUnicodeObject_CAST(op)->utf8); } -static inline char* PyUnicode_UTF8(PyObject *op) { +static inline char* PyUnicode_UTF8(PyObject *op) +{ assert(_PyUnicode_CHECK(op)); if (PyUnicode_IS_COMPACT_ASCII(op)) { return ((char*)(_PyASCIIObject_CAST(op) + 1)); @@ -132,7 +133,8 @@ static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8) _PyCompactUnicodeObject_CAST(op)->utf8 = utf8; } -static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) { +static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) +{ assert(_PyUnicode_CHECK(op)); if (PyUnicode_IS_COMPACT_ASCII(op)) { return _PyASCIIObject_CAST(op)->length; @@ -142,7 +144,8 @@ static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) { } } -static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) { +static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) +{ _PyCompactUnicodeObject_CAST(op)->utf8_length = length; } @@ -153,19 +156,22 @@ static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) { #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -static inline Py_hash_t PyUnicode_HASH(PyObject *op) { +static inline Py_hash_t PyUnicode_HASH(PyObject *op) +{ assert(_PyUnicode_CHECK(op)); return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); } -static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) { +static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) +{ FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash); } #define _PyUnicode_DATA_ANY(op) \ (_PyUnicodeObject_CAST(op)->data.any) -static inline int _PyUnicode_SHARE_UTF8(PyObject *op) { +static inline int _PyUnicode_SHARE_UTF8(PyObject *op) +{ assert(_PyUnicode_CHECK(op)); assert(!PyUnicode_IS_COMPACT_ASCII(op)); return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)); @@ -173,7 +179,8 @@ static inline int _PyUnicode_SHARE_UTF8(PyObject *op) { /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ -static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op) { +static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op) +{ return (!PyUnicode_IS_COMPACT_ASCII(op) && _PyUnicode_UTF8(op) != NULL && _PyUnicode_UTF8(op) != PyUnicode_DATA(op));