From 0022adc2d3e0cd2e9346cdf2fd0409e10c03098f Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 19 Nov 2023 00:49:54 +0200 Subject: [PATCH 1/6] gh-111972: Make ucnhash_capi initialization thread-safe --- Include/internal/pycore_ucnhash.h | 2 ++ Objects/unicodeobject.c | 17 +++++++++++++++++ Python/codecs.c | 16 +++++++--------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h index 187dd68e7347ff..1561dfbb3150d3 100644 --- a/Include/internal/pycore_ucnhash.h +++ b/Include/internal/pycore_ucnhash.h @@ -28,6 +28,8 @@ typedef struct { } _PyUnicode_Name_CAPI; +extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void); + #ifdef __cplusplus } #endif diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cffc06297a9aee..da0d755ea9036f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode) return _PyUnicode_EncodeUTF16(unicode, NULL, 0); } +_PyUnicode_Name_CAPI * +_PyUnicode_GetNameCAPI(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_Name_CAPI *ucnhash_capi; + + ucnhash_capi = _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi); + if (ucnhash_capi == NULL) { + ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + PyUnicodeData_CAPSULE_NAME, 1); + + // It's fine if we overwite the value here. It's always the same value. + _Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, ucnhash_capi); + } + return ucnhash_capi; +} + /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * diff --git a/Python/codecs.c b/Python/codecs.c index b79bf555f2f22a..da9744f32b51de 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -932,8 +932,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } -static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; - PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { @@ -954,13 +952,13 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; - if (!ucnhash_capi) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (!ucnhash_capi) { - return NULL; - } + _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); + if (ucnhash_capi == NULL) { + PyErr_SetString( + PyExc_UnicodeError, + "\\N escapes not supported (can't load unicodedata module)" + ); + return NULL; } for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */ From 06111d7f8f80eaf09739d9a5f43f9713e40de789 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 19 Nov 2023 09:48:17 +0200 Subject: [PATCH 2/6] Address review --- Objects/unicodeobject.c | 14 ++------------ Python/codecs.c | 4 ---- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index da0d755ea9036f..4ca66260d0255d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6049,19 +6049,9 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, /* \N{name} */ case 'N': - ucnhash_capi = interp->unicode.ucnhash_capi; + ucnhash_capi = _PyUnicode_GetNameCAPI(); if (ucnhash_capi == NULL) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (ucnhash_capi == NULL) { - PyErr_SetString( - PyExc_UnicodeError, - "\\N escapes not supported (can't load unicodedata module)" - ); - goto onError; - } - interp->unicode.ucnhash_capi = ucnhash_capi; + goto onError; } message = "malformed \\N character escape"; diff --git a/Python/codecs.c b/Python/codecs.c index da9744f32b51de..244f01e85d6c16 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -954,10 +954,6 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); if (ucnhash_capi == NULL) { - PyErr_SetString( - PyExc_UnicodeError, - "\\N escapes not supported (can't load unicodedata module)" - ); return NULL; } for (i = start, ressize = 0; i < end; ++i) { From ca5b5b676507d67407ecf26856d0d6f5345dfcb8 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 19 Nov 2023 09:53:38 +0200 Subject: [PATCH 3/6] Remove unused variable --- Objects/unicodeobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ca66260d0255d..a9ea65327a420f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5901,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; _PyUnicode_Name_CAPI *ucnhash_capi; - PyInterpreterState *interp = _PyInterpreterState_GET(); // so we can remember if we've seen an invalid escape char or not *first_invalid_escape = NULL; From 6e2dbbdd555e2bb40a0b3980e35c0cd322fffcfd Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 19 Nov 2023 10:07:21 +0200 Subject: [PATCH 4/6] Fix test --- Lib/test/test_unicodedata.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index d3bf4ea7c7d437..ae1826eb59328f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -289,8 +289,7 @@ def test_failed_import_during_compiling(self): # We use a separate process because the unicodedata module may already # have been loaded in this process. result = script_helper.assert_python_failure("-c", code) - error = "SyntaxError: (unicode error) \\N escapes not supported " \ - "(can't load unicodedata module)" + error = 'ImportError: PyCapsule_Import could not import module "unicodedata"' self.assertIn(error, result.err.decode("ascii")) def test_decimal_numeric_consistent(self): From 410283908307cb865bc56129d4bc29cf8e310710 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 20 Nov 2023 08:55:36 +0200 Subject: [PATCH 5/6] Restore previous behaviour --- Lib/test/test_unicodedata.py | 3 ++- Objects/unicodeobject.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index ae1826eb59328f..27f23f06092f53 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -289,7 +289,8 @@ def test_failed_import_during_compiling(self): # We use a separate process because the unicodedata module may already # have been loaded in this process. result = script_helper.assert_python_failure("-c", code) - error = 'ImportError: PyCapsule_Import could not import module "unicodedata"' + error = "SyntaxError: (unicode error) \\N escapes not supported " \ + "(can't load unicodedata module)" self.assertIn(error, result.err.decode("ascii")) def test_decimal_numeric_consistent(self): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a9ea65327a420f..10022e23c04abf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6050,6 +6050,10 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, case 'N': ucnhash_capi = _PyUnicode_GetNameCAPI(); if (ucnhash_capi == NULL) { + PyErr_SetString( + PyExc_UnicodeError, + "\\N escapes not supported (can't load unicodedata module)" + ); goto onError; } From bfbb3939b1ad8d9a1dc792c1f18016caea4816a4 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 20 Nov 2023 08:58:45 +0200 Subject: [PATCH 6/6] Fix indent --- Lib/test/test_unicodedata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 27f23f06092f53..d3bf4ea7c7d437 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -290,7 +290,7 @@ def test_failed_import_during_compiling(self): # have been loaded in this process. result = script_helper.assert_python_failure("-c", code) error = "SyntaxError: (unicode error) \\N escapes not supported " \ - "(can't load unicodedata module)" + "(can't load unicodedata module)" self.assertIn(error, result.err.decode("ascii")) def test_decimal_numeric_consistent(self):