From d2d5ccd0f8bd05d5f6805854af68ac5a340e073d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:02:28 +0100 Subject: [PATCH 1/7] Use `_PyUnicodeError_GetParams` in `PyCodec_IgnoreErrors`. --- Python/codecs.c | 75 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 2cb3875db35058..678db5804eb38f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -659,44 +659,68 @@ PyObject *PyCodec_LookupError(const char *name) return handler; } -static void wrong_exception_type(PyObject *exc) + +static inline void +unsupported_unicode_error_type(PyObject *exc) { PyErr_Format(PyExc_TypeError, - "don't know how to handle %.200s in error callback", - Py_TYPE(exc)->tp_name); + "don't know how to handle %T in error callback", + exc); } + +#define _PyIsUnicodeEncodeError(EXC) \ + PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeEncodeError) +#define _PyIsUnicodeDecodeError(EXC) \ + PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeDecodeError) +#define _PyIsUnicodeTranslateError(EXC) \ + PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeTranslateError) + + +// --- handler: 'strict' ------------------------------------------------------ + PyObject *PyCodec_StrictErrors(PyObject *exc) { - if (PyExceptionInstance_Check(exc)) + if (PyExceptionInstance_Check(exc)) { PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - else + } + else { PyErr_SetString(PyExc_TypeError, "codec must pass exception instance"); + } return NULL; } -PyObject *PyCodec_IgnoreErrors(PyObject *exc) +// --- handler: 'ignore' ------------------------------------------------------ + +static PyObject * +_PyCodec_IgnoreError(PyObject *exc, int as_bytes) { Py_ssize_t end; + if (_PyUnicodeError_GetParams(exc, NULL, NULL, NULL, + &end, NULL, as_bytes) < 0) + { + return NULL; + } + return Py_BuildValue("(Nn)", Py_GetConstant(Py_CONSTANT_EMPTY_STR), end); +} - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; + +PyObject *PyCodec_IgnoreErrors(PyObject *exc) +{ + if (_PyIsUnicodeEncodeError(exc)) { + return _PyCodec_IgnoreError(exc, false); } - else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - if (PyUnicodeDecodeError_GetEnd(exc, &end)) - return NULL; + else if (_PyIsUnicodeDecodeError(exc)) { + return _PyCodec_IgnoreError(exc, true); } - else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { - if (PyUnicodeTranslateError_GetEnd(exc, &end)) - return NULL; + else if (_PyIsUnicodeTranslateError(exc)) { + return _PyCodec_IgnoreError(exc, false); } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } - return Py_BuildValue("(Nn)", Py_GetConstant(Py_CONSTANT_EMPTY_STR), end); } @@ -748,7 +772,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } } @@ -846,7 +870,7 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) return restuple; } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } } @@ -906,7 +930,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return NULL; } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } @@ -1043,7 +1067,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return restuple; } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } } @@ -1275,7 +1299,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, start + bytelength); } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } } @@ -1352,19 +1376,22 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) return Py_BuildValue("(Nn)", str, start+consumed); } else { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } } +// --- Codecs registry handlers ----------------------------------------------- + static PyObject *strict_errors(PyObject *self, PyObject *exc) { return PyCodec_StrictErrors(exc); } -static PyObject *ignore_errors(PyObject *self, PyObject *exc) +static inline PyObject * +ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_IgnoreErrors(exc); } From 1318d98f20cb6b5e8884f98dfcf32ef371fabe1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:06:29 +0100 Subject: [PATCH 2/7] fix names --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 6f5d264cd0ecab..66caf6fea652dc 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -778,7 +778,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) { if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - wrong_exception_type(exc); + unsupported_unicode_error_type(exc); return NULL; } From b907a4a40ca715d5821343c2930c757a55ac3490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:09:46 +0100 Subject: [PATCH 3/7] reverting to a better naming --- Python/codecs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 66caf6fea652dc..ed7a2b1e8bc1ea 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -661,7 +661,7 @@ PyObject *PyCodec_LookupError(const char *name) static inline void -unsupported_unicode_error_type(PyObject *exc) +wrong_exception_type(PyObject *exc) { PyErr_Format(PyExc_TypeError, "don't know how to handle %T in error callback", @@ -718,7 +718,7 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc) return _PyCodec_IgnoreError(exc, false); } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } } @@ -770,7 +770,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } } @@ -778,7 +778,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) { if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } @@ -928,7 +928,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) } } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } @@ -1077,7 +1077,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return restuple; } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } } @@ -1309,7 +1309,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, start + bytelength); } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } } @@ -1386,7 +1386,7 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) return Py_BuildValue("(Nn)", str, start+consumed); } else { - unsupported_unicode_error_type(exc); + wrong_exception_type(exc); return NULL; } } From d3b1a4dc77b937685bead3c6ed27e09181349e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:10:07 +0100 Subject: [PATCH 4/7] tabs->spaces --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index ed7a2b1e8bc1ea..aa9ee4c8709969 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -669,7 +669,7 @@ wrong_exception_type(PyObject *exc) } -#define _PyIsUnicodeEncodeError(EXC) \ +#define _PyIsUnicodeEncodeError(EXC) \ PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeEncodeError) #define _PyIsUnicodeDecodeError(EXC) \ PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeDecodeError) From 91df6378471989e61e34f14cf693be56d78b0354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:10:25 +0100 Subject: [PATCH 5/7] tabs->spaces --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index aa9ee4c8709969..ae20e5af8c0b43 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -673,7 +673,7 @@ wrong_exception_type(PyObject *exc) PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeEncodeError) #define _PyIsUnicodeDecodeError(EXC) \ PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeDecodeError) -#define _PyIsUnicodeTranslateError(EXC) \ +#define _PyIsUnicodeTranslateError(EXC) \ PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeTranslateError) From 4ea203f7bbf651c677e9ad65be8bf92a75cfb052 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:13:24 +0100 Subject: [PATCH 6/7] cosmetics --- Python/codecs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index ae20e5af8c0b43..eea39de82b8c32 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -708,15 +708,12 @@ _PyCodec_IgnoreError(PyObject *exc, int as_bytes) PyObject *PyCodec_IgnoreErrors(PyObject *exc) { - if (_PyIsUnicodeEncodeError(exc)) { + if (_PyIsUnicodeEncodeError(exc) || _PyIsUnicodeTranslateError(exc)) { return _PyCodec_IgnoreError(exc, false); } else if (_PyIsUnicodeDecodeError(exc)) { return _PyCodec_IgnoreError(exc, true); } - else if (_PyIsUnicodeTranslateError(exc)) { - return _PyCodec_IgnoreError(exc, false); - } else { wrong_exception_type(exc); return NULL; @@ -1394,7 +1391,8 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) // --- Codecs registry handlers ----------------------------------------------- -static PyObject *strict_errors(PyObject *self, PyObject *exc) +static inline PyObject * +strict_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_StrictErrors(exc); } From 44362615411bbd784c670210a3d770d39cc20f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 23 Jan 2025 18:27:48 +0100 Subject: [PATCH 7/7] Update Python/codecs.c Co-authored-by: Victor Stinner --- Python/codecs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index eea39de82b8c32..53680a79082634 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -664,8 +664,7 @@ static inline void wrong_exception_type(PyObject *exc) { PyErr_Format(PyExc_TypeError, - "don't know how to handle %T in error callback", - exc); + "don't know how to handle %T in error callback", exc); }