From ce7c135c4df91b2e3b19f18da66e493b61277c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:50:59 +0200 Subject: [PATCH 01/20] add tests for C API `codecs` --- Modules/_testcapi/codec.c | 237 +++++++++++++++++++++++++++++++++++++- 1 file changed, 232 insertions(+), 5 deletions(-) diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index d13f51e20331a1..8fa3b57194328f 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -1,17 +1,244 @@ #include "parts.h" -#include "util.h" +// === Codecs registration and un-registration ================================ + +static PyObject * +codec_register(PyObject *Py_UNUSED(module), PyObject *search_function) +{ + if (PyCodec_Register(search_function) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function) +{ + if (PyCodec_Unregister(search_function) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + if (!PyArg_ParseTuple(args, "z", &encoding)) { + return NULL; + } + assert(encoding != NULL); + return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False; +} + +// === Codecs encoding and decoding interfaces ================================ + +static PyObject * +codec_encode(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *input; + const char *encoding; // should not be NULL + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { + return NULL; + } + assert(encoding != NULL); + return PyCodec_Encode(input, encoding, errors); +} + +static PyObject * +codec_decode(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *input; + const char *encoding; // should not be NULL + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { + return NULL; + } + assert(encoding != NULL); + return PyCodec_Decode(input, encoding, errors); +} + +static PyObject * +codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + if (!PyArg_ParseTuple(args, "z", &encoding)) { + return NULL; + } + assert(encoding != NULL); + return PyCodec_Encoder(encoding); +} + +static PyObject * +codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + if (!PyArg_ParseTuple(args, "z", &encoding)) { + return NULL; + } + assert(encoding != NULL); + return PyCodec_Decoder(encoding); +} + +static PyObject * +codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + const char *errors; // should not be NULL + if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { + return NULL; + } + assert(encoding != NULL); + assert(errors != NULL); + return PyCodec_IncrementalEncoder(encoding, errors); +} + +static PyObject * +codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + const char *errors; // should not be NULL + if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { + return NULL; + } + assert(encoding != NULL); + assert(errors != NULL); + return PyCodec_IncrementalDecoder(encoding, errors); +} + +static PyObject * +codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + PyObject *stream; + const char *errors; // should not be NULL + if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { + return NULL; + } + assert(encoding != NULL); + assert(errors != NULL); + return PyCodec_StreamReader(encoding, stream, errors); +} + +static PyObject * +codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + PyObject *stream; + const char *errors; // should not be NULL + if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { + return NULL; + } + assert(encoding != NULL); + assert(errors != NULL); + return PyCodec_StreamWriter(encoding, stream, errors); +} + +// === Codecs errors handlers ================================================= + +static PyObject * +codec_register_error(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // should not be NULL + PyObject *error; + if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) { + return NULL; + } + assert(encoding != NULL); + if (PyCodec_RegisterError(encoding, error) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args) +{ + const char *encoding; // can be NULL + if (!PyArg_ParseTuple(args, "z", &encoding)) { + return NULL; + } + return PyCodec_LookupError(encoding); +} + +static PyObject * +codec_strict_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_StrictErrors(exc); +} + +static PyObject * +codec_ignore_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_IgnoreErrors(exc); +} + +static PyObject * +codec_replace_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_ReplaceErrors(exc); +} + +static PyObject * +codec_xmlcharrefreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_XMLCharRefReplaceErrors(exc); +} + +static PyObject * +codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_BackslashReplaceErrors(exc); +} + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +static PyObject * +codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_NameReplaceErrors(exc); +} +#endif static PyMethodDef test_methods[] = { - {NULL}, + /* codecs registration */ + {"codec_register", codec_register, METH_O}, + {"codec_unregister", codec_unregister, METH_O}, + {"codec_known_encoding", codec_known_encoding, METH_VARARGS}, + /* encoding and decoding interface */ + {"codec_encode", codec_encode, METH_VARARGS}, + {"codec_decode", codec_decode, METH_VARARGS}, + {"codec_encoder", codec_encoder, METH_VARARGS}, + {"codec_decoder", codec_decoder, METH_VARARGS}, + {"codec_incremental_encoder", codec_incremental_encoder, METH_VARARGS}, + {"codec_incremental_decoder", codec_incremental_decoder, METH_VARARGS}, + {"codec_stream_reader", codec_stream_reader, METH_VARARGS}, + {"codec_stream_writer", codec_stream_writer, METH_VARARGS}, + /* error handling */ + {"codec_register_error", codec_register_error, METH_VARARGS}, + {"codec_lookup_error", codec_lookup_error, METH_VARARGS}, + {"codec_strict_errors", codec_strict_errors, METH_O}, + {"codec_ignore_errors", codec_ignore_errors, METH_O}, + {"codec_replace_errors", codec_replace_errors, METH_O}, + {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O}, + {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O}, +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 + {"codec_namereplace_errors", codec_namereplace_errors, METH_O}, +#endif + {NULL, NULL, 0, NULL}, }; int -_PyTestCapi_Init_Codec(PyObject *m) +_PyTestCapi_Init_Codec(PyObject *module) { - if (PyModule_AddFunctions(m, test_methods) < 0){ + if (PyModule_AddFunctions(module, test_methods) < 0) { return -1; } - return 0; } From f9e350a18c7d1d2f41b596a0ff73f4454b9bd1cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:51:10 +0200 Subject: [PATCH 02/20] add Python tests for `_codecs` --- Lib/test/test_capi/test_codecs.py | 232 +++++++++++++++++++++++++++++- 1 file changed, 230 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index bd521a509d07ec..f3e96cadf67066 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -1,5 +1,10 @@ -import unittest +import codecs +import contextlib +import io import sys +import unittest +import unittest.mock as mock +import _testcapi from test.support import import_helper _testlimitedcapi = import_helper.import_module('_testlimitedcapi') @@ -7,7 +12,7 @@ NULL = None -class CAPITest(unittest.TestCase): +class CAPIUnicodeTest(unittest.TestCase): # TODO: Test the following functions: # # PyUnicode_BuildEncodingMap @@ -516,5 +521,228 @@ def test_asrawunicodeescapestring(self): # CRASHES asrawunicodeescapestring(NULL) +class CAPICodecRegistration(unittest.TestCase): + + def setUp(self): + self.enterContext(import_helper.isolated_modules()) + self.enterContext(import_helper.CleanImport('codecs')) + self.codecs = import_helper.import_module('codecs') + # Encoding names are normalized internally by converting them + # to lowercase and their hyphens are replaced by underscores. + self.encoding_name = f'codec_reversed_{id(self)}' + # make sure that our custom codec is not already registered + self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) + # create the search function without registering yet + self._create_custom_codec() + + def _create_custom_codec(self): + def codec_encoder(m, errors='strict'): + return (type(m)().join(reversed(m)), len(m)) + + def codec_decoder(c, errors='strict'): + return (type(c)().join(reversed(c)), len(c)) + + class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codec_encoder(input) + + class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codec_decoder(input) + + class StreamReader(codecs.StreamReader): + def encode(self, input, errors='strict'): + return codec_encoder(input, errors=errors) + + def decode(self, input, errors='strict'): + return codec_decoder(input, errors=errors) + + class StreamWriter(codecs.StreamWriter): + def encode(self, input, errors='strict'): + return codec_encoder(input, errors=errors) + + def decode(self, input, errors='strict'): + return codec_decoder(input, errors=errors) + + info = codecs.CodecInfo( + encode=codec_encoder, + decode=codec_decoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + name=self.encoding_name + ) + + def search_function(encoding): + if encoding == self.encoding_name: + return info + return None + + self.codec_info = info + self.search_function = search_function + + @contextlib.contextmanager + def use_custom_encoder(self): + self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) + self.codecs.register(self.search_function) + yield + self.codecs.unregister(self.search_function) + self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) + + def test_codec_register(self): + search_function, encoding = self.search_function, self.encoding_name + self.assertIsNone(_testcapi.codec_register(search_function)) + self.assertIs(self.codecs.lookup(encoding), search_function(encoding)) + self.assertEqual(self.codecs.encode('123', encoding=encoding), '321') + + def test_codec_unregister(self): + search_function, encoding = self.search_function, self.encoding_name + self.assertRaises(LookupError, self.codecs.lookup, encoding) + self.codecs.register(search_function) + self.assertIsNone(_testcapi.codec_unregister(search_function)) + self.assertRaises(LookupError, self.codecs.lookup, encoding) + + def test_codec_known_encoding(self): + self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec') + self.assertFalse(_testcapi.codec_known_encoding('unknown-codec')) + self.assertFalse(_testcapi.codec_known_encoding('unknown_codec')) + self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec')) + + encoding_name = self.encoding_name + self.assertRaises(LookupError, self.codecs.lookup, encoding_name) + self.codecs.register(self.search_function) + + for name in [ + encoding_name, + encoding_name.upper(), + encoding_name.replace('_', '-'), + ]: + with self.subTest(name): + self.assertTrue(_testcapi.codec_known_encoding(name)) + + def test_codec_encode(self): + encode = _testcapi.codec_encode + self.assertEqual(encode('a', 'utf-8', NULL), b'a') + self.assertEqual(encode('a', 'utf-8', 'strict'), b'a') + self.assertEqual(encode('é', 'ascii', 'ignore'), b'') + # todo: add more cases + self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict') + # CRASHES encode('a', NULL, 'strict') + + def test_codec_decode(self): + decode = _testcapi.codec_decode + + b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' + s = 'a\xa1\u4f60\U0001f600' + + self.assertEqual(decode(b, 'utf-8', 'strict'), s) + self.assertEqual(decode(b, 'utf-8', NULL), s) + self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1')) + self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict') + self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL) + self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9) + # todo: add more cases + + # _codecs.decode only reports unknown errors policy when they are + # used (it has a fast path for empty bytes); this is different from + # PyUnicode_Decode which checks that both the encoding and the errors + # policy are recognized. + self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '') + + self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict') + # CRASHES decode(b, NULL, 'strict') + + def test_codec_encoder(self): + with self.use_custom_encoder(): + encoder = _testcapi.codec_encoder(self.encoding_name) + self.assertIs(encoder, self.codec_info.encode) + + def test_codec_decoder(self): + with self.use_custom_encoder(): + decoder = _testcapi.codec_decoder(self.encoding_name) + self.assertIs(decoder, self.codec_info.decode) + + def test_codec_incremental_encoder(self): + with self.use_custom_encoder(): + encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict') + self.assertIsInstance(encoder, self.codec_info.incrementalencoder) + + def test_codec_incremental_decoder(self): + with self.use_custom_encoder(): + decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict') + self.assertIsInstance(decoder, self.codec_info.incrementaldecoder) + + def test_codec_stream_reader(self): + with self.use_custom_encoder(): + encoding, stream = self.encoding_name, io.StringIO() + reader = _testcapi.codec_stream_reader(encoding, stream, 'strict') + self.assertIsInstance(reader, self.codec_info.streamreader) + + def test_codec_stream_writer(self): + with self.use_custom_encoder(): + encoding, stream = self.encoding_name, io.StringIO() + writer = _testcapi.codec_stream_writer(encoding, stream, 'strict') + self.assertIsInstance(writer, self.codec_info.streamwriter) + +class CAPICodecErrors(unittest.TestCase): + + def setUp(self): + self.enterContext(import_helper.isolated_modules()) + self.enterContext(import_helper.CleanImport('codecs')) + self.codecs = import_helper.import_module('codecs') + + def test_codec_register_error(self): + self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom') + + def error_handler(exc): + raise exc + + error_handler = mock.Mock(wraps=error_handler) + _testcapi.codec_register_error('custom', error_handler) + + self.assertRaises(UnicodeEncodeError, self.codecs.encode, + '\xff', 'ascii', errors='custom') + error_handler.assert_called_once() + error_handler.reset_mock() + + self.assertRaises(UnicodeDecodeError, self.codecs.decode, + b'\xff', 'ascii', errors='custom') + error_handler.assert_called_once() + + def test_codec_lookup_error(self): + codec_lookup_error = _testcapi.codec_lookup_error + self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors) + self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors) + self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors) + self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors) + self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors) + self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors) + self.assertRaises(LookupError, codec_lookup_error, 'custom') + + def test_codec_error_handlers(self): + exceptions = [ + UnicodeEncodeError('bad', '', 0, 1, 'reason'), + UnicodeEncodeError('bad', 'x', 0, 1, 'reason'), + UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'), + UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'), + ] + + strict_handler = _testcapi.codec_strict_errors + for exc in exceptions: + with self.subTest(handler=strict_handler, exc=exc): + self.assertRaises(UnicodeEncodeError, strict_handler, exc) + + for handler in [ + _testcapi.codec_ignore_errors, + _testcapi.codec_replace_errors, + _testcapi.codec_xmlcharrefreplace_errors, + _testcapi.codec_namereplace_errors, + ]: + for exc in exceptions: + with self.subTest(handler=handler, exc=exc): + handler(exc) + + if __name__ == "__main__": unittest.main() From 15b68116da1f46ebe195ef8e2d02feb129319b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:40:48 +0200 Subject: [PATCH 03/20] fix size bug --- Objects/exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index fda62f159c1540..9bcdc88e1291ca 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; /*XXX check for values <0*/ if (*start>=size) - *start = size-1; + *start = size ? size-1 : 0; Py_DECREF(obj); return 0; } @@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; if (*start>=size) - *start = size-1; + *start = size ? size-1 : 0; Py_DECREF(obj); return 0; } From 8048ae1d2172df5bace73a3ea30712745d4c6864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:46:04 +0200 Subject: [PATCH 04/20] rename test class --- Lib/test/test_capi/test_codecs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index f3e96cadf67066..c7fbca32ac582b 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -521,7 +521,7 @@ def test_asrawunicodeescapestring(self): # CRASHES asrawunicodeescapestring(NULL) -class CAPICodecRegistration(unittest.TestCase): +class CAPICodecs(unittest.TestCase): def setUp(self): self.enterContext(import_helper.isolated_modules()) From 8487b4630cb51a07ceb16569446733b1947df05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:04:54 +0200 Subject: [PATCH 05/20] Revert "fix size bug" This reverts commit 15b68116da1f46ebe195ef8e2d02feb129319b1f. --- Objects/exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 9bcdc88e1291ca..fda62f159c1540 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; /*XXX check for values <0*/ if (*start>=size) - *start = size ? size-1 : 0; + *start = size-1; Py_DECREF(obj); return 0; } @@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; if (*start>=size) - *start = size ? size-1 : 0; + *start = size-1; Py_DECREF(obj); return 0; } From 0097f2a4c595590d70a04dc61a34c5da08aea011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:24:09 +0200 Subject: [PATCH 06/20] Disable tests that are known to crash. --- Lib/test/test_capi/test_codecs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index c7fbca32ac582b..21cbf76d14dffb 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -722,7 +722,9 @@ def test_codec_lookup_error(self): def test_codec_error_handlers(self): exceptions = [ - UnicodeEncodeError('bad', '', 0, 1, 'reason'), + # A UnicodeError with an empty message currently crashes: + # See: https://github.com/python/cpython/issues/123378 + # UnicodeEncodeError('bad', '', 0, 1, 'reason'), UnicodeEncodeError('bad', 'x', 0, 1, 'reason'), UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'), UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'), From 303b13c4c61aa2a6d13fd027c3c31a0b4cef7f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:24:50 +0200 Subject: [PATCH 07/20] address Victor's review --- Lib/test/test_capi/test_codecs.py | 15 ++++++++++---- Modules/_testcapi/codec.c | 34 +++++++++---------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 21cbf76d14dffb..2c72a989e32364 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -592,14 +592,21 @@ def use_custom_encoder(self): def test_codec_register(self): search_function, encoding = self.search_function, self.encoding_name + # register the search function using the C API self.assertIsNone(_testcapi.codec_register(search_function)) self.assertIs(self.codecs.lookup(encoding), search_function(encoding)) self.assertEqual(self.codecs.encode('123', encoding=encoding), '321') + # unregister the search function using the regular API + self.codecs.unregister(search_function) + self.assertRaises(LookupError, self.codecs.lookup, encoding) def test_codec_unregister(self): search_function, encoding = self.search_function, self.encoding_name self.assertRaises(LookupError, self.codecs.lookup, encoding) + # register the search function using the regular API self.codecs.register(search_function) + self.assertIsNotNone(self.codecs.lookup(encoding)) + # unregister the search function using the C API self.assertIsNone(_testcapi.codec_unregister(search_function)) self.assertRaises(LookupError, self.codecs.lookup, encoding) @@ -625,16 +632,16 @@ def test_codec_encode(self): encode = _testcapi.codec_encode self.assertEqual(encode('a', 'utf-8', NULL), b'a') self.assertEqual(encode('a', 'utf-8', 'strict'), b'a') - self.assertEqual(encode('é', 'ascii', 'ignore'), b'') - # todo: add more cases + self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]') + self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict') # CRASHES encode('a', NULL, 'strict') def test_codec_decode(self): decode = _testcapi.codec_decode - b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' s = 'a\xa1\u4f60\U0001f600' + b = s.encode() self.assertEqual(decode(b, 'utf-8', 'strict'), s) self.assertEqual(decode(b, 'utf-8', NULL), s) @@ -642,7 +649,6 @@ def test_codec_decode(self): self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict') self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL) self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9) - # todo: add more cases # _codecs.decode only reports unknown errors policy when they are # used (it has a fast path for empty bytes); this is different from @@ -685,6 +691,7 @@ def test_codec_stream_writer(self): writer = _testcapi.codec_stream_writer(encoding, stream, 'strict') self.assertIsInstance(writer, self.codec_info.streamwriter) + class CAPICodecErrors(unittest.TestCase): def setUp(self): diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index 8fa3b57194328f..5cd187bde60fef 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -24,10 +24,9 @@ static PyObject * codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "z", &encoding)) { + if (!PyArg_ParseTuple(args, "s", &encoding)) { return NULL; } - assert(encoding != NULL); return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False; } @@ -39,10 +38,9 @@ codec_encode(PyObject *Py_UNUSED(module), PyObject *args) PyObject *input; const char *encoding; // should not be NULL const char *errors; // can be NULL - if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) { return NULL; } - assert(encoding != NULL); return PyCodec_Encode(input, encoding, errors); } @@ -52,10 +50,9 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args) PyObject *input; const char *encoding; // should not be NULL const char *errors; // can be NULL - if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) { return NULL; } - assert(encoding != NULL); return PyCodec_Decode(input, encoding, errors); } @@ -63,10 +60,9 @@ static PyObject * codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "z", &encoding)) { + if (!PyArg_ParseTuple(args, "s", &encoding)) { return NULL; } - assert(encoding != NULL); return PyCodec_Encoder(encoding); } @@ -74,10 +70,9 @@ static PyObject * codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "z", &encoding)) { + if (!PyArg_ParseTuple(args, "s", &encoding)) { return NULL; } - assert(encoding != NULL); return PyCodec_Decoder(encoding); } @@ -86,11 +81,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) { return NULL; } - assert(encoding != NULL); - assert(errors != NULL); return PyCodec_IncrementalEncoder(encoding, errors); } @@ -99,11 +92,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) { return NULL; } - assert(encoding != NULL); - assert(errors != NULL); return PyCodec_IncrementalDecoder(encoding, errors); } @@ -113,11 +104,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) const char *encoding; // should not be NULL PyObject *stream; const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { + if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) { return NULL; } - assert(encoding != NULL); - assert(errors != NULL); return PyCodec_StreamReader(encoding, stream, errors); } @@ -127,11 +116,9 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args) const char *encoding; // should not be NULL PyObject *stream; const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { + if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) { return NULL; } - assert(encoding != NULL); - assert(errors != NULL); return PyCodec_StreamWriter(encoding, stream, errors); } @@ -142,10 +129,9 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args) { const char *encoding; // should not be NULL PyObject *error; - if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) { + if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) { return NULL; } - assert(encoding != NULL); if (PyCodec_RegisterError(encoding, error) < 0) { return NULL; } From 4f474ddc570b6b127b2880f69955496bd6d83f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:13:07 +0200 Subject: [PATCH 08/20] update tests to reflect user errors --- Lib/test/test_capi/test_codecs.py | 79 ++++++++++++++++++++++++------- Modules/_testcapi/codec.c | 54 ++++++++++++--------- 2 files changed, 94 insertions(+), 39 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 2c72a989e32364..c57491619d5976 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -1,6 +1,7 @@ import codecs import contextlib import io +import re import sys import unittest import unittest.mock as mock @@ -10,6 +11,7 @@ _testlimitedcapi = import_helper.import_module('_testlimitedcapi') NULL = None +BAD_ARGUMENT = re.escape('bad argument type for built-in operation') class CAPIUnicodeTest(unittest.TestCase): @@ -635,7 +637,8 @@ def test_codec_encode(self): self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]') self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict') - # CRASHES encode('a', NULL, 'strict') + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + encode('a', NULL, 'strict') def test_codec_decode(self): decode = _testcapi.codec_decode @@ -650,46 +653,90 @@ def test_codec_decode(self): self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL) self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9) - # _codecs.decode only reports unknown errors policy when they are - # used (it has a fast path for empty bytes); this is different from - # PyUnicode_Decode which checks that both the encoding and the errors - # policy are recognized. + # _codecs.decode() only reports unknown errors policy when they are + # used; this is different from PyUnicode_Decode() which checks that + # both the encoding and the errors policy are recognized before even + # attempting to call the decoder. self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '') + self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a') self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict') - # CRASHES decode(b, NULL, 'strict') + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + decode(b, NULL, 'strict') def test_codec_encoder(self): + codec_encoder = _testcapi.codec_encoder + with self.use_custom_encoder(): - encoder = _testcapi.codec_encoder(self.encoding_name) + encoder = codec_encoder(self.encoding_name) self.assertIs(encoder, self.codec_info.encode) + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_encoder(NULL) + def test_codec_decoder(self): + codec_decoder = _testcapi.codec_decoder + with self.use_custom_encoder(): - decoder = _testcapi.codec_decoder(self.encoding_name) + decoder = codec_decoder(self.encoding_name) self.assertIs(decoder, self.codec_info.decode) + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_decoder(NULL) + def test_codec_incremental_encoder(self): + codec_incremental_encoder = _testcapi.codec_incremental_encoder + with self.use_custom_encoder(): - encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict') - self.assertIsInstance(encoder, self.codec_info.incrementalencoder) + encoding = self.encoding_name + + for policy in ['strict', NULL]: + with self.subTest(policy=policy): + encoder = codec_incremental_encoder(encoding, policy) + self.assertIsInstance(encoder, self.codec_info.incrementalencoder) + + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_incremental_encoder(NULL, 'strict') def test_codec_incremental_decoder(self): + codec_incremental_decoder = _testcapi.codec_incremental_decoder + with self.use_custom_encoder(): - decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict') - self.assertIsInstance(decoder, self.codec_info.incrementaldecoder) + encoding = self.encoding_name + + for policy in ['strict', NULL]: + with self.subTest(policy=policy): + decoder = codec_incremental_decoder(encoding, policy) + self.assertIsInstance(decoder, self.codec_info.incrementaldecoder) + + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_incremental_decoder(NULL, 'strict') def test_codec_stream_reader(self): + codec_stream_reader = _testcapi.codec_stream_reader + with self.use_custom_encoder(): encoding, stream = self.encoding_name, io.StringIO() - reader = _testcapi.codec_stream_reader(encoding, stream, 'strict') - self.assertIsInstance(reader, self.codec_info.streamreader) + for policy in ['strict', NULL]: + with self.subTest(policy=policy): + writer = codec_stream_reader(encoding, stream, policy) + self.assertIsInstance(writer, self.codec_info.streamreader) + + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_stream_reader(NULL, stream, 'strict') def test_codec_stream_writer(self): + codec_stream_writer = _testcapi.codec_stream_writer + with self.use_custom_encoder(): encoding, stream = self.encoding_name, io.StringIO() - writer = _testcapi.codec_stream_writer(encoding, stream, 'strict') - self.assertIsInstance(writer, self.codec_info.streamwriter) + for policy in ['strict', NULL]: + with self.subTest(policy=policy): + writer = codec_stream_writer(encoding, stream, policy) + self.assertIsInstance(writer, self.codec_info.streamwriter) + + with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): + codec_stream_writer(NULL, stream, 'strict') class CAPICodecErrors(unittest.TestCase): diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index 5cd187bde60fef..6aa19c2055d6b6 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -1,5 +1,13 @@ #include "parts.h" +/* + * The Codecs C API assume that 'encoding' is not NULL, lest + * it uses PyErr_BadArgument() to set a TypeError exception. + * + * In this file, we allow to call the functions using None + * as NULL to explicitly check this behaviour. + */ + // === Codecs registration and un-registration ================================ static PyObject * @@ -23,8 +31,8 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function) static PyObject * codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "s", &encoding)) { + const char *encoding; // should not be NULL (see top-file comment) + if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False; @@ -36,9 +44,9 @@ static PyObject * codec_encode(PyObject *Py_UNUSED(module), PyObject *args) { PyObject *input; - const char *encoding; // should not be NULL + const char *encoding; // should not be NULL (see top-file comment) const char *errors; // can be NULL - if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { return NULL; } return PyCodec_Encode(input, encoding, errors); @@ -48,9 +56,9 @@ static PyObject * codec_decode(PyObject *Py_UNUSED(module), PyObject *args) { PyObject *input; - const char *encoding; // should not be NULL + const char *encoding; // should not be NULL (see top-file comment) const char *errors; // can be NULL - if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) { + if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { return NULL; } return PyCodec_Decode(input, encoding, errors); @@ -59,8 +67,8 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "s", &encoding)) { + const char *encoding; // should not be NULL (see top-file comment) + if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } return PyCodec_Encoder(encoding); @@ -69,8 +77,8 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL - if (!PyArg_ParseTuple(args, "s", &encoding)) { + const char *encoding; // should not be NULL (see top-file comment) + if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } return PyCodec_Decoder(encoding); @@ -79,9 +87,9 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL - const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) { + const char *encoding; // should not be NULL (see top-file comment) + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { return NULL; } return PyCodec_IncrementalEncoder(encoding, errors); @@ -90,9 +98,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL - const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) { + const char *encoding; // should not be NULL (see top-file comment) + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { return NULL; } return PyCodec_IncrementalDecoder(encoding, errors); @@ -101,10 +109,10 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL + const char *encoding; // should not be NULL (see top-file comment) PyObject *stream; - const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) { + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { return NULL; } return PyCodec_StreamReader(encoding, stream, errors); @@ -113,10 +121,10 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL + const char *encoding; // should not be NULL (see top-file comment) PyObject *stream; - const char *errors; // should not be NULL - if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) { + const char *errors; // can be NULL + if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { return NULL; } return PyCodec_StreamWriter(encoding, stream, errors); @@ -127,7 +135,7 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_register_error(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL + const char *encoding; // must not be NULL PyObject *error; if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) { return NULL; From 87ee0d26d4c150795437c06b547c14464e8b94b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:52:35 +0200 Subject: [PATCH 09/20] fix C API codec tests --- Lib/test/test_capi/test_codecs.py | 68 ++++++++++++++++--------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index c57491619d5976..4865f4384422ac 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -1,6 +1,7 @@ import codecs import contextlib import io +import os import re import sys import unittest @@ -526,14 +527,13 @@ def test_asrawunicodeescapestring(self): class CAPICodecs(unittest.TestCase): def setUp(self): - self.enterContext(import_helper.isolated_modules()) - self.enterContext(import_helper.CleanImport('codecs')) - self.codecs = import_helper.import_module('codecs') # Encoding names are normalized internally by converting them # to lowercase and their hyphens are replaced by underscores. self.encoding_name = f'codec_reversed_{id(self)}' - # make sure that our custom codec is not already registered - self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) + # Make sure that our custom codec is not already registered (that + # way we know whether we correctly unregistered the custom codec + # after a test or not). + self.assertRaises(LookupError, codecs.lookup, self.encoding_name) # create the search function without registering yet self._create_custom_codec() @@ -586,41 +586,47 @@ def search_function(encoding): @contextlib.contextmanager def use_custom_encoder(self): - self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) - self.codecs.register(self.search_function) + self.assertRaises(LookupError, codecs.lookup, self.encoding_name) + codecs.register(self.search_function) yield - self.codecs.unregister(self.search_function) - self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name) + codecs.unregister(self.search_function) + self.assertRaises(LookupError, codecs.lookup, self.encoding_name) def test_codec_register(self): search_function, encoding = self.search_function, self.encoding_name # register the search function using the C API self.assertIsNone(_testcapi.codec_register(search_function)) - self.assertIs(self.codecs.lookup(encoding), search_function(encoding)) - self.assertEqual(self.codecs.encode('123', encoding=encoding), '321') + # in case the test failed before cleaning up + self.addCleanup(codecs.unregister, self.search_function) + self.assertIs(codecs.lookup(encoding), search_function(encoding)) + self.assertEqual(codecs.encode('123', encoding=encoding), '321') # unregister the search function using the regular API - self.codecs.unregister(search_function) - self.assertRaises(LookupError, self.codecs.lookup, encoding) + codecs.unregister(search_function) + self.assertRaises(LookupError, codecs.lookup, encoding) def test_codec_unregister(self): search_function, encoding = self.search_function, self.encoding_name - self.assertRaises(LookupError, self.codecs.lookup, encoding) + self.assertRaises(LookupError, codecs.lookup, encoding) # register the search function using the regular API - self.codecs.register(search_function) - self.assertIsNotNone(self.codecs.lookup(encoding)) + codecs.register(search_function) + # in case the test failed before cleaning up + self.addCleanup(codecs.unregister, self.search_function) + self.assertIsNotNone(codecs.lookup(encoding)) # unregister the search function using the C API self.assertIsNone(_testcapi.codec_unregister(search_function)) - self.assertRaises(LookupError, self.codecs.lookup, encoding) + self.assertRaises(LookupError, codecs.lookup, encoding) def test_codec_known_encoding(self): - self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec') + self.assertRaises(LookupError, codecs.lookup, 'unknown-codec') self.assertFalse(_testcapi.codec_known_encoding('unknown-codec')) self.assertFalse(_testcapi.codec_known_encoding('unknown_codec')) self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec')) encoding_name = self.encoding_name - self.assertRaises(LookupError, self.codecs.lookup, encoding_name) - self.codecs.register(self.search_function) + self.assertRaises(LookupError, codecs.lookup, encoding_name) + + codecs.register(self.search_function) + self.addCleanup(codecs.unregister, self.search_function) for name in [ encoding_name, @@ -741,11 +747,6 @@ def test_codec_stream_writer(self): class CAPICodecErrors(unittest.TestCase): - def setUp(self): - self.enterContext(import_helper.isolated_modules()) - self.enterContext(import_helper.CleanImport('codecs')) - self.codecs = import_helper.import_module('codecs') - def test_codec_register_error(self): self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom') @@ -754,24 +755,25 @@ def error_handler(exc): error_handler = mock.Mock(wraps=error_handler) _testcapi.codec_register_error('custom', error_handler) + # self.addCleanup(codecs.unregister_error, 'custom') - self.assertRaises(UnicodeEncodeError, self.codecs.encode, + self.assertRaises(UnicodeEncodeError, codecs.encode, '\xff', 'ascii', errors='custom') error_handler.assert_called_once() error_handler.reset_mock() - self.assertRaises(UnicodeDecodeError, self.codecs.decode, + self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii', errors='custom') error_handler.assert_called_once() def test_codec_lookup_error(self): codec_lookup_error = _testcapi.codec_lookup_error - self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors) - self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors) - self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors) - self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors) - self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors) - self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors) + self.assertIs(codec_lookup_error(NULL), codecs.strict_errors) + self.assertIs(codec_lookup_error('strict'), codecs.strict_errors) + self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors) + self.assertIs(codec_lookup_error('replace'), codecs.replace_errors) + self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors) + self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors) self.assertRaises(LookupError, codec_lookup_error, 'custom') def test_codec_error_handlers(self): From 6a36eb08466e2a610dbbc2621c5c81482fd847cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:30:12 +0200 Subject: [PATCH 10/20] small hack to make the test suite correct --- Lib/test/test_capi/test_codecs.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 4865f4384422ac..60b48ae099dcd3 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -7,6 +7,7 @@ import unittest import unittest.mock as mock import _testcapi +from getopt import error from test.support import import_helper _testlimitedcapi = import_helper.import_module('_testlimitedcapi') @@ -748,14 +749,19 @@ def test_codec_stream_writer(self): class CAPICodecErrors(unittest.TestCase): def test_codec_register_error(self): - self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom') + try: + error_handler = _testcapi.codec_lookup_error('custom') + except LookupError: + error_handler = None - def error_handler(exc): - raise exc + if error_handler is None: + def custom_error_handler(exc): + raise exc - error_handler = mock.Mock(wraps=error_handler) - _testcapi.codec_register_error('custom', error_handler) - # self.addCleanup(codecs.unregister_error, 'custom') + error_handler = mock.Mock(wraps=custom_error_handler) + _testcapi.codec_register_error('custom', error_handler) + else: + self.assertIsInstance(error_handler, mock.Mock) self.assertRaises(UnicodeEncodeError, codecs.encode, '\xff', 'ascii', errors='custom') @@ -765,6 +771,7 @@ def error_handler(exc): self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii', errors='custom') error_handler.assert_called_once() + error_handler.reset_mock() def test_codec_lookup_error(self): codec_lookup_error = _testcapi.codec_lookup_error @@ -774,7 +781,7 @@ def test_codec_lookup_error(self): self.assertIs(codec_lookup_error('replace'), codecs.replace_errors) self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors) self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors) - self.assertRaises(LookupError, codec_lookup_error, 'custom') + self.assertRaises(LookupError, codec_lookup_error, 'unknown') def test_codec_error_handlers(self): exceptions = [ From 145b285f6f6200eda8fa90155a2abb4846e2a769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Sep 2024 18:29:06 +0200 Subject: [PATCH 11/20] remove un-necessary imports --- Lib/test/test_capi/test_codecs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 60b48ae099dcd3..29642203b44878 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -1,13 +1,11 @@ import codecs import contextlib import io -import os import re import sys import unittest import unittest.mock as mock import _testcapi -from getopt import error from test.support import import_helper _testlimitedcapi = import_helper.import_module('_testlimitedcapi') From 7be1f555f9f9483525d79a7596c4c30cd9e3a6a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 09:58:54 +0200 Subject: [PATCH 12/20] use `_codecs._unregister_error` to cleanup test state --- Lib/test/test_capi/test_codecs.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 29642203b44878..d9e2524a63fa88 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -747,19 +747,17 @@ def test_codec_stream_writer(self): class CAPICodecErrors(unittest.TestCase): def test_codec_register_error(self): - try: - error_handler = _testcapi.codec_lookup_error('custom') - except LookupError: - error_handler = None + # for cleaning up between tests + from _codecs import _unregister_error as _codecs_unregister_error - if error_handler is None: - def custom_error_handler(exc): - raise exc + self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom') - error_handler = mock.Mock(wraps=custom_error_handler) - _testcapi.codec_register_error('custom', error_handler) - else: - self.assertIsInstance(error_handler, mock.Mock) + def custom_error_handler(exc): + raise exc + + error_handler = mock.Mock(wraps=custom_error_handler) + _testcapi.codec_register_error('custom', error_handler) + self.addCleanup(_codecs_unregister_error, 'custom') self.assertRaises(UnicodeEncodeError, codecs.encode, '\xff', 'ascii', errors='custom') @@ -769,7 +767,6 @@ def custom_error_handler(exc): self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii', errors='custom') error_handler.assert_called_once() - error_handler.reset_mock() def test_codec_lookup_error(self): codec_lookup_error = _testcapi.codec_lookup_error From f72be5c600824765297441319b8faa523036143a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:08:28 +0200 Subject: [PATCH 13/20] indicate some semantics for NULL case being tested --- Modules/_testcapi/codec.c | 39 ++++++++++++++++----------------------- Modules/_testcapi/util.h | 10 ++++++++++ 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index 6aa19c2055d6b6..2e4712bd55316f 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -1,12 +1,5 @@ #include "parts.h" - -/* - * The Codecs C API assume that 'encoding' is not NULL, lest - * it uses PyErr_BadArgument() to set a TypeError exception. - * - * In this file, we allow to call the functions using None - * as NULL to explicitly check this behaviour. - */ +#include "util.h" // === Codecs registration and un-registration ================================ @@ -44,8 +37,8 @@ static PyObject * codec_encode(PyObject *Py_UNUSED(module), PyObject *args) { PyObject *input; - const char *encoding; // should not be NULL (see top-file comment) - const char *errors; // can be NULL + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { return NULL; } @@ -56,8 +49,8 @@ static PyObject * codec_decode(PyObject *Py_UNUSED(module), PyObject *args) { PyObject *input; - const char *encoding; // should not be NULL (see top-file comment) - const char *errors; // can be NULL + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) { return NULL; } @@ -67,7 +60,7 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } @@ -77,7 +70,7 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } @@ -87,8 +80,8 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) - const char *errors; // can be NULL + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { return NULL; } @@ -98,8 +91,8 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) - const char *errors; // can be NULL + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) { return NULL; } @@ -109,9 +102,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested PyObject *stream; - const char *errors; // can be NULL + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { return NULL; } @@ -121,9 +114,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested PyObject *stream; - const char *errors; // can be NULL + const char *errors; // can be NULL if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) { return NULL; } @@ -149,7 +142,7 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args) static PyObject * codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // can be NULL + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } diff --git a/Modules/_testcapi/util.h b/Modules/_testcapi/util.h index f26d7656a10138..042e522542eddb 100644 --- a/Modules/_testcapi/util.h +++ b/Modules/_testcapi/util.h @@ -31,3 +31,13 @@ static const char uninitialized[] = "uninitialized"; #define UNINITIALIZED_SIZE ((Py_ssize_t)236892191) /* Marker to check that integer value was set. */ #define UNINITIALIZED_INT (63256717) +/* + * Marker to indicate that a NULL parameter would not be allowed + * at runtime but that the test interface will check that it is + * indeed the case. + * + * Use this macro only if passing NULL to the C API would raise + * a catchable exception (and not a fatal exception that would + * crash the interpreter). + */ + #define NULL_WOULD_RAISE(NAME) NAME From 4d02c6ceedc344e538ebbf948838c22adad2abdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:08:36 +0200 Subject: [PATCH 14/20] revert a cosmetic change --- Modules/_testcapi/codec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index 2e4712bd55316f..2bcfcd64d01794 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -222,9 +222,9 @@ static PyMethodDef test_methods[] = { }; int -_PyTestCapi_Init_Codec(PyObject *module) +_PyTestCapi_Init_Codec(PyObject *m) { - if (PyModule_AddFunctions(module, test_methods) < 0) { + if (PyModule_AddFunctions(m, test_methods) < 0) { return -1; } return 0; From 0f26ca7ee4179d4688bbe64f55348a1fef1607b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:23:36 +0200 Subject: [PATCH 15/20] Move `PyCodec_NameReplaceErrors` test to the `_testlimitedcapi` module --- Lib/test/test_capi/test_codecs.py | 4 ++-- Modules/Setup.stdlib.in | 2 +- Modules/_testcapi/codec.c | 13 +------------ Modules/_testlimitedcapi.c | 3 +++ Modules/_testlimitedcapi/codec.c | 29 +++++++++++++++++++++++++++++ Modules/_testlimitedcapi/parts.h | 1 + 6 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 Modules/_testlimitedcapi/codec.c diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index d9e2524a63fa88..91d36b65afba46 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -797,11 +797,11 @@ def test_codec_error_handlers(self): _testcapi.codec_ignore_errors, _testcapi.codec_replace_errors, _testcapi.codec_xmlcharrefreplace_errors, - _testcapi.codec_namereplace_errors, + _testlimitedcapi.codec_namereplace_errors, ]: for exc in exceptions: with self.subTest(handler=handler, exc=exc): - handler(exc) + self.assertIsInstance(handler(exc), tuple) if __name__ == "__main__": diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 9aa398a80efa1b..52c0f883d383db 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -163,7 +163,7 @@ @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/run.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c _testcapi/object.c _testcapi/monitoring.c _testcapi/config.c -@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c +@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/codec.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index 2bcfcd64d01794..ba614055915235 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -184,15 +184,6 @@ codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) return PyCodec_BackslashReplaceErrors(exc); } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 -static PyObject * -codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) -{ - assert(exc != NULL); - return PyCodec_NameReplaceErrors(exc); -} -#endif - static PyMethodDef test_methods[] = { /* codecs registration */ {"codec_register", codec_register, METH_O}, @@ -215,9 +206,7 @@ static PyMethodDef test_methods[] = { {"codec_replace_errors", codec_replace_errors, METH_O}, {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O}, {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O}, -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 - {"codec_namereplace_errors", codec_namereplace_errors, METH_O}, -#endif + // PyCodec_NameReplaceErrors() is tested in _testlimitedcapi/codec.c {NULL, NULL, 0, NULL}, }; diff --git a/Modules/_testlimitedcapi.c b/Modules/_testlimitedcapi.c index e74cbfe19871bf..ba83a23117b2a5 100644 --- a/Modules/_testlimitedcapi.c +++ b/Modules/_testlimitedcapi.c @@ -38,6 +38,9 @@ PyInit__testlimitedcapi(void) if (_PyTestLimitedCAPI_Init_Bytes(mod) < 0) { return NULL; } + if (_PyTestLimitedCAPI_Init_Codec(mod) < 0) { + return NULL; + } if (_PyTestLimitedCAPI_Init_Complex(mod) < 0) { return NULL; } diff --git a/Modules/_testlimitedcapi/codec.c b/Modules/_testlimitedcapi/codec.c new file mode 100644 index 00000000000000..fdc18eedc2d288 --- /dev/null +++ b/Modules/_testlimitedcapi/codec.c @@ -0,0 +1,29 @@ +#include "pyconfig.h" // Py_GIL_DISABLED + +// Need limited C API version 3.5 for PyCodec_NameReplaceErrors() +#if !defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API) +# define Py_LIMITED_API 0x03050000 +#endif + +#include "parts.h" + +static PyObject * +codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc) +{ + assert(exc != NULL); + return PyCodec_NameReplaceErrors(exc); +} + +static PyMethodDef test_methods[] = { + {"codec_namereplace_errors", codec_namereplace_errors, METH_O}, + {NULL}, +}; + +int +_PyTestLimitedCAPI_Init_Codec(PyObject *module) +{ + if (PyModule_AddFunctions(module, test_methods) < 0) { + return -1; + } + return 0; +} diff --git a/Modules/_testlimitedcapi/parts.h b/Modules/_testlimitedcapi/parts.h index 12b890853803f4..4107b150c5b4e0 100644 --- a/Modules/_testlimitedcapi/parts.h +++ b/Modules/_testlimitedcapi/parts.h @@ -25,6 +25,7 @@ int _PyTestLimitedCAPI_Init_Abstract(PyObject *module); int _PyTestLimitedCAPI_Init_ByteArray(PyObject *module); int _PyTestLimitedCAPI_Init_Bytes(PyObject *module); +int _PyTestLimitedCAPI_Init_Codec(PyObject *module); int _PyTestLimitedCAPI_Init_Complex(PyObject *module); int _PyTestLimitedCAPI_Init_Dict(PyObject *module); int _PyTestLimitedCAPI_Init_Eval(PyObject *module); From 1399779a3d9aee5ebf548bbcd040f8d5f70e1ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:32:52 +0200 Subject: [PATCH 16/20] add comment for why we do not test `_PyCodec_UnregisterError` --- Lib/test/test_capi/test_codecs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index 91d36b65afba46..be64871c67d2ba 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -768,6 +768,10 @@ def custom_error_handler(exc): b'\xff', 'ascii', errors='custom') error_handler.assert_called_once() + # _codecs._unregister_error directly delegates to the internal C + # function so a Python-level function test is sufficient (it is + # tested in test_codeccallbacks). + def test_codec_lookup_error(self): codec_lookup_error = _testcapi.codec_lookup_error self.assertIs(codec_lookup_error(NULL), codecs.strict_errors) From 914151e1a9206fbe39ba341506d9fbe819335a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:34:22 +0200 Subject: [PATCH 17/20] update a comment --- Modules/_testcapi/codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index ba614055915235..dee093d35ea070 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -24,7 +24,7 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function) static PyObject * codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args) { - const char *encoding; // should not be NULL (see top-file comment) + const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested if (!PyArg_ParseTuple(args, "z", &encoding)) { return NULL; } From 8dd7e8d5e8767de4a33b82fe105fc318b85fe079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:35:49 +0200 Subject: [PATCH 18/20] revert one cosmetic change --- Modules/_testcapi/codec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c index dee093d35ea070..e27e64e066c458 100644 --- a/Modules/_testcapi/codec.c +++ b/Modules/_testcapi/codec.c @@ -216,5 +216,6 @@ _PyTestCapi_Init_Codec(PyObject *m) if (PyModule_AddFunctions(m, test_methods) < 0) { return -1; } + return 0; } From 1e6a5ce9f32ea34a9bc515f2c2afcb138b89d912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 10:52:28 +0200 Subject: [PATCH 19/20] Fix Windows compilation --- PCbuild/_testlimitedcapi.vcxproj | 1 + PCbuild/_testlimitedcapi.vcxproj.filters | 1 + 2 files changed, 2 insertions(+) diff --git a/PCbuild/_testlimitedcapi.vcxproj b/PCbuild/_testlimitedcapi.vcxproj index a1409ecf043d2d..846e027e10c7fa 100644 --- a/PCbuild/_testlimitedcapi.vcxproj +++ b/PCbuild/_testlimitedcapi.vcxproj @@ -97,6 +97,7 @@ + diff --git a/PCbuild/_testlimitedcapi.vcxproj.filters b/PCbuild/_testlimitedcapi.vcxproj.filters index e27e3171e1e6aa..57be2e2fc5b950 100644 --- a/PCbuild/_testlimitedcapi.vcxproj.filters +++ b/PCbuild/_testlimitedcapi.vcxproj.filters @@ -12,6 +12,7 @@ + From 2ba5f031ed47eea1058f700e15a52046eb5cf37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 29 Sep 2024 16:58:29 +0200 Subject: [PATCH 20/20] address Victor's review --- Lib/test/test_capi/test_codecs.py | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py index be64871c67d2ba..85491a89947318 100644 --- a/Lib/test/test_capi/test_codecs.py +++ b/Lib/test/test_capi/test_codecs.py @@ -528,7 +528,7 @@ class CAPICodecs(unittest.TestCase): def setUp(self): # Encoding names are normalized internally by converting them # to lowercase and their hyphens are replaced by underscores. - self.encoding_name = f'codec_reversed_{id(self)}' + self.encoding_name = 'test.test_capi.test_codecs.codec_reversed' # Make sure that our custom codec is not already registered (that # way we know whether we correctly unregistered the custom codec # after a test or not). @@ -658,12 +658,13 @@ def test_codec_decode(self): self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL) self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9) - # _codecs.decode() only reports unknown errors policy when they are - # used; this is different from PyUnicode_Decode() which checks that - # both the encoding and the errors policy are recognized before even - # attempting to call the decoder. - self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '') - self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a') + # _codecs.decode() only reports an unknown error handling name when + # the corresponding error handling function is used; this difers + # from PyUnicode_Decode() which checks that both the encoding and + # the error handling name are recognized before even attempting to + # call the decoder. + self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '') + self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a') self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict') with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): @@ -695,9 +696,9 @@ def test_codec_incremental_encoder(self): with self.use_custom_encoder(): encoding = self.encoding_name - for policy in ['strict', NULL]: - with self.subTest(policy=policy): - encoder = codec_incremental_encoder(encoding, policy) + for errors in ['strict', NULL]: + with self.subTest(errors): + encoder = codec_incremental_encoder(encoding, errors) self.assertIsInstance(encoder, self.codec_info.incrementalencoder) with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): @@ -709,9 +710,9 @@ def test_codec_incremental_decoder(self): with self.use_custom_encoder(): encoding = self.encoding_name - for policy in ['strict', NULL]: - with self.subTest(policy=policy): - decoder = codec_incremental_decoder(encoding, policy) + for errors in ['strict', NULL]: + with self.subTest(errors): + decoder = codec_incremental_decoder(encoding, errors) self.assertIsInstance(decoder, self.codec_info.incrementaldecoder) with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): @@ -722,9 +723,9 @@ def test_codec_stream_reader(self): with self.use_custom_encoder(): encoding, stream = self.encoding_name, io.StringIO() - for policy in ['strict', NULL]: - with self.subTest(policy=policy): - writer = codec_stream_reader(encoding, stream, policy) + for errors in ['strict', NULL]: + with self.subTest(errors): + writer = codec_stream_reader(encoding, stream, errors) self.assertIsInstance(writer, self.codec_info.streamreader) with self.assertRaisesRegex(TypeError, BAD_ARGUMENT): @@ -735,9 +736,9 @@ def test_codec_stream_writer(self): with self.use_custom_encoder(): encoding, stream = self.encoding_name, io.StringIO() - for policy in ['strict', NULL]: - with self.subTest(policy=policy): - writer = codec_stream_writer(encoding, stream, policy) + for errors in ['strict', NULL]: + with self.subTest(errors): + writer = codec_stream_writer(encoding, stream, errors) self.assertIsInstance(writer, self.codec_info.streamwriter) with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):