From ce7c135c4df91b2e3b19f18da66e493b61277c9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:50:59 +0200
Subject: [PATCH 01/20] add tests for C API `codecs`
---
Modules/_testcapi/codec.c | 237 +++++++++++++++++++++++++++++++++++++-
1 file changed, 232 insertions(+), 5 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index d13f51e20331a1..8fa3b57194328f 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,17 +1,244 @@
#include "parts.h"
-#include "util.h"
+// === Codecs registration and un-registration ================================
+
+static PyObject *
+codec_register(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+ if (PyCodec_Register(search_function) < 0) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
+{
+ if (PyCodec_Unregister(search_function) < 0) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
+}
+
+// === Codecs encoding and decoding interfaces ================================
+
+static PyObject *
+codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ PyObject *input;
+ const char *encoding; // should not be NULL
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ return PyCodec_Encode(input, encoding, errors);
+}
+
+static PyObject *
+codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ PyObject *input;
+ const char *encoding; // should not be NULL
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ return PyCodec_Decode(input, encoding, errors);
+}
+
+static PyObject *
+codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ return PyCodec_Encoder(encoding);
+}
+
+static PyObject *
+codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ return PyCodec_Decoder(encoding);
+}
+
+static PyObject *
+codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ const char *errors; // should not be NULL
+ if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ assert(errors != NULL);
+ return PyCodec_IncrementalEncoder(encoding, errors);
+}
+
+static PyObject *
+codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ const char *errors; // should not be NULL
+ if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ assert(errors != NULL);
+ return PyCodec_IncrementalDecoder(encoding, errors);
+}
+
+static PyObject *
+codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ PyObject *stream;
+ const char *errors; // should not be NULL
+ if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ assert(errors != NULL);
+ return PyCodec_StreamReader(encoding, stream, errors);
+}
+
+static PyObject *
+codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ PyObject *stream;
+ const char *errors; // should not be NULL
+ if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ assert(errors != NULL);
+ return PyCodec_StreamWriter(encoding, stream, errors);
+}
+
+// === Codecs errors handlers =================================================
+
+static PyObject *
+codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // should not be NULL
+ PyObject *error;
+ if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) {
+ return NULL;
+ }
+ assert(encoding != NULL);
+ if (PyCodec_RegisterError(encoding, error) < 0) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+static PyObject *
+codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args)
+{
+ const char *encoding; // can be NULL
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ return NULL;
+ }
+ return PyCodec_LookupError(encoding);
+}
+
+static PyObject *
+codec_strict_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_StrictErrors(exc);
+}
+
+static PyObject *
+codec_ignore_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_IgnoreErrors(exc);
+}
+
+static PyObject *
+codec_replace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_ReplaceErrors(exc);
+}
+
+static PyObject *
+codec_xmlcharrefreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_XMLCharRefReplaceErrors(exc);
+}
+
+static PyObject *
+codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_BackslashReplaceErrors(exc);
+}
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+static PyObject *
+codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_NameReplaceErrors(exc);
+}
+#endif
static PyMethodDef test_methods[] = {
- {NULL},
+ /* codecs registration */
+ {"codec_register", codec_register, METH_O},
+ {"codec_unregister", codec_unregister, METH_O},
+ {"codec_known_encoding", codec_known_encoding, METH_VARARGS},
+ /* encoding and decoding interface */
+ {"codec_encode", codec_encode, METH_VARARGS},
+ {"codec_decode", codec_decode, METH_VARARGS},
+ {"codec_encoder", codec_encoder, METH_VARARGS},
+ {"codec_decoder", codec_decoder, METH_VARARGS},
+ {"codec_incremental_encoder", codec_incremental_encoder, METH_VARARGS},
+ {"codec_incremental_decoder", codec_incremental_decoder, METH_VARARGS},
+ {"codec_stream_reader", codec_stream_reader, METH_VARARGS},
+ {"codec_stream_writer", codec_stream_writer, METH_VARARGS},
+ /* error handling */
+ {"codec_register_error", codec_register_error, METH_VARARGS},
+ {"codec_lookup_error", codec_lookup_error, METH_VARARGS},
+ {"codec_strict_errors", codec_strict_errors, METH_O},
+ {"codec_ignore_errors", codec_ignore_errors, METH_O},
+ {"codec_replace_errors", codec_replace_errors, METH_O},
+ {"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O},
+ {"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O},
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+ {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
+#endif
+ {NULL, NULL, 0, NULL},
};
int
-_PyTestCapi_Init_Codec(PyObject *m)
+_PyTestCapi_Init_Codec(PyObject *module)
{
- if (PyModule_AddFunctions(m, test_methods) < 0){
+ if (PyModule_AddFunctions(module, test_methods) < 0) {
return -1;
}
-
return 0;
}
From f9e350a18c7d1d2f41b596a0ff73f4454b9bd1cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:51:10 +0200
Subject: [PATCH 02/20] add Python tests for `_codecs`
---
Lib/test/test_capi/test_codecs.py | 232 +++++++++++++++++++++++++++++-
1 file changed, 230 insertions(+), 2 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index bd521a509d07ec..f3e96cadf67066 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,5 +1,10 @@
-import unittest
+import codecs
+import contextlib
+import io
import sys
+import unittest
+import unittest.mock as mock
+import _testcapi
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
@@ -7,7 +12,7 @@
NULL = None
-class CAPITest(unittest.TestCase):
+class CAPIUnicodeTest(unittest.TestCase):
# TODO: Test the following functions:
#
# PyUnicode_BuildEncodingMap
@@ -516,5 +521,228 @@ def test_asrawunicodeescapestring(self):
# CRASHES asrawunicodeescapestring(NULL)
+class CAPICodecRegistration(unittest.TestCase):
+
+ def setUp(self):
+ self.enterContext(import_helper.isolated_modules())
+ self.enterContext(import_helper.CleanImport('codecs'))
+ self.codecs = import_helper.import_module('codecs')
+ # Encoding names are normalized internally: they are converted
+ # to lowercase and their hyphens are replaced by underscores.
+ self.encoding_name = f'codec_reversed_{id(self)}'
+ # make sure that our custom codec is not already registered
+ self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+ # create the search function without registering yet
+ self._create_custom_codec()
+
+ def _create_custom_codec(self):
+ def codec_encoder(m, errors='strict'):
+ return (type(m)().join(reversed(m)), len(m))
+
+ def codec_decoder(c, errors='strict'):
+ return (type(c)().join(reversed(c)), len(c))
+
+ class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codec_encoder(input)
+
+ class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codec_decoder(input)
+
+ class StreamReader(codecs.StreamReader):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ class StreamWriter(codecs.StreamWriter):
+ def encode(self, input, errors='strict'):
+ return codec_encoder(input, errors=errors)
+
+ def decode(self, input, errors='strict'):
+ return codec_decoder(input, errors=errors)
+
+ info = codecs.CodecInfo(
+ encode=codec_encoder,
+ decode=codec_decoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ name=self.encoding_name
+ )
+
+ def search_function(encoding):
+ if encoding == self.encoding_name:
+ return info
+ return None
+
+ self.codec_info = info
+ self.search_function = search_function
+
+ @contextlib.contextmanager
+ def use_custom_encoder(self):
+ self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+ self.codecs.register(self.search_function)
+ yield
+ self.codecs.unregister(self.search_function)
+ self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+
+ def test_codec_register(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ self.assertIsNone(_testcapi.codec_register(search_function))
+ self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
+ self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+
+ def test_codec_unregister(self):
+ search_function, encoding = self.search_function, self.encoding_name
+ self.assertRaises(LookupError, self.codecs.lookup, encoding)
+ self.codecs.register(search_function)
+ self.assertIsNone(_testcapi.codec_unregister(search_function))
+ self.assertRaises(LookupError, self.codecs.lookup, encoding)
+
+ def test_codec_known_encoding(self):
+ self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec')
+ self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
+ self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
+
+ encoding_name = self.encoding_name
+ self.assertRaises(LookupError, self.codecs.lookup, encoding_name)
+ self.codecs.register(self.search_function)
+
+ for name in [
+ encoding_name,
+ encoding_name.upper(),
+ encoding_name.replace('_', '-'),
+ ]:
+ with self.subTest(name):
+ self.assertTrue(_testcapi.codec_known_encoding(name))
+
+ def test_codec_encode(self):
+ encode = _testcapi.codec_encode
+ self.assertEqual(encode('a', 'utf-8', NULL), b'a')
+ self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
+ self.assertEqual(encode('é', 'ascii', 'ignore'), b'')
+ # todo: add more cases
+ self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
+ # CRASHES encode('a', NULL, 'strict')
+
+ def test_codec_decode(self):
+ decode = _testcapi.codec_decode
+
+ b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
+ s = 'a\xa1\u4f60\U0001f600'
+
+ self.assertEqual(decode(b, 'utf-8', 'strict'), s)
+ self.assertEqual(decode(b, 'utf-8', NULL), s)
+ self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
+ self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
+ self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
+ # todo: add more cases
+
+ # _codecs.decode only reports unknown errors policy when they are
+ # used (it has a fast path for empty bytes); this is different from
+ # PyUnicode_Decode which checks that both the encoding and the errors
+ # policy are recognized.
+ self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
+
+ self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
+ # CRASHES decode(b, NULL, 'strict')
+
+ def test_codec_encoder(self):
+ with self.use_custom_encoder():
+ encoder = _testcapi.codec_encoder(self.encoding_name)
+ self.assertIs(encoder, self.codec_info.encode)
+
+ def test_codec_decoder(self):
+ with self.use_custom_encoder():
+ decoder = _testcapi.codec_decoder(self.encoding_name)
+ self.assertIs(decoder, self.codec_info.decode)
+
+ def test_codec_incremental_encoder(self):
+ with self.use_custom_encoder():
+ encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict')
+ self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+ def test_codec_incremental_decoder(self):
+ with self.use_custom_encoder():
+ decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict')
+ self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+ def test_codec_stream_reader(self):
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ reader = _testcapi.codec_stream_reader(encoding, stream, 'strict')
+ self.assertIsInstance(reader, self.codec_info.streamreader)
+
+ def test_codec_stream_writer(self):
+ with self.use_custom_encoder():
+ encoding, stream = self.encoding_name, io.StringIO()
+ writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
+ self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+class CAPICodecErrors(unittest.TestCase):
+
+ def setUp(self):
+ self.enterContext(import_helper.isolated_modules())
+ self.enterContext(import_helper.CleanImport('codecs'))
+ self.codecs = import_helper.import_module('codecs')
+
+ def test_codec_register_error(self):
+ self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+
+ def error_handler(exc):
+ raise exc
+
+ error_handler = mock.Mock(wraps=error_handler)
+ _testcapi.codec_register_error('custom', error_handler)
+
+ self.assertRaises(UnicodeEncodeError, self.codecs.encode,
+ '\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+ error_handler.reset_mock()
+
+ self.assertRaises(UnicodeDecodeError, self.codecs.decode,
+ b'\xff', 'ascii', errors='custom')
+ error_handler.assert_called_once()
+
+ def test_codec_lookup_error(self):
+ codec_lookup_error = _testcapi.codec_lookup_error
+ self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors)
+ self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors)
+ self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors)
+ self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors)
+ self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors)
+ self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors)
+ self.assertRaises(LookupError, codec_lookup_error, 'custom')
+
+ def test_codec_error_handlers(self):
+ exceptions = [
+ UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
+ UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
+ ]
+
+ strict_handler = _testcapi.codec_strict_errors
+ for exc in exceptions:
+ with self.subTest(handler=strict_handler, exc=exc):
+ self.assertRaises(UnicodeEncodeError, strict_handler, exc)
+
+ for handler in [
+ _testcapi.codec_ignore_errors,
+ _testcapi.codec_replace_errors,
+ _testcapi.codec_xmlcharrefreplace_errors,
+ _testcapi.codec_namereplace_errors,
+ ]:
+ for exc in exceptions:
+ with self.subTest(handler=handler, exc=exc):
+ handler(exc)
+
+
if __name__ == "__main__":
unittest.main()
From 15b68116da1f46ebe195ef8e2d02feb129319b1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 17:40:48 +0200
Subject: [PATCH 03/20] fix size bug
---
Objects/exceptions.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index fda62f159c1540..9bcdc88e1291ca 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
if (*start<0)
*start = 0; /*XXX check for values <0*/
if (*start>=size)
- *start = size-1;
+ *start = size ? size-1 : 0;
Py_DECREF(obj);
return 0;
}
@@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
if (*start<0)
*start = 0;
if (*start>=size)
- *start = size-1;
+ *start = size ? size-1 : 0;
Py_DECREF(obj);
return 0;
}
From 8048ae1d2172df5bace73a3ea30712745d4c6864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 17:46:04 +0200
Subject: [PATCH 04/20] rename test class
---
Lib/test/test_capi/test_codecs.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index f3e96cadf67066..c7fbca32ac582b 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -521,7 +521,7 @@ def test_asrawunicodeescapestring(self):
# CRASHES asrawunicodeescapestring(NULL)
-class CAPICodecRegistration(unittest.TestCase):
+class CAPICodecs(unittest.TestCase):
def setUp(self):
self.enterContext(import_helper.isolated_modules())
From 8487b4630cb51a07ceb16569446733b1947df05f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 15:04:54 +0200
Subject: [PATCH 05/20] Revert "fix size bug"
This reverts commit 15b68116da1f46ebe195ef8e2d02feb129319b1f.
---
Objects/exceptions.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 9bcdc88e1291ca..fda62f159c1540 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
if (*start<0)
*start = 0; /*XXX check for values <0*/
if (*start>=size)
- *start = size ? size-1 : 0;
+ *start = size-1;
Py_DECREF(obj);
return 0;
}
@@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
if (*start<0)
*start = 0;
if (*start>=size)
- *start = size ? size-1 : 0;
+ *start = size-1;
Py_DECREF(obj);
return 0;
}
From 0097f2a4c595590d70a04dc61a34c5da08aea011 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 15:24:09 +0200
Subject: [PATCH 06/20] Disable tests that are known to crash.
---
Lib/test/test_capi/test_codecs.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index c7fbca32ac582b..21cbf76d14dffb 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -722,7 +722,9 @@ def test_codec_lookup_error(self):
def test_codec_error_handlers(self):
exceptions = [
- UnicodeEncodeError('bad', '', 0, 1, 'reason'),
+ # A UnicodeError whose object (the input string) is empty currently crashes:
+ # See: https://github.com/python/cpython/issues/123378
+ # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
From 303b13c4c61aa2a6d13fd027c3c31a0b4cef7f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:24:50 +0200
Subject: [PATCH 07/20] address Victor's review
---
Lib/test/test_capi/test_codecs.py | 15 ++++++++++----
Modules/_testcapi/codec.c | 34 +++++++++----------------------
2 files changed, 21 insertions(+), 28 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 21cbf76d14dffb..2c72a989e32364 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -592,14 +592,21 @@ def use_custom_encoder(self):
def test_codec_register(self):
search_function, encoding = self.search_function, self.encoding_name
+ # register the search function using the C API
self.assertIsNone(_testcapi.codec_register(search_function))
self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+ # unregister the search function using the regular API
+ self.codecs.unregister(search_function)
+ self.assertRaises(LookupError, self.codecs.lookup, encoding)
def test_codec_unregister(self):
search_function, encoding = self.search_function, self.encoding_name
self.assertRaises(LookupError, self.codecs.lookup, encoding)
+ # register the search function using the regular API
self.codecs.register(search_function)
+ self.assertIsNotNone(self.codecs.lookup(encoding))
+ # unregister the search function using the C API
self.assertIsNone(_testcapi.codec_unregister(search_function))
self.assertRaises(LookupError, self.codecs.lookup, encoding)
@@ -625,16 +632,16 @@ def test_codec_encode(self):
encode = _testcapi.codec_encode
self.assertEqual(encode('a', 'utf-8', NULL), b'a')
self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
- self.assertEqual(encode('é', 'ascii', 'ignore'), b'')
- # todo: add more cases
+ self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
+
self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
# CRASHES encode('a', NULL, 'strict')
def test_codec_decode(self):
decode = _testcapi.codec_decode
- b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80'
s = 'a\xa1\u4f60\U0001f600'
+ b = s.encode()
self.assertEqual(decode(b, 'utf-8', 'strict'), s)
self.assertEqual(decode(b, 'utf-8', NULL), s)
@@ -642,7 +649,6 @@ def test_codec_decode(self):
self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
- # todo: add more cases
# _codecs.decode only reports unknown errors policy when they are
# used (it has a fast path for empty bytes); this is different from
@@ -685,6 +691,7 @@ def test_codec_stream_writer(self):
writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
self.assertIsInstance(writer, self.codec_info.streamwriter)
+
class CAPICodecErrors(unittest.TestCase):
def setUp(self):
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 8fa3b57194328f..5cd187bde60fef 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -24,10 +24,9 @@ static PyObject *
codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ if (!PyArg_ParseTuple(args, "s", &encoding)) {
return NULL;
}
- assert(encoding != NULL);
return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
}
@@ -39,10 +38,9 @@ codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
PyObject *input;
const char *encoding; // should not be NULL
const char *errors; // can be NULL
- if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
return NULL;
}
- assert(encoding != NULL);
return PyCodec_Encode(input, encoding, errors);
}
@@ -52,10 +50,9 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
PyObject *input;
const char *encoding; // should not be NULL
const char *errors; // can be NULL
- if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
return NULL;
}
- assert(encoding != NULL);
return PyCodec_Decode(input, encoding, errors);
}
@@ -63,10 +60,9 @@ static PyObject *
codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ if (!PyArg_ParseTuple(args, "s", &encoding)) {
return NULL;
}
- assert(encoding != NULL);
return PyCodec_Encoder(encoding);
}
@@ -74,10 +70,9 @@ static PyObject *
codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "z", &encoding)) {
+ if (!PyArg_ParseTuple(args, "s", &encoding)) {
return NULL;
}
- assert(encoding != NULL);
return PyCodec_Decoder(encoding);
}
@@ -86,11 +81,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
return NULL;
}
- assert(encoding != NULL);
- assert(errors != NULL);
return PyCodec_IncrementalEncoder(encoding, errors);
}
@@ -99,11 +92,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
return NULL;
}
- assert(encoding != NULL);
- assert(errors != NULL);
return PyCodec_IncrementalDecoder(encoding, errors);
}
@@ -113,11 +104,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
const char *encoding; // should not be NULL
PyObject *stream;
const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+ if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
return NULL;
}
- assert(encoding != NULL);
- assert(errors != NULL);
return PyCodec_StreamReader(encoding, stream, errors);
}
@@ -127,11 +116,9 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
const char *encoding; // should not be NULL
PyObject *stream;
const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
+ if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
return NULL;
}
- assert(encoding != NULL);
- assert(errors != NULL);
return PyCodec_StreamWriter(encoding, stream, errors);
}
@@ -142,10 +129,9 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
{
const char *encoding; // should not be NULL
PyObject *error;
- if (!PyArg_ParseTuple(args, "zO", &encoding, &error)) {
+ if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
return NULL;
}
- assert(encoding != NULL);
if (PyCodec_RegisterError(encoding, error) < 0) {
return NULL;
}
From 4f474ddc570b6b127b2880f69955496bd6d83f4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Wed, 25 Sep 2024 18:13:07 +0200
Subject: [PATCH 08/20] update tests to reflect user errors
---
Lib/test/test_capi/test_codecs.py | 79 ++++++++++++++++++++++++-------
Modules/_testcapi/codec.c | 54 ++++++++++++---------
2 files changed, 94 insertions(+), 39 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 2c72a989e32364..c57491619d5976 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,6 +1,7 @@
import codecs
import contextlib
import io
+import re
import sys
import unittest
import unittest.mock as mock
@@ -10,6 +11,7 @@
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
NULL = None
+BAD_ARGUMENT = re.escape('bad argument type for built-in operation')
class CAPIUnicodeTest(unittest.TestCase):
@@ -635,7 +637,8 @@ def test_codec_encode(self):
self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')
self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
- # CRASHES encode('a', NULL, 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ encode('a', NULL, 'strict')
def test_codec_decode(self):
decode = _testcapi.codec_decode
@@ -650,46 +653,90 @@ def test_codec_decode(self):
self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
- # _codecs.decode only reports unknown errors policy when they are
- # used (it has a fast path for empty bytes); this is different from
- # PyUnicode_Decode which checks that both the encoding and the errors
- # policy are recognized.
+ # _codecs.decode() only reports unknown errors policy when they are
+ # used; this is different from PyUnicode_Decode() which checks that
+ # both the encoding and the errors policy are recognized before even
+ # attempting to call the decoder.
self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
+ self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a')
self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
- # CRASHES decode(b, NULL, 'strict')
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ decode(b, NULL, 'strict')
def test_codec_encoder(self):
+ codec_encoder = _testcapi.codec_encoder
+
with self.use_custom_encoder():
- encoder = _testcapi.codec_encoder(self.encoding_name)
+ encoder = codec_encoder(self.encoding_name)
self.assertIs(encoder, self.codec_info.encode)
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_encoder(NULL)
+
def test_codec_decoder(self):
+ codec_decoder = _testcapi.codec_decoder
+
with self.use_custom_encoder():
- decoder = _testcapi.codec_decoder(self.encoding_name)
+ decoder = codec_decoder(self.encoding_name)
self.assertIs(decoder, self.codec_info.decode)
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_decoder(NULL)
+
def test_codec_incremental_encoder(self):
+ codec_incremental_encoder = _testcapi.codec_incremental_encoder
+
with self.use_custom_encoder():
- encoder = _testcapi.codec_incremental_encoder(self.encoding_name, 'strict')
- self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+ encoding = self.encoding_name
+
+ for policy in ['strict', NULL]:
+ with self.subTest(policy=policy):
+ encoder = codec_incremental_encoder(encoding, policy)
+ self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_encoder(NULL, 'strict')
def test_codec_incremental_decoder(self):
+ codec_incremental_decoder = _testcapi.codec_incremental_decoder
+
with self.use_custom_encoder():
- decoder = _testcapi.codec_incremental_decoder(self.encoding_name, 'strict')
- self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+ encoding = self.encoding_name
+
+ for policy in ['strict', NULL]:
+ with self.subTest(policy=policy):
+ decoder = codec_incremental_decoder(encoding, policy)
+ self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_incremental_decoder(NULL, 'strict')
def test_codec_stream_reader(self):
+ codec_stream_reader = _testcapi.codec_stream_reader
+
with self.use_custom_encoder():
encoding, stream = self.encoding_name, io.StringIO()
- reader = _testcapi.codec_stream_reader(encoding, stream, 'strict')
- self.assertIsInstance(reader, self.codec_info.streamreader)
+ for policy in ['strict', NULL]:
+ with self.subTest(policy=policy):
+ writer = codec_stream_reader(encoding, stream, policy)
+ self.assertIsInstance(writer, self.codec_info.streamreader)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_reader(NULL, stream, 'strict')
def test_codec_stream_writer(self):
+ codec_stream_writer = _testcapi.codec_stream_writer
+
with self.use_custom_encoder():
encoding, stream = self.encoding_name, io.StringIO()
- writer = _testcapi.codec_stream_writer(encoding, stream, 'strict')
- self.assertIsInstance(writer, self.codec_info.streamwriter)
+ for policy in ['strict', NULL]:
+ with self.subTest(policy=policy):
+ writer = codec_stream_writer(encoding, stream, policy)
+ self.assertIsInstance(writer, self.codec_info.streamwriter)
+
+ with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
+ codec_stream_writer(NULL, stream, 'strict')
class CAPICodecErrors(unittest.TestCase):
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 5cd187bde60fef..6aa19c2055d6b6 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,5 +1,13 @@
#include "parts.h"
+/*
+ * The Codecs C API assume that 'encoding' is not NULL, lest
+ * it uses PyErr_BadArgument() to set a TypeError exception.
+ *
+ * In this file, we allow to call the functions using None
+ * as NULL to explicitly check this behaviour.
+ */
+
// === Codecs registration and un-registration ================================
static PyObject *
@@ -23,8 +31,8 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
static PyObject *
codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "s", &encoding)) {
+ const char *encoding; // should not be NULL (see top-file comment)
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
return PyCodec_KnownEncoding(encoding) ? Py_True : Py_False;
@@ -36,9 +44,9 @@ static PyObject *
codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
{
PyObject *input;
- const char *encoding; // should not be NULL
+ const char *encoding; // should not be NULL (see top-file comment)
const char *errors; // can be NULL
- if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
return NULL;
}
return PyCodec_Encode(input, encoding, errors);
@@ -48,9 +56,9 @@ static PyObject *
codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
{
PyObject *input;
- const char *encoding; // should not be NULL
+ const char *encoding; // should not be NULL (see top-file comment)
const char *errors; // can be NULL
- if (!PyArg_ParseTuple(args, "O|sz", &input, &encoding, &errors)) {
+ if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
return NULL;
}
return PyCodec_Decode(input, encoding, errors);
@@ -59,8 +67,8 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "s", &encoding)) {
+ const char *encoding; // should not be NULL (see top-file comment)
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
return PyCodec_Encoder(encoding);
@@ -69,8 +77,8 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
- if (!PyArg_ParseTuple(args, "s", &encoding)) {
+ const char *encoding; // should not be NULL (see top-file comment)
+ if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
return PyCodec_Decoder(encoding);
@@ -79,9 +87,9 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
- const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+ const char *encoding; // should not be NULL (see top-file comment)
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
return NULL;
}
return PyCodec_IncrementalEncoder(encoding, errors);
@@ -90,9 +98,9 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
- const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "ss", &encoding, &errors)) {
+ const char *encoding; // should not be NULL (see top-file comment)
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
return NULL;
}
return PyCodec_IncrementalDecoder(encoding, errors);
@@ -101,10 +109,10 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
+ const char *encoding; // should not be NULL (see top-file comment)
PyObject *stream;
- const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
return NULL;
}
return PyCodec_StreamReader(encoding, stream, errors);
@@ -113,10 +121,10 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
+ const char *encoding; // should not be NULL (see top-file comment)
PyObject *stream;
- const char *errors; // should not be NULL
- if (!PyArg_ParseTuple(args, "sOs", &encoding, &stream, &errors)) {
+ const char *errors; // can be NULL
+ if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
return NULL;
}
return PyCodec_StreamWriter(encoding, stream, errors);
@@ -127,7 +135,7 @@ codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL
+ const char *encoding; // must not be NULL
PyObject *error;
if (!PyArg_ParseTuple(args, "sO", &encoding, &error)) {
return NULL;
From 87ee0d26d4c150795437c06b547c14464e8b94b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Fri, 27 Sep 2024 12:52:35 +0200
Subject: [PATCH 09/20] fix C API codec tests
---
Lib/test/test_capi/test_codecs.py | 68 ++++++++++++++++---------------
1 file changed, 35 insertions(+), 33 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index c57491619d5976..4865f4384422ac 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,6 +1,7 @@
import codecs
import contextlib
import io
+import os
import re
import sys
import unittest
@@ -526,14 +527,13 @@ def test_asrawunicodeescapestring(self):
class CAPICodecs(unittest.TestCase):
def setUp(self):
- self.enterContext(import_helper.isolated_modules())
- self.enterContext(import_helper.CleanImport('codecs'))
- self.codecs = import_helper.import_module('codecs')
# Encoding names are normalized internally by converting them
# to lowercase and their hyphens are replaced by underscores.
self.encoding_name = f'codec_reversed_{id(self)}'
- # make sure that our custom codec is not already registered
- self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+ # Make sure that our custom codec is not already registered (that
+ # way we know whether we correctly unregistered the custom codec
+ # after a test or not).
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
# create the search function without registering yet
self._create_custom_codec()
@@ -586,41 +586,47 @@ def search_function(encoding):
@contextlib.contextmanager
def use_custom_encoder(self):
- self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
- self.codecs.register(self.search_function)
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
+ codecs.register(self.search_function)
yield
- self.codecs.unregister(self.search_function)
- self.assertRaises(LookupError, self.codecs.lookup, self.encoding_name)
+ codecs.unregister(self.search_function)
+ self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
def test_codec_register(self):
search_function, encoding = self.search_function, self.encoding_name
# register the search function using the C API
self.assertIsNone(_testcapi.codec_register(search_function))
- self.assertIs(self.codecs.lookup(encoding), search_function(encoding))
- self.assertEqual(self.codecs.encode('123', encoding=encoding), '321')
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIs(codecs.lookup(encoding), search_function(encoding))
+ self.assertEqual(codecs.encode('123', encoding=encoding), '321')
# unregister the search function using the regular API
- self.codecs.unregister(search_function)
- self.assertRaises(LookupError, self.codecs.lookup, encoding)
+ codecs.unregister(search_function)
+ self.assertRaises(LookupError, codecs.lookup, encoding)
def test_codec_unregister(self):
search_function, encoding = self.search_function, self.encoding_name
- self.assertRaises(LookupError, self.codecs.lookup, encoding)
+ self.assertRaises(LookupError, codecs.lookup, encoding)
# register the search function using the regular API
- self.codecs.register(search_function)
- self.assertIsNotNone(self.codecs.lookup(encoding))
+ codecs.register(search_function)
+ # in case the test failed before cleaning up
+ self.addCleanup(codecs.unregister, self.search_function)
+ self.assertIsNotNone(codecs.lookup(encoding))
# unregister the search function using the C API
self.assertIsNone(_testcapi.codec_unregister(search_function))
- self.assertRaises(LookupError, self.codecs.lookup, encoding)
+ self.assertRaises(LookupError, codecs.lookup, encoding)
def test_codec_known_encoding(self):
- self.assertRaises(LookupError, self.codecs.lookup, 'unknown-codec')
+ self.assertRaises(LookupError, codecs.lookup, 'unknown-codec')
self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))
encoding_name = self.encoding_name
- self.assertRaises(LookupError, self.codecs.lookup, encoding_name)
- self.codecs.register(self.search_function)
+ self.assertRaises(LookupError, codecs.lookup, encoding_name)
+
+ codecs.register(self.search_function)
+ self.addCleanup(codecs.unregister, self.search_function)
for name in [
encoding_name,
@@ -741,11 +747,6 @@ def test_codec_stream_writer(self):
class CAPICodecErrors(unittest.TestCase):
- def setUp(self):
- self.enterContext(import_helper.isolated_modules())
- self.enterContext(import_helper.CleanImport('codecs'))
- self.codecs = import_helper.import_module('codecs')
-
def test_codec_register_error(self):
self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
@@ -754,24 +755,25 @@ def error_handler(exc):
error_handler = mock.Mock(wraps=error_handler)
_testcapi.codec_register_error('custom', error_handler)
+ # self.addCleanup(codecs.unregister_error, 'custom')
- self.assertRaises(UnicodeEncodeError, self.codecs.encode,
+ self.assertRaises(UnicodeEncodeError, codecs.encode,
'\xff', 'ascii', errors='custom')
error_handler.assert_called_once()
error_handler.reset_mock()
- self.assertRaises(UnicodeDecodeError, self.codecs.decode,
+ self.assertRaises(UnicodeDecodeError, codecs.decode,
b'\xff', 'ascii', errors='custom')
error_handler.assert_called_once()
def test_codec_lookup_error(self):
codec_lookup_error = _testcapi.codec_lookup_error
- self.assertIs(codec_lookup_error(NULL), self.codecs.strict_errors)
- self.assertIs(codec_lookup_error('strict'), self.codecs.strict_errors)
- self.assertIs(codec_lookup_error('ignore'), self.codecs.ignore_errors)
- self.assertIs(codec_lookup_error('replace'), self.codecs.replace_errors)
- self.assertIs(codec_lookup_error('xmlcharrefreplace'), self.codecs.xmlcharrefreplace_errors)
- self.assertIs(codec_lookup_error('namereplace'), self.codecs.namereplace_errors)
+ self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('strict'), codecs.strict_errors)
+ self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
+ self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
+ self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
+ self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'custom')
def test_codec_error_handlers(self):
From 6a36eb08466e2a610dbbc2621c5c81482fd847cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Fri, 27 Sep 2024 13:30:12 +0200
Subject: [PATCH 10/20] small hack to make the test suite correct
---
Lib/test/test_capi/test_codecs.py | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 4865f4384422ac..60b48ae099dcd3 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -7,6 +7,7 @@
import unittest
import unittest.mock as mock
import _testcapi
+from getopt import error
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
@@ -748,14 +749,19 @@ def test_codec_stream_writer(self):
class CAPICodecErrors(unittest.TestCase):
def test_codec_register_error(self):
- self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
+ try:
+ error_handler = _testcapi.codec_lookup_error('custom')
+ except LookupError:
+ error_handler = None
- def error_handler(exc):
- raise exc
+ if error_handler is None:
+ def custom_error_handler(exc):
+ raise exc
- error_handler = mock.Mock(wraps=error_handler)
- _testcapi.codec_register_error('custom', error_handler)
- # self.addCleanup(codecs.unregister_error, 'custom')
+ error_handler = mock.Mock(wraps=custom_error_handler)
+ _testcapi.codec_register_error('custom', error_handler)
+ else:
+ self.assertIsInstance(error_handler, mock.Mock)
self.assertRaises(UnicodeEncodeError, codecs.encode,
'\xff', 'ascii', errors='custom')
@@ -765,6 +771,7 @@ def error_handler(exc):
self.assertRaises(UnicodeDecodeError, codecs.decode,
b'\xff', 'ascii', errors='custom')
error_handler.assert_called_once()
+ error_handler.reset_mock()
def test_codec_lookup_error(self):
codec_lookup_error = _testcapi.codec_lookup_error
@@ -774,7 +781,7 @@ def test_codec_lookup_error(self):
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
- self.assertRaises(LookupError, codec_lookup_error, 'custom')
+ self.assertRaises(LookupError, codec_lookup_error, 'unknown')
def test_codec_error_handlers(self):
exceptions = [
From 145b285f6f6200eda8fa90155a2abb4846e2a769 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sat, 28 Sep 2024 18:29:06 +0200
Subject: [PATCH 11/20] remove unnecessary imports
---
Lib/test/test_capi/test_codecs.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 60b48ae099dcd3..29642203b44878 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -1,13 +1,11 @@
import codecs
import contextlib
import io
-import os
import re
import sys
import unittest
import unittest.mock as mock
import _testcapi
-from getopt import error
from test.support import import_helper
_testlimitedcapi = import_helper.import_module('_testlimitedcapi')
From 7be1f555f9f9483525d79a7596c4c30cd9e3a6a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 09:58:54 +0200
Subject: [PATCH 12/20] use `_codecs._unregister_error` to cleanup test state
---
Lib/test/test_capi/test_codecs.py | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 29642203b44878..d9e2524a63fa88 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -747,19 +747,17 @@ def test_codec_stream_writer(self):
class CAPICodecErrors(unittest.TestCase):
def test_codec_register_error(self):
- try:
- error_handler = _testcapi.codec_lookup_error('custom')
- except LookupError:
- error_handler = None
+ # for cleaning up between tests
+ from _codecs import _unregister_error as _codecs_unregister_error
- if error_handler is None:
- def custom_error_handler(exc):
- raise exc
+ self.assertRaises(LookupError, _testcapi.codec_lookup_error, 'custom')
- error_handler = mock.Mock(wraps=custom_error_handler)
- _testcapi.codec_register_error('custom', error_handler)
- else:
- self.assertIsInstance(error_handler, mock.Mock)
+ def custom_error_handler(exc):
+ raise exc
+
+ error_handler = mock.Mock(wraps=custom_error_handler)
+ _testcapi.codec_register_error('custom', error_handler)
+ self.addCleanup(_codecs_unregister_error, 'custom')
self.assertRaises(UnicodeEncodeError, codecs.encode,
'\xff', 'ascii', errors='custom')
@@ -769,7 +767,6 @@ def custom_error_handler(exc):
self.assertRaises(UnicodeDecodeError, codecs.decode,
b'\xff', 'ascii', errors='custom')
error_handler.assert_called_once()
- error_handler.reset_mock()
def test_codec_lookup_error(self):
codec_lookup_error = _testcapi.codec_lookup_error
From f72be5c600824765297441319b8faa523036143a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:08:28 +0200
Subject: [PATCH 13/20] indicate some semantics for NULL case being tested
---
Modules/_testcapi/codec.c | 39 ++++++++++++++++-----------------------
Modules/_testcapi/util.h | 10 ++++++++++
2 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 6aa19c2055d6b6..2e4712bd55316f 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -1,12 +1,5 @@
#include "parts.h"
-
-/*
- * The Codecs C API assume that 'encoding' is not NULL, lest
- * it uses PyErr_BadArgument() to set a TypeError exception.
- *
- * In this file, we allow to call the functions using None
- * as NULL to explicitly check this behaviour.
- */
+#include "util.h"
// === Codecs registration and un-registration ================================
@@ -44,8 +37,8 @@ static PyObject *
codec_encode(PyObject *Py_UNUSED(module), PyObject *args)
{
PyObject *input;
- const char *encoding; // should not be NULL (see top-file comment)
- const char *errors; // can be NULL
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
return NULL;
}
@@ -56,8 +49,8 @@ static PyObject *
codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
{
PyObject *input;
- const char *encoding; // should not be NULL (see top-file comment)
- const char *errors; // can be NULL
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "O|zz", &input, &encoding, &errors)) {
return NULL;
}
@@ -67,7 +60,7 @@ codec_decode(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
@@ -77,7 +70,7 @@ codec_encoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
@@ -87,8 +80,8 @@ codec_decoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
- const char *errors; // can be NULL
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
return NULL;
}
@@ -98,8 +91,8 @@ codec_incremental_encoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
- const char *errors; // can be NULL
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "zz", &encoding, &errors)) {
return NULL;
}
@@ -109,9 +102,9 @@ codec_incremental_decoder(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
PyObject *stream;
- const char *errors; // can be NULL
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
return NULL;
}
@@ -121,9 +114,9 @@ codec_stream_reader(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_stream_writer(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
PyObject *stream;
- const char *errors; // can be NULL
+ const char *errors; // can be NULL
if (!PyArg_ParseTuple(args, "zOz", &encoding, &stream, &errors)) {
return NULL;
}
@@ -149,7 +142,7 @@ codec_register_error(PyObject *Py_UNUSED(module), PyObject *args)
static PyObject *
codec_lookup_error(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // can be NULL
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
diff --git a/Modules/_testcapi/util.h b/Modules/_testcapi/util.h
index f26d7656a10138..042e522542eddb 100644
--- a/Modules/_testcapi/util.h
+++ b/Modules/_testcapi/util.h
@@ -31,3 +31,13 @@ static const char uninitialized[] = "uninitialized";
#define UNINITIALIZED_SIZE ((Py_ssize_t)236892191)
/* Marker to check that integer value was set. */
#define UNINITIALIZED_INT (63256717)
+/*
+ * Marker to indicate that a NULL parameter would not be allowed
+ * at runtime but that the test interface will check that it is
+ * indeed the case.
+ *
+ * Use this macro only if passing NULL to the C API would raise
+ * a catchable exception (and not a fatal exception that would
+ * crash the interpreter).
+ */
+ #define NULL_WOULD_RAISE(NAME) NAME
From 4d02c6ceedc344e538ebbf948838c22adad2abdc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:08:36 +0200
Subject: [PATCH 14/20] revert a cosmetic change
---
Modules/_testcapi/codec.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 2e4712bd55316f..2bcfcd64d01794 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -222,9 +222,9 @@ static PyMethodDef test_methods[] = {
};
int
-_PyTestCapi_Init_Codec(PyObject *module)
+_PyTestCapi_Init_Codec(PyObject *m)
{
- if (PyModule_AddFunctions(module, test_methods) < 0) {
+ if (PyModule_AddFunctions(m, test_methods) < 0) {
return -1;
}
return 0;
From 0f26ca7ee4179d4688bbe64f55348a1fef1607b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:23:36 +0200
Subject: [PATCH 15/20] Move `PyCodec_NameReplaceErrors` test to the
`_testlimitedcapi` module
---
Lib/test/test_capi/test_codecs.py | 4 ++--
Modules/Setup.stdlib.in | 2 +-
Modules/_testcapi/codec.c | 13 +------------
Modules/_testlimitedcapi.c | 3 +++
Modules/_testlimitedcapi/codec.c | 29 +++++++++++++++++++++++++++++
Modules/_testlimitedcapi/parts.h | 1 +
6 files changed, 37 insertions(+), 15 deletions(-)
create mode 100644 Modules/_testlimitedcapi/codec.c
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index d9e2524a63fa88..91d36b65afba46 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -797,11 +797,11 @@ def test_codec_error_handlers(self):
_testcapi.codec_ignore_errors,
_testcapi.codec_replace_errors,
_testcapi.codec_xmlcharrefreplace_errors,
- _testcapi.codec_namereplace_errors,
+ _testlimitedcapi.codec_namereplace_errors,
]:
for exc in exceptions:
with self.subTest(handler=handler, exc=exc):
- handler(exc)
+ self.assertIsInstance(handler(exc), tuple)
if __name__ == "__main__":
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index 9aa398a80efa1b..52c0f883d383db 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -163,7 +163,7 @@
@MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c
@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c
@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/run.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c _testcapi/object.c _testcapi/monitoring.c _testcapi/config.c
-@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
+@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/codec.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c
@MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c
@MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index 2bcfcd64d01794..ba614055915235 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -184,15 +184,6 @@ codec_backslashreplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
return PyCodec_BackslashReplaceErrors(exc);
}
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
-static PyObject *
-codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
-{
- assert(exc != NULL);
- return PyCodec_NameReplaceErrors(exc);
-}
-#endif
-
static PyMethodDef test_methods[] = {
/* codecs registration */
{"codec_register", codec_register, METH_O},
@@ -215,9 +206,7 @@ static PyMethodDef test_methods[] = {
{"codec_replace_errors", codec_replace_errors, METH_O},
{"codec_xmlcharrefreplace_errors", codec_xmlcharrefreplace_errors, METH_O},
{"codec_backslashreplace_errors", codec_backslashreplace_errors, METH_O},
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
- {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
-#endif
+ // PyCodec_NameReplaceErrors() is tested in _testlimitedcapi/codec.c
{NULL, NULL, 0, NULL},
};
diff --git a/Modules/_testlimitedcapi.c b/Modules/_testlimitedcapi.c
index e74cbfe19871bf..ba83a23117b2a5 100644
--- a/Modules/_testlimitedcapi.c
+++ b/Modules/_testlimitedcapi.c
@@ -38,6 +38,9 @@ PyInit__testlimitedcapi(void)
if (_PyTestLimitedCAPI_Init_Bytes(mod) < 0) {
return NULL;
}
+ if (_PyTestLimitedCAPI_Init_Codec(mod) < 0) {
+ return NULL;
+ }
if (_PyTestLimitedCAPI_Init_Complex(mod) < 0) {
return NULL;
}
diff --git a/Modules/_testlimitedcapi/codec.c b/Modules/_testlimitedcapi/codec.c
new file mode 100644
index 00000000000000..fdc18eedc2d288
--- /dev/null
+++ b/Modules/_testlimitedcapi/codec.c
@@ -0,0 +1,29 @@
+#include "pyconfig.h" // Py_GIL_DISABLED
+
+// Need limited C API version 3.5 for PyCodec_NameReplaceErrors()
+#if !defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API)
+# define Py_LIMITED_API 0x03050000
+#endif
+
+#include "parts.h"
+
+static PyObject *
+codec_namereplace_errors(PyObject *Py_UNUSED(module), PyObject *exc)
+{
+ assert(exc != NULL);
+ return PyCodec_NameReplaceErrors(exc);
+}
+
+static PyMethodDef test_methods[] = {
+ {"codec_namereplace_errors", codec_namereplace_errors, METH_O},
+ {NULL},
+};
+
+int
+_PyTestLimitedCAPI_Init_Codec(PyObject *module)
+{
+ if (PyModule_AddFunctions(module, test_methods) < 0) {
+ return -1;
+ }
+ return 0;
+}
diff --git a/Modules/_testlimitedcapi/parts.h b/Modules/_testlimitedcapi/parts.h
index 12b890853803f4..4107b150c5b4e0 100644
--- a/Modules/_testlimitedcapi/parts.h
+++ b/Modules/_testlimitedcapi/parts.h
@@ -25,6 +25,7 @@
int _PyTestLimitedCAPI_Init_Abstract(PyObject *module);
int _PyTestLimitedCAPI_Init_ByteArray(PyObject *module);
int _PyTestLimitedCAPI_Init_Bytes(PyObject *module);
+int _PyTestLimitedCAPI_Init_Codec(PyObject *module);
int _PyTestLimitedCAPI_Init_Complex(PyObject *module);
int _PyTestLimitedCAPI_Init_Dict(PyObject *module);
int _PyTestLimitedCAPI_Init_Eval(PyObject *module);
From 1399779a3d9aee5ebf548bbcd040f8d5f70e1ac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:32:52 +0200
Subject: [PATCH 16/20] add comment explaining why we do not test
`_PyCodec_UnregisterError`
---
Lib/test/test_capi/test_codecs.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index 91d36b65afba46..be64871c67d2ba 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -768,6 +768,10 @@ def custom_error_handler(exc):
b'\xff', 'ascii', errors='custom')
error_handler.assert_called_once()
+ # _codecs._unregister_error directly delegates to the internal C
+ # function so a Python-level function test is sufficient (it is
+ # tested in test_codeccallbacks).
+
def test_codec_lookup_error(self):
codec_lookup_error = _testcapi.codec_lookup_error
self.assertIs(codec_lookup_error(NULL), codecs.strict_errors)
From 914151e1a9206fbe39ba341506d9fbe819335a06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:34:22 +0200
Subject: [PATCH 17/20] update a comment
---
Modules/_testcapi/codec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index ba614055915235..dee093d35ea070 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -24,7 +24,7 @@ codec_unregister(PyObject *Py_UNUSED(module), PyObject *search_function)
static PyObject *
codec_known_encoding(PyObject *Py_UNUSED(module), PyObject *args)
{
- const char *encoding; // should not be NULL (see top-file comment)
+ const char *NULL_WOULD_RAISE(encoding); // NULL case will be tested
if (!PyArg_ParseTuple(args, "z", &encoding)) {
return NULL;
}
From 8dd7e8d5e8767de4a33b82fe105fc318b85fe079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:35:49 +0200
Subject: [PATCH 18/20] revert one cosmetic change
---
Modules/_testcapi/codec.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/Modules/_testcapi/codec.c b/Modules/_testcapi/codec.c
index dee093d35ea070..e27e64e066c458 100644
--- a/Modules/_testcapi/codec.c
+++ b/Modules/_testcapi/codec.c
@@ -216,5 +216,6 @@ _PyTestCapi_Init_Codec(PyObject *m)
if (PyModule_AddFunctions(m, test_methods) < 0) {
return -1;
}
+
return 0;
}
From 1e6a5ce9f32ea34a9bc515f2c2afcb138b89d912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:52:28 +0200
Subject: [PATCH 19/20] Fix Windows compilation
---
PCbuild/_testlimitedcapi.vcxproj | 1 +
PCbuild/_testlimitedcapi.vcxproj.filters | 1 +
2 files changed, 2 insertions(+)
diff --git a/PCbuild/_testlimitedcapi.vcxproj b/PCbuild/_testlimitedcapi.vcxproj
index a1409ecf043d2d..846e027e10c7fa 100644
--- a/PCbuild/_testlimitedcapi.vcxproj
+++ b/PCbuild/_testlimitedcapi.vcxproj
@@ -97,6 +97,7 @@
+
diff --git a/PCbuild/_testlimitedcapi.vcxproj.filters b/PCbuild/_testlimitedcapi.vcxproj.filters
index e27e3171e1e6aa..57be2e2fc5b950 100644
--- a/PCbuild/_testlimitedcapi.vcxproj.filters
+++ b/PCbuild/_testlimitedcapi.vcxproj.filters
@@ -12,6 +12,7 @@
+
From 2ba5f031ed47eea1058f700e15a52046eb5cf37c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
<10796600+picnixz@users.noreply.github.com>
Date: Sun, 29 Sep 2024 16:58:29 +0200
Subject: [PATCH 20/20] address Victor's review
---
Lib/test/test_capi/test_codecs.py | 39 ++++++++++++++++---------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py
index be64871c67d2ba..85491a89947318 100644
--- a/Lib/test/test_capi/test_codecs.py
+++ b/Lib/test/test_capi/test_codecs.py
@@ -528,7 +528,7 @@ class CAPICodecs(unittest.TestCase):
def setUp(self):
# Encoding names are normalized internally by converting them
# to lowercase and their hyphens are replaced by underscores.
- self.encoding_name = f'codec_reversed_{id(self)}'
+ self.encoding_name = 'test.test_capi.test_codecs.codec_reversed'
# Make sure that our custom codec is not already registered (that
# way we know whether we correctly unregistered the custom codec
# after a test or not).
@@ -658,12 +658,13 @@ def test_codec_decode(self):
self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)
- # _codecs.decode() only reports unknown errors policy when they are
- # used; this is different from PyUnicode_Decode() which checks that
- # both the encoding and the errors policy are recognized before even
- # attempting to call the decoder.
- self.assertEqual(decode(b'', 'utf-8', 'unknown-errors-policy'), '')
- self.assertEqual(decode(b'a', 'utf-8', 'unknown-errors-policy'), 'a')
+ # _codecs.decode() only reports an unknown error handling name when
+ # the corresponding error handling function is used; this differs
+ # from PyUnicode_Decode() which checks that both the encoding and
+ # the error handling name are recognized before even attempting to
+ # call the decoder.
+ self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '')
+ self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a')
self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -695,9 +696,9 @@ def test_codec_incremental_encoder(self):
with self.use_custom_encoder():
encoding = self.encoding_name
- for policy in ['strict', NULL]:
- with self.subTest(policy=policy):
- encoder = codec_incremental_encoder(encoding, policy)
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ encoder = codec_incremental_encoder(encoding, errors)
self.assertIsInstance(encoder, self.codec_info.incrementalencoder)
with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -709,9 +710,9 @@ def test_codec_incremental_decoder(self):
with self.use_custom_encoder():
encoding = self.encoding_name
- for policy in ['strict', NULL]:
- with self.subTest(policy=policy):
- decoder = codec_incremental_decoder(encoding, policy)
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ decoder = codec_incremental_decoder(encoding, errors)
self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)
with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -722,9 +723,9 @@ def test_codec_stream_reader(self):
with self.use_custom_encoder():
encoding, stream = self.encoding_name, io.StringIO()
- for policy in ['strict', NULL]:
- with self.subTest(policy=policy):
- writer = codec_stream_reader(encoding, stream, policy)
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_reader(encoding, stream, errors)
self.assertIsInstance(writer, self.codec_info.streamreader)
with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
@@ -735,9 +736,9 @@ def test_codec_stream_writer(self):
with self.use_custom_encoder():
encoding, stream = self.encoding_name, io.StringIO()
- for policy in ['strict', NULL]:
- with self.subTest(policy=policy):
- writer = codec_stream_writer(encoding, stream, policy)
+ for errors in ['strict', NULL]:
+ with self.subTest(errors):
+ writer = codec_stream_writer(encoding, stream, errors)
self.assertIsInstance(writer, self.codec_info.streamwriter)
with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):