From 6786f734b23ff59af3bc13021c1c3f7c860a1905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Jun 2025 13:22:30 +0200 Subject: [PATCH 1/3] fix quadratic-complexity parsing in `email.message._parseparam` --- Lib/email/message.py | 22 +++++++++++-------- Lib/test/test_email/test_email.py | 11 ++++++++++ ...-06-28-13-23-53.gh-issue-136063.aGk0Jv.rst | 2 ++ 3 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-28-13-23-53.gh-issue-136063.aGk0Jv.rst diff --git a/Lib/email/message.py b/Lib/email/message.py index 41fcc2b9778798..1b4fcc23f9c477 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -74,19 +74,23 @@ def _parseparam(s): # RDM This might be a Header, so for now stringify it. s = ';' + str(s) plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + start = 0 + while s.find(';', start) == start: + start += 1 + end = s.find(';', start) + while end > 0 and ( + s.count('"', start, end) - s.count('\\"', start, end) + ) % 2: end = s.find(';', end + 1) if end < 0: end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() + i = s.find('=', start, end) + if i == -1: + f = s[start:end] + else: + f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip() plist.append(f.strip()) - s = s[end:] + start = end return plist diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b8116d073a2670..be959f5510ad84 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -481,6 +481,17 @@ def test_get_param_with_quotes(self): "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"") self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') + def test_get_param_linear_complexity(self): + # Ensure that email.message._parseparam() is fast. 
+ # See https://github.com/python/cpython/issues/136063. + N = 100_000 + res = email.message._parseparam(';' * N) + self.assertEqual(res, [''] * N) + res = email.message._parseparam('foo=bar;' * N) + self.assertEqual(res, ['foo=bar'] * N) + res = email.message._parseparam(' FOO = bar ;' * N) + self.assertEqual(res, ['foo=bar'] * N) + def test_field_containment(self): msg = email.message_from_string('Header: exists') self.assertIn('header', msg) diff --git a/Misc/NEWS.d/next/Security/2025-06-28-13-23-53.gh-issue-136063.aGk0Jv.rst b/Misc/NEWS.d/next/Security/2025-06-28-13-23-53.gh-issue-136063.aGk0Jv.rst new file mode 100644 index 00000000000000..940a3ad5a72f68 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-06-28-13-23-53.gh-issue-136063.aGk0Jv.rst @@ -0,0 +1,2 @@ +:mod:`email.message`: ensure linear complexity for legacy HTTP parameter +parsing. Patch by Bénédikt Tran. From d356a1449474b3bdd722529ad78b29d281431ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Jun 2025 14:43:38 +0200 Subject: [PATCH 2/3] fix tests --- Lib/test/test_email/test_email.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index be959f5510ad84..c52091e2963c7a 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -485,12 +485,17 @@ def test_get_param_linear_complexity(self): # Ensure that email.message._parseparam() is fast. # See https://github.com/python/cpython/issues/136063. 
N = 100_000 - res = email.message._parseparam(';' * N) - self.assertEqual(res, [''] * N) - res = email.message._parseparam('foo=bar;' * N) - self.assertEqual(res, ['foo=bar'] * N) - res = email.message._parseparam(' FOO = bar ;' * N) - self.assertEqual(res, ['foo=bar'] * N) + for s, r in [ + ("", ""), + ("foo=bar", "foo=bar"), + (" FOO = bar ", "foo=bar"), + ]: + with self.subTest(s=s, r=r, N=N): + src = f'{s};' * (N - 1) + s + res = email.message._parseparam(src) + self.assertEqual(len(res), N) + self.assertEqual(len(set(res)), 1) + self.assertEqual(res[0], r) def test_field_containment(self): msg = email.message_from_string('Header: exists') From 2ea02f9d8db4969f0740ebf0aab9cc98b430af5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 20 Jul 2025 10:51:43 +0200 Subject: [PATCH 3/3] eliminate quadratic time complexity when counting --- Lib/email/message.py | 17 +++++++++++++---- Lib/test/test_email/test_email.py | 5 +++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 1b4fcc23f9c477..3d9ffbbdb4547e 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -78,10 +78,19 @@ def _parseparam(s): while s.find(';', start) == start: start += 1 end = s.find(';', start) - while end > 0 and ( - s.count('"', start, end) - s.count('\\"', start, end) - ) % 2: - end = s.find(';', end + 1) + # The following while block is equivalent to: + # + # while end > 0 and ( + # s.count('"', start, end) - s.count('\\"', start, end) + # ) % 2: + # end = s.find(';', end + 1) + # + ind, diff = start, 0 + while end > 0: + diff += s.count('"', ind, end) - s.count('\\"', ind, end) + if diff % 2 == 0: + break + end, ind = ind, s.find(';', end + 1) if end < 0: end = len(s) i = s.find('=', start, end) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index c52091e2963c7a..b458d3f0efaabd 100644 --- 
a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -497,6 +497,11 @@ def test_get_param_linear_complexity(self): self.assertEqual(len(set(res)), 1) self.assertEqual(res[0], r) + # This will be considered as a single parameter. + malformed = 's="' + ';' * (N - 1) + res = email.message._parseparam(malformed) + self.assertEqual(res, [malformed]) + def test_field_containment(self): msg = email.message_from_string('Header: exists') self.assertIn('header', msg)