diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst
index dd67fc34e856f1..b2fa043625c7e1 100644
--- a/Doc/library/html.parser.rst
+++ b/Doc/library/html.parser.rst
@@ -121,6 +121,17 @@ The output will then be:
attributes can be preserved, etc.).
+.. method:: HTMLParser.support_cdata(flag)
+
+ Sets how the parser will parse CDATA declarations.
+ If *flag* is true, then the :meth:`unknown_decl` method will be called
+ for the CDATA section ``<![CDATA[...]]>``.
+ If *flag* is false, then the :meth:`handle_comment` method will be called
+ for ``<![CDATA[...]]>``.
+
+ .. versionadded:: 3.13.6
+
+
The following methods are called when data or markup elements are encountered
and they are meant to be overridden in a subclass. The base class
implementations do nothing (except for :meth:`~HTMLParser.handle_startendtag`):
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index cc15de07b5bae6..88a084dcf1ce7d 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -144,6 +144,7 @@ def reset(self):
self.lasttag = '???'
self.interesting = interesting_normal
self.cdata_elem = None
+ self._support_cdata = False
super().reset()
def feed(self, data):
@@ -174,6 +175,9 @@ def clear_cdata_mode(self):
self.interesting = interesting_normal
self.cdata_elem = None
+ def support_cdata(self, flag=True):
+ self._support_cdata = flag
+
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
@@ -249,7 +253,10 @@ def goahead(self, end):
break
self.handle_comment(rawdata[i+4:j])
elif startswith("', i+9)
+ if j < 0:
+ return -1
+ self.unknown_decl(rawdata[i+3: j])
+ return j + 3
+ else:
+ return self.parse_bogus_comment(i)
elif rawdata[i:i+9].lower() == '
gtpos = rawdata.find('>', i+9)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index d0d2c54217ccaf..65fbf5d7b618fd 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -34,12 +34,16 @@ def get_events(self):
def handle_starttag(self, tag, attrs):
self.append(("starttag", tag, attrs))
+ if tag == 'svg':
+ self.support_cdata(True)
def handle_startendtag(self, tag, attrs):
self.append(("startendtag", tag, attrs))
def handle_endtag(self, tag):
self.append(("endtag", tag))
+ if tag == 'svg':
+ self.support_cdata(False)
# all other markup
@@ -643,10 +647,22 @@ def test_eof_in_declarations(self):
('')
- expected = [('unknown decl', 'CDATA[just some plain text')]
+ @support.subTests('content', [
+ 'just some plain text',
+ '',
+ '¬-an-entity-ref;',
+ "",
+ '',
+ '[[I have many brackets]]',
+ 'I have a > in the middle',
+ 'I have a ]] in the middle',
+ '] ]>',
+ ']] >',
+ ('\n'
+ ' if (a < b && a > b) {\n'
+ ' printf("[]");\n'
+ ' }\n'),
+ ])
+ def test_cdata_section_content(self, content):
+ # See "13.2.5.42 Markup declaration open state",
+ # "13.2.5.69 CDATA section state", and issue bpo-32876.
+ html = f''
+ expected = [
+ ('starttag', 'svg', []),
+ ('starttag', 'text', [('y', '100')]),
+ ('unknown decl', 'CDATA[' + content),
+ ('endtag', 'text'),
+ ('endtag', 'svg'),
+ ]
self._run_check(html, expected)
- def test_cdata_declarations_multiline(self):
- html = (' b) {'
- ' printf("[]");'
- ' }'
- ']]>')
+ def test_cdata_section(self):
+ # See "13.2.5.42 Markup declaration open state".
+ html = ('bar]]>'
+ ''
+ 'bar]]>')
expected = [
- ('starttag', 'code', []),
- ('unknown decl',
- 'CDATA[ if (a < b && a > b) { '
- 'printf("[]"); }'),
- ('endtag', 'code')
+ ('comment', '[CDATA[foo '),
+ ('starttag', 'svg', []),
+ ('starttag', 'text', [('y', '100')]),
+ ('unknown decl', 'CDATA[foo bar'),
+ ('endtag', 'text'),
+ ('endtag', 'svg'),
+ ('comment', '[CDATA[foo '),
]
self._run_check(html, expected)
diff --git a/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst b/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst
new file mode 100644
index 00000000000000..59c76d50f79443
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst
@@ -0,0 +1,2 @@
+Fix CDATA section parsing in :class:`html.parser.HTMLParser` according to
+the HTML5 standard: ``] ]>`` and ``]] >`` no longer end the CDATA section.