Skip to content

gh-126033: fix a crash in xml.etree.ElementTree.Element.remove when concurrent mutations happen #126124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
abc121c
Add versioning to XML elements.
picnixz Oct 27, 2024
4efa517
fix tests
picnixz Oct 27, 2024
4ca3cf0
fix portability issues
picnixz Oct 27, 2024
a1950d1
fixup
picnixz Oct 28, 2024
9b6f559
unify versioning
picnixz Oct 29, 2024
00a7a7e
handle evil mutations in `Element.remove`
picnixz Oct 29, 2024
59ade8f
blurb
picnixz Oct 29, 2024
7fc9932
improve NEWS entry formulation
picnixz Dec 6, 2024
e30756d
remove versioning
picnixz Dec 8, 2024
f2b5bb1
fix tests
picnixz Dec 8, 2024
9f73517
improve detection and tests
picnixz Dec 15, 2024
70f2aad
revert whitespaces
picnixz Dec 15, 2024
756b1eb
amend NEWS
picnixz Dec 17, 2024
4b74caf
improve test coverage
picnixz Dec 17, 2024
fd29203
align C implementation with Python implementation as much as possible
picnixz Dec 17, 2024
e7033b6
fix tests and improve coverage
picnixz Dec 17, 2024
4d3cdd7
fix tests (i'll explain afterwards)
picnixz Dec 17, 2024
883e8d2
improve comments
picnixz Dec 17, 2024
220b669
change root name to avoid special wildcards
picnixz Dec 19, 2024
7f26430
avoid strong reference on the child to remove
picnixz Dec 19, 2024
e8d84c8
address Serhiy's review
picnixz Dec 19, 2024
3a43c0f
Merge branch 'main' into fix/xml-evil-remove-126033
picnixz Jan 19, 2025
bc52c04
Update Misc/NEWS.d/next/Library/2024-10-29-12-59-45.gh-issue-126033.s…
picnixz Feb 4, 2025
f4a4dae
remove un-necessary subtest parameter
picnixz Feb 8, 2025
09a9fa9
Reduce the visual size of the tests
picnixz Feb 8, 2025
f5d352f
fixup docs
picnixz Feb 8, 2025
2b47468
Merge branch 'main' into fix/xml-evil-remove-126033
picnixz Feb 8, 2025
dc576ab
Merge branch 'main' into fix/xml-evil-remove-126033
picnixz Feb 22, 2025
ce66ac7
Merge branch 'main' into fix/xml-evil-remove-126033
picnixz Mar 14, 2025
04cb600
do not pedantically test the pure Python implementation
picnixz Mar 15, 2025
a0c2324
Merge branch 'main' into fix/xml-evil-remove-126033
picnixz Mar 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 178 additions & 10 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
import textwrap
import types
import unittest
import unittest.mock as mock
import warnings
import weakref

from contextlib import nullcontext
from functools import partial
from itertools import product, islice
from test import support
Expand Down Expand Up @@ -121,6 +123,21 @@
</foo>
"""

def is_python_implementation():
assert ET is not None, "ET must be initialized"
assert pyET is not None, "pyET must be initialized"
return ET is pyET


def equal_wrapper(cls):
"""Mock cls.__eq__ to check whether it has been called or not.

The behaviour of cls.__eq__ (side-effects included) is left as is.
"""
eq = cls.__eq__
return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq)


def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
Expand Down Expand Up @@ -2642,6 +2659,7 @@ def test_pickle_issue18997(self):


class BadElementTest(ElementTestCase, unittest.TestCase):

def test_extend_mutable_list(self):
class X:
@property
Expand Down Expand Up @@ -2680,18 +2698,168 @@ class Y(X, ET.Element):
e = ET.Element('foo')
e.extend(L)

def test_remove_with_mutating(self):
class X(ET.Element):
def test_remove_with_clear_assume_missing(self):
# gh-126033: Check that a concurrent clear() for an assumed-to-be
# missing element does not make the interpreter crash.
self.do_test_remove_with_clear(raises=True)

def test_remove_with_clear_assume_existing(self):
# gh-126033: Check that a concurrent clear() for an assumed-to-be
# existing element does not make the interpreter crash.
self.do_test_remove_with_clear(raises=False)

def do_test_remove_with_clear(self, *, raises):

# Until the discrepency between "del root[:]" and "root.clear()" is
# resolved, we need to keep two tests. Previously, using "del root[:]"
# did not crash with the reproducer of gh-126033 while "root.clear()"
# did.

class E(ET.Element):
"""Local class to be able to mock E.__eq__ for introspection."""

class X(E):
def __eq__(self, o):
del e[:]
return False
e = ET.Element('foo')
e.extend([X('bar')])
self.assertRaises(ValueError, e.remove, ET.Element('baz'))
del root[:]
return not raises

e = ET.Element('foo')
e.extend([ET.Element('bar')])
self.assertRaises(ValueError, e.remove, X('baz'))
class Y(E):
def __eq__(self, o):
root.clear()
return not raises

if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext

self.assertIs(E.__eq__, object.__eq__)

for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]:
self.enterContext(self.subTest(side_effect=side_effect))

# test removing R() from [U()]
for R, U, description in [
(E, Z, "remove missing E() from [Z()]"),
(Z, E, "remove missing Z() from [E()]"),
(Z, Z, "remove missing Z() from [Z()]"),
]:
with self.subTest(description):
root = E('top')
root.extend([U('one')])
with get_checker_context():
root.remove(R('missing'))

# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))

# Test removing root[0] from [Z()].
#
# Since we call root.remove() with root[0], Z.__eq__()
# will not be called (we branch on the fast Py_EQ path).
with self.subTest("remove root[0] from [Z()]"):
root = E('top')
root.append(Z('rem'))
with equal_wrapper(E) as f, equal_wrapper(Z) as g:
root.remove(root[0])
f.assert_not_called()
g.assert_not_called()

# Test removing root[1] (of type R) from [U(), R()].
is_special = is_python_implementation() and raises and Z is Y
if is_python_implementation() and raises and Z is Y:
# In pure Python, using root.clear() sets the children
# list to [] without calling list.clear().
#
# For this reason, the call to root.remove() first
# checks root[0] and sets the children list to []
# since either root[0] or root[1] is an evil element.
#
# Since checking root[1] still uses the old reference
# to the children list, PyObject_RichCompareBool() branches
# to the fast Py_EQ path and Y.__eq__() is called exactly
# once (when checking root[0]).
continue
else:
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])

def test_remove_with_mutate_root_assume_missing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# missing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=True)

def test_remove_with_mutate_root_assume_existing(self):
# gh-126033: Check that a concurrent mutation for an assumed-to-be
# existing element does not make the interpreter crash.
self.do_test_remove_with_mutate_root(raises=False)

def do_test_remove_with_mutate_root(self, *, raises):
E = ET.Element

class Z(E):
def __eq__(self, o):
del root[0]
return not raises

if raises:
get_checker_context = lambda: self.assertRaises(ValueError)
else:
get_checker_context = nullcontext

# test removing R() from [U(), V()]
cases = self.cases_for_remove_missing_with_mutations(E, Z)
for R, U, V, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), V('two')])
with get_checker_context():
root.remove(R('missing'))

# test removing root[1] (of type R) from [U(), R()]
cases = self.cases_for_remove_existing_with_mutations(E, Z)
for R, U, description in cases:
with self.subTest(description):
root = E('top')
root.extend([U('one'), R('rem')])
with get_checker_context():
root.remove(root[1])

def cases_for_remove_missing_with_mutations(self, E, Z):
# Cases for removing R() from [U(), V()].
# The case U = V = R = E is not interesting as there is no mutation.
for U, V in [(E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {E.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield E, U, V, description

for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]:
description = (f"remove missing {Z.__name__}() from "
f"[{U.__name__}(), {V.__name__}()]")
yield Z, U, V, description

def cases_for_remove_existing_with_mutations(self, E, Z):
# Cases for removing root[1] (of type R) from [U(), R()].
# The case U = R = E is not interesting as there is no mutation.
for U, R, description in [
(E, Z, "remove root[1] from [E(), Z()]"),
(Z, E, "remove root[1] from [Z(), E()]"),
(Z, Z, "remove root[1] from [Z(), Z()]"),
]:
description = (f"remove root[1] (of type {R.__name__}) "
f"from [{U.__name__}(), {R.__name__}()]")
yield R, U, description

@support.infinite_recursion(25)
def test_recursive_repr(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove
<xml.etree.ElementTree.Element.remove>` when the element is
concurrently mutated. Patch by Bénédikt Tran.
58 changes: 32 additions & 26 deletions Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
if (element_resize(element, self->extra->length) < 0)
goto error;

// TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
for (i = 0; i < self->extra->length; i++) {
PyObject* child = deepcopy(st, self->extra->children[i], memo);
if (!child || !Element_Check(st, child)) {
Expand Down Expand Up @@ -1637,42 +1638,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/
{
Py_ssize_t i;
int rc;
PyObject *found;

if (!self->extra) {
/* element has no children, so raise exception */
PyErr_SetString(
PyExc_ValueError,
"list.remove(x): x not in list"
);
return NULL;
}

for (i = 0; i < self->extra->length; i++) {
if (self->extra->children[i] == subelement)
// When iterating over the list of children, we need to check that the
// list is not cleared (self->extra != NULL) and that we are still within
// the correct bounds (i < self->extra->length).
//
// We deliberately avoid protecting against children lists that grow
// faster than the index since list objects do not protect against it.
int rc = 0;
for (i = 0; self->extra && i < self->extra->length; i++) {
if (self->extra->children[i] == subelement) {
rc = 1;
break;
rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
if (rc > 0)
break;
if (rc < 0)
}
PyObject *child = Py_NewRef(self->extra->children[i]);
rc = PyObject_RichCompareBool(child, subelement, Py_EQ);
Py_DECREF(child);
if (rc < 0) {
return NULL;
}
else if (rc > 0) {
break;
}
}

if (i >= self->extra->length) {
/* subelement is not in children, so raise exception */
PyErr_SetString(
PyExc_ValueError,
"list.remove(x): x not in list"
);
if (rc == 0) {
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}

found = self->extra->children[i];
// An extra check must be done if the mutation occurs at the very last
// step and removes or clears the 'extra' list (the condition on the
// length would not be satisfied any more).
if (self->extra == NULL || i >= self->extra->length) {
Py_RETURN_NONE;
}

PyObject *found = self->extra->children[i];

self->extra->length--;
for (; i < self->extra->length; i++)
for (; i < self->extra->length; i++) {
self->extra->children[i] = self->extra->children[i+1];
}

Py_DECREF(found);
Py_RETURN_NONE;
Expand Down
Loading