Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Doc/library/xml.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ large tokens
be used to cause denial of service in the application parsing XML.
The issue is known as :cve:`2023-52425`.

deeply nested elements
An attacker can send XML with an arbitrarily large element nesting depth
to exhaust available stack space or memory. When the C accelerator is in
use, :mod:`xml.etree.ElementTree` limits nesting depth to 5000 levels and
raises :exc:`~xml.etree.ElementTree.ParseError` when the limit is exceeded;
the pure-Python implementation does not enforce this limit.

.. _libexpat: https://github.com/libexpat/libexpat
.. _Billion Laughs: https://en.wikipedia.org/wiki/Billion_laughs
.. _ZIP bomb: https://en.wikipedia.org/wiki/Zip_bomb
85 changes: 85 additions & 0 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4952,5 +4952,90 @@ def cleanup():
unittest.addModuleCleanup(ET._set_factories, *old_factories)



# --------------------------------------------------------------------


class NestingDepthTest(unittest.TestCase):
    """Tests for deeply-nested XML documents (gh-127065).

    xml.etree.ElementTree must raise ParseError instead of crashing
    (SIGSEGV / C stack overflow) when element nesting depth exceeds the
    internal MAX_XML_NESTING_DEPTH limit (5000 levels).

    The guard lives in the C accelerator (_elementtree.c) so these tests
    are skipped when running against the pure-Python implementation.
    """

    # Must match MAX_XML_NESTING_DEPTH in Modules/_elementtree.c
    MAX_DEPTH = 5000

    @classmethod
    def setUpClass(cls):
        # ET is bound at module set-up time, so the check has to happen here
        # rather than in a class decorator evaluated at import time.
        # The nesting depth guard is implemented in the C accelerator only.
        if ET is pyET:
            raise unittest.SkipTest('nesting depth guard requires the C accelerator')

    def _make_deeply_nested_xml(self, depth):
        """Return bytes of a well-formed XML document with *depth* nested elements."""
        return b'<a>' * depth + b'</a>' * depth

    def _assert_depth_limit_error(self, func, *args):
        """Call func(*args) and require the nesting-depth ParseError.

        Checking the message as well as the exception type makes sure the
        failure comes from the depth guard and not from an unrelated
        parsing problem.
        """
        with self.assertRaises(ET.ParseError) as cm:
            func(*args)
        self.assertIn("nesting depth", str(cm.exception))

    def _assert_linear_chain(self, root, depth):
        """Check that *root* heads a single-child chain of *depth* elements."""
        elem = root
        # Walk iteratively: recursing over a 5000-level tree is exactly
        # what these tests are meant to avoid.
        for _ in range(depth - 1):
            self.assertEqual(len(elem), 1)
            elem = elem[0]
        self.assertEqual(len(elem), 0)  # innermost element has no children

    def test_deeply_nested_xml_raises_parse_error(self):
        """Parsing XML deeper than MAX_XML_NESTING_DEPTH must raise ParseError."""
        xml_data = self._make_deeply_nested_xml(self.MAX_DEPTH + 100)
        self._assert_depth_limit_error(ET.fromstring, xml_data)

    def test_moderately_nested_xml_succeeds(self):
        """XML nesting within the limit must parse successfully."""
        depth = 100  # well within any reasonable limit
        root = ET.fromstring(self._make_deeply_nested_xml(depth))
        self._assert_linear_chain(root, depth)

    def test_at_exactly_max_depth_raises_parse_error(self):
        """XML with MAX_DEPTH + 1 nested elements must raise ParseError."""
        # The guard runs before each push and the root is checked at depth 0,
        # so MAX_DEPTH elements fit and the (MAX_DEPTH + 1)-th is the first
        # one to be rejected.
        xml_data = self._make_deeply_nested_xml(self.MAX_DEPTH + 1)
        self._assert_depth_limit_error(ET.fromstring, xml_data)

    def test_at_max_depth_succeeds(self):
        """XML at exactly MAX_DEPTH levels must succeed (boundary check)."""
        # Should parse successfully: only depths strictly greater than
        # MAX_DEPTH are rejected.
        root = ET.fromstring(self._make_deeply_nested_xml(self.MAX_DEPTH))
        self.assertIsNotNone(root)
        self._assert_linear_chain(root, self.MAX_DEPTH)

    def test_treebuilder_nesting_limit(self):
        """TreeBuilder.start() must raise ParseError when depth exceeds limit."""
        tb = ET.TreeBuilder()
        # Open exactly MAX_DEPTH elements; all of these must be accepted.
        for _ in range(self.MAX_DEPTH):
            tb.start('a', {})
        # One more push should raise ParseError
        self._assert_depth_limit_error(tb.start, 'a', {})

    def test_xmlparser_deeply_nested_raises_parse_error(self):
        """XMLParser.feed() with deeply nested XML must raise ParseError."""
        xml_data = self._make_deeply_nested_xml(self.MAX_DEPTH + 100)
        parser = ET.XMLParser()
        self._assert_depth_limit_error(parser.feed, xml_data)


# --------------------------------------------------------------------

if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Add a nesting depth limit (5000 levels) to the C implementation of
:mod:`xml.etree.ElementTree`'s ``TreeBuilder`` to prevent C stack overflows
when the element trees built from deeply nested XML documents are
garbage-collected or copied. :exc:`~xml.etree.ElementTree.ParseError` is
now raised for documents exceeding the limit. Also document "deeply nested
elements" as an XML security attack vector in :doc:`/library/xml`.
28 changes: 27 additions & 1 deletion Modules/_elementtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2738,6 +2738,18 @@ treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
/* -------------------------------------------------------------------- */
/* handlers */

/* Maximum allowed XML element nesting depth in TreeBuilder.
 *
 * Deeply nested XML documents can exhaust the C stack when the resulting
 * tree is later traversed recursively (e.g., during garbage collection or
 * deepcopy).  This constant limits the depth at parse time so that a clean
 * ParseError is raised instead of a C stack overflow crash.
 *
 * The limit is a compile-time constant.  A PYTHON_XML_MAX_NESTING
 * environment variable has been mentioned as a possible future override,
 * but it is reserved only: nothing reads it, so it currently has no effect.
 *
 * Keep this value in sync with NestingDepthTest.MAX_DEPTH in
 * Lib/test/test_xml_etree.py and with the figure quoted in
 * Doc/library/xml.rst.
 *
 * NOTE(review): an earlier version of this comment said the value matches
 * the default nesting limit of the json module and of several other XML
 * parsers; that could not be confirmed here -- verify or drop the claim.
 */
#define MAX_XML_NESTING_DEPTH 5000

LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
PyObject* attrib)
Expand All @@ -2746,6 +2758,17 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
PyObject* this;
elementtreestate *st = self->state;

/* Guard against deeply-nested XML that would cause C stack overflows
* when the resulting tree is traversed recursively later (gh-127065). We
* check self->index *before* pushing, so the root element (index==0) is
* always accepted. */
if (self->index >= MAX_XML_NESTING_DEPTH) {
PyErr_Format(st->parseerror_obj,
"xml nesting depth limit (%d levels) exceeded",
MAX_XML_NESTING_DEPTH);
return NULL;
}

if (treebuilder_flush_data(self) < 0) {
return NULL;
}
Expand Down Expand Up @@ -3066,7 +3089,10 @@ treebuilder_done(TreeBuilderObject* self)
{
PyObject* res;

/* FIXME: check stack size? */
/* XML nesting depth is bounded at parse time by treebuilder_handle_start,
* which raises ParseError when MAX_XML_NESTING_DEPTH is exceeded. This
* prevents C stack overflows when deeply nested trees are later traversed
* recursively (e.g., during garbage collection or deepcopy). */

if (self->root)
res = self->root;
Expand Down
Loading