XML bomb protection for Python stdlib modules
—
Secure ElementTree-based XML parsing with configurable security restrictions. DefusedXML provides drop-in replacements for xml.etree.ElementTree and xml.etree.cElementTree with comprehensive protection against XML attacks while maintaining API compatibility.
Core parsing functions that provide secure alternatives to standard ElementTree parsing operations.
def parse(source, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
"""
Parse XML document from file or file-like object.
Args:
source (str or file-like): File path or file-like object containing XML
parser (XMLParser, optional): Custom parser instance
forbid_dtd (bool): Forbid DTD processing (default: False)
forbid_entities (bool): Forbid entity expansion (default: True)
forbid_external (bool): Forbid external references (default: True)
Returns:
ElementTree: Parsed XML document tree
Raises:
ParseError: XML syntax errors
DTDForbidden: DTD processing attempted when forbidden
EntitiesForbidden: Entity processing attempted when forbidden
ExternalReferenceForbidden: External reference attempted when forbidden
"""
def iterparse(source, events=None, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
"""
Parse XML document incrementally with event-based processing.
Args:
source (str or file-like): File path or file-like object containing XML
events (tuple, optional): Events to report ('start', 'end', 'start-ns', 'end-ns')
parser (XMLParser, optional): Custom parser instance
forbid_dtd (bool): Forbid DTD processing (default: False)
forbid_entities (bool): Forbid entity expansion (default: True)
forbid_external (bool): Forbid external references (default: True)
Returns:
iterator: Iterator yielding (event, element) tuples
Raises:
ParseError: XML syntax errors
DTDForbidden: DTD processing attempted when forbidden
EntitiesForbidden: Entity processing attempted when forbidden
ExternalReferenceForbidden: External reference attempted when forbidden
"""
def fromstring(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
"""
Parse XML document from string.
Args:
text (str or bytes): XML content as string or bytes
forbid_dtd (bool): Forbid DTD processing (default: False)
forbid_entities (bool): Forbid entity expansion (default: True)
forbid_external (bool): Forbid external references (default: True)
Returns:
Element: Root element of parsed XML document
Raises:
ParseError: XML syntax errors
DTDForbidden: DTD processing attempted when forbidden
EntitiesForbidden: Entity processing attempted when forbidden
ExternalReferenceForbidden: External reference attempted when forbidden
"""
def XML(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
"""
Parse XML document from string (alias for fromstring).
Args:
text (str or bytes): XML content as string or bytes
forbid_dtd (bool): Forbid DTD processing (default: False)
forbid_entities (bool): Forbid entity expansion (default: True)
forbid_external (bool): Forbid external references (default: True)
Returns:
Element: Root element of parsed XML document
Raises:
ParseError: XML syntax errors
DTDForbidden: DTD processing attempted when forbidden
EntitiesForbidden: Entity processing attempted when forbidden
ExternalReferenceForbidden: External reference attempted when forbidden
"""

Usage Examples:
import defusedxml.ElementTree as ET
# Parse from file
tree = ET.parse('document.xml')
root = tree.getroot()
# Parse from string with custom security settings
xml_string = '<root><item>value</item></root>'
root = ET.fromstring(xml_string, forbid_dtd=True, forbid_entities=False)
# Incremental parsing for large documents
for event, elem in ET.iterparse('large_document.xml', events=('start', 'end')):
if event == 'end' and elem.tag == 'record':
process_record(elem)
elem.clear() # Free memory

DefusedXMLParser provides the core secure parsing functionality with configurable security restrictions.
class DefusedXMLParser:
"""
Secure XML parser with configurable security restrictions.
Inherits from xml.etree.ElementTree.XMLParser but adds security
handlers to prevent XML bomb attacks, DTD processing attacks,
and external entity attacks.
"""
def __init__(self, html=None, target=None, encoding=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
"""
Initialize DefusedXMLParser with security settings.
Args:
html (deprecated): HTML parsing mode (deprecated, raises TypeError if True)
target (TreeBuilder, optional): Custom tree builder instance
encoding (str, optional): Character encoding for parsing
forbid_dtd (bool): Forbid DTD processing (default: False)
forbid_entities (bool): Forbid entity expansion (default: True)
forbid_external (bool): Forbid external references (default: True)
Raises:
TypeError: If html=True is specified (no longer supported)
DeprecationWarning: Emitted as a warning (not raised as an exception) when the deprecated html parameter is passed with a false value
"""
def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
"""Handler that raises DTDForbidden when DTD processing is forbidden"""
def defused_entity_decl(self, name, is_parameter_entity, value, base, sysid, pubid, notation_name):
"""Handler that raises EntitiesForbidden when entity processing is forbidden"""
def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
"""Handler that raises EntitiesForbidden for unparsed entities when forbidden"""
def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
"""Handler that raises ExternalReferenceForbidden when external references are forbidden"""

Usage Examples:
import defusedxml.ElementTree as ET
from xml.etree.ElementTree import TreeBuilder
# Custom parser with specific security settings
parser = ET.DefusedXMLParser(forbid_dtd=True, forbid_entities=False, forbid_external=True)
tree = ET.parse('document.xml', parser)
# Parser with custom TreeBuilder
custom_builder = TreeBuilder()
parser = ET.DefusedXMLParser(target=custom_builder, forbid_dtd=False)
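# fromstring() accepts no parser argument (a second positional argument would be
# taken as forbid_dtd), so feed the custom parser directly
parser.feed(xml_content)
root = parser.close()

DefusedXML provides several aliases for backward compatibility and convenience.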
XMLParser = DefusedXMLParser
XMLTreeBuilder = DefusedXMLParser
XMLParse = DefusedXMLParser # Backwards compatibility (typo in original)

Additional utility functions re-exported from the standard library.
def tostring(element, encoding="us-ascii", method="xml"):
"""
Convert Element to XML string representation.
Re-exported from xml.etree.ElementTree.tostring for convenience.
This function is safe as it only serializes existing elements.
Args:
element (Element): Element to serialize
encoding (str): Character encoding (default: "us-ascii")
method (str): Serialization method (default: "xml")
Returns:
str or bytes: XML representation of element
"""

class ParseError(Exception):
"""
XML parsing error exception.
Re-exported from xml.etree.ElementTree.ParseError for convenience.
Raised for XML syntax errors and malformed documents.
"""

import defusedxml.ElementTree as ET
# Parse with secure defaults
try:
root = ET.fromstring(xml_content)
for child in root:
print(f"{child.tag}: {child.text}")
except ET.ParseError as e:
print(f"XML parsing error: {e}")

import defusedxml.ElementTree as ET
import defusedxml
def parse_xml_file(filename):
"""Parse XML file with comprehensive error handling."""
try:
tree = ET.parse(filename)
return tree.getroot()
except FileNotFoundError:
print(f"File not found: {filename}")
except ET.ParseError as e:
print(f"XML syntax error: {e}")
except defusedxml.DefusedXmlException as e:
print(f"XML security violation: {e}")
return None

import defusedxml.ElementTree as ET
def process_large_xml(filename):
"""Process large XML files efficiently using iterparse."""
context = ET.iterparse(filename, events=('start', 'end'))
context = iter(context)
event, root = next(context)
for event, elem in context:
if event == 'end' and elem.tag == 'record':
# Process individual record
process_record(elem)
# Clear processed elements to save memory
elem.clear()
root.clear()

import defusedxml.ElementTree as ET
def parse_trusted_xml(xml_content):
"""Parse XML from a trusted source with the default security settings spelled out explicitly."""
# These are the library defaults: DTDs are allowed, while entity expansion and external references stay forbidden
return ET.fromstring(xml_content, forbid_dtd=False, forbid_entities=True, forbid_external=True)
def parse_internal_xml(xml_content):
"""Parse XML from internal systems with minimal restrictions."""
# Allow DTDs and entities but block external references
return ET.fromstring(xml_content, forbid_dtd=False, forbid_entities=False, forbid_external=True)

DefusedXML is designed as a drop-in replacement:
# Before (vulnerable)
import xml.etree.ElementTree as ET
root = ET.fromstring(untrusted_xml)
# After (secure)
import defusedxml.ElementTree as ET
root = ET.fromstring(untrusted_xml) # Now protected by default

The API is identical except for the addition of security parameters to parsing functions.
Install with Tessl CLI
npx tessl i tessl/pypi-defusedxml