XML bomb protection for Python stdlib modules
—
Secure SAX-based XML parsing with event-driven processing and configurable security restrictions. DefusedXML provides drop-in replacements for xml.sax with comprehensive protection against XML attacks while maintaining API compatibility for event-driven XML processing.
Core SAX parsing functions that provide secure alternatives to standard SAX parsing operations.
def parse(source, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse XML document using SAX event-driven processing.

    Args:
        source (str or file-like): File path or file-like object containing XML
        handler (ContentHandler): SAX content handler to receive parsing events
        errorHandler (ErrorHandler, optional): SAX error handler for error processing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Raises:
        ParseError: XML syntax errors
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """
def parseString(string, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse XML document from string using SAX event-driven processing.

    Args:
        string (str or bytes): XML content as string or bytes
        handler (ContentHandler): SAX content handler to receive parsing events
        errorHandler (ErrorHandler, optional): SAX error handler for error processing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Raises:
        ParseError: XML syntax errors
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """
def make_parser(parser_list=[]):
    """
    Create a secure SAX parser instance.

    Args:
        parser_list (list): List of parser names (ignored, always returns DefusedExpatParser)

    Returns:
        DefusedExpatParser: Secure SAX parser instance
    """


# Usage Examples:
import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class MyHandler(ContentHandler):
    """Content handler that prints every element boundary and text chunk."""

    def startElement(self, name, attrs):
        """Report the opening tag *name*; *attrs* is not used."""
        print(f"Start element: {name}")

    def endElement(self, name):
        """Report the closing tag *name*."""
        print(f"End element: {name}")

    def characters(self, content):
        """Report a chunk of character data with surrounding whitespace removed."""
        print(f"Character data: {content.strip()}")
# Parse from file
handler = MyHandler()
sax.parse('document.xml', handler)
# Parse from string with custom security settings
xml_string = '<root><item>value</item></root>'
sax.parseString(xml_string, handler, forbid_dtd=True, forbid_entities=False)
# Create parser manually
parser = sax.make_parser()
parser.setContentHandler(handler)
parser.forbid_dtd = True
parser.forbid_entities = True
parser.forbid_external = True
parser.parse('document.xml')

DefusedExpatParser provides the core secure SAX parsing functionality with configurable security restrictions.
class DefusedExpatParser:
    """
    Secure SAX parser using pyexpat with configurable security restrictions.

    Inherits from xml.sax.expatreader.ExpatParser but adds security
    handlers to prevent XML bomb attacks, DTD processing attacks,
    and external entity attacks.
    """

    def __init__(self, namespaceHandling=0, bufsize=65536-20, forbid_dtd=False, forbid_entities=True, forbid_external=True):
        """
        Initialize DefusedExpatParser with security settings.

        Args:
            namespaceHandling (int): Namespace handling mode (0=disabled, 1=enabled)
            bufsize (int): Buffer size for parsing (default: 65516)
            forbid_dtd (bool): Forbid DTD processing (default: False)
            forbid_entities (bool): Forbid entity expansion (default: True)
            forbid_external (bool): Forbid external references (default: True)
        """

    def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Handler that raises DTDForbidden when DTD processing is forbidden"""

    def defused_entity_decl(self, name, is_parameter_entity, value, base, sysid, pubid, notation_name):
        """Handler that raises EntitiesForbidden when entity processing is forbidden"""

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Handler that raises EntitiesForbidden for unparsed entities when forbidden"""

    def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
        """Handler that raises ExternalReferenceForbidden when external references are forbidden"""

    def reset(self):
        """Reset parser state and reinstall security handlers"""


# Usage Examples:
import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class DataCollector(ContentHandler):
    """Collect ``(element_name, text)`` pairs for every non-blank text chunk.

    ``data`` holds the collected pairs in document order and
    ``current_element`` names the innermost element that is currently open
    (``None`` outside the document element).
    """

    def __init__(self):
        super().__init__()  # let ContentHandler set up its own state
        self.data = []
        self.current_element = None
        # Names of the open elements, innermost last, so that text following
        # a closing tag is attributed to the parent rather than to the
        # sibling that has just been closed.
        self._open_elements = []

    def startElement(self, name, attrs):
        """Record *name* as the innermost open element."""
        self._open_elements.append(name)
        self.current_element = name

    def endElement(self, name):
        """Close the innermost element and fall back to its parent."""
        if self._open_elements:
            self._open_elements.pop()
        self.current_element = self._open_elements[-1] if self._open_elements else None

    def characters(self, content):
        """Store a stripped text chunk under the currently open element."""
        text = content.strip()
        if self.current_element and text:
            self.data.append((self.current_element, text))
# Manual parser creation with custom settings
parser = sax.make_parser()
handler = DataCollector()
parser.setContentHandler(handler)

# Configure security settings
parser.forbid_dtd = True
parser.forbid_entities = True
parser.forbid_external = True

# Parse document
parser.parse('data.xml')
print(handler.data)

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class SimpleHandler(ContentHandler):
    """Record tags and non-blank text chunks, in document order, in ``elements``."""

    def __init__(self):
        super().__init__()  # let ContentHandler set up its own state
        self.elements = []

    def startElement(self, name, attrs):
        """Record the opening tag and print each attribute of the element."""
        self.elements.append(f"<{name}>")
        # Process attributes
        for attr_name, attr_value in attrs.items():
            print(f" {attr_name}={attr_value}")

    def endElement(self, name):
        """Record the closing tag."""
        self.elements.append(f"</{name}>")

    def characters(self, content):
        """Record the stripped text chunk; whitespace-only chunks are skipped."""
        if content.strip():
            self.elements.append(content.strip())
# Parse with secure defaults
# SAX parsers report malformed XML as SAXParseException, which is defined by
# the standard xml.sax package.
from xml.sax import SAXParseException

# Sample document so the example is self-contained
xml_content = b'<root><item id="1">value</item></root>'

handler = SimpleHandler()
try:
    sax.parseString(xml_content, handler)
    print("Parsed elements:", handler.elements)
except SAXParseException as e:
    print(f"XML parsing error: {e}")

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class RecordProcessor(ContentHandler):
    """Stream ``<record>`` elements, handing each one to ``process_record``.

    Every child element of a record becomes a key of ``current_record`` whose
    value is the element's stripped text. ``record_count`` is the number of
    records completed so far.
    """

    def __init__(self):
        super().__init__()  # let ContentHandler set up its own state
        self.in_record = False
        self.current_record = {}
        self.current_field = None
        self.record_count = 0
        # Text chunks received for the field that is currently open. A SAX
        # parser may deliver one text node through several characters()
        # calls, so the chunks are joined when the field is closed.
        self._field_chunks = []

    def startElement(self, name, attrs):
        """Open a new record, or start a field inside the current record."""
        if name == 'record':
            self.in_record = True
            self.current_record = {}
        elif self.in_record:
            # Keep any text already seen for an enclosing field
            self._store_field()
            self.current_field = name

    def endElement(self, name):
        """Finish the current record, or close the field that is open."""
        if name == 'record' and self.in_record:
            self.process_record(self.current_record)
            self.in_record = False
            self.record_count += 1
        elif self.in_record:
            self._store_field()
            self.current_field = None

    def characters(self, content):
        """Buffer a text chunk for the field that is currently open."""
        if self.in_record and self.current_field:
            self._field_chunks.append(content)

    def process_record(self, record):
        """Handle one completed record; runs before ``record_count`` is incremented."""
        # Process individual record
        print(f"Processing record {self.record_count}: {record}")

    def _store_field(self):
        """Save the buffered text under the open field and clear the buffer."""
        text = "".join(self._field_chunks).strip()
        if self.current_field and text:
            self.current_record[self.current_field] = text
        self._field_chunks = []
# Process large XML file efficiently
handler = RecordProcessor()
sax.parse('large_dataset.xml', handler)
print(f"Processed {handler.record_count} records")

import defusedxml.sax as sax
import defusedxml
from xml.sax.handler import ContentHandler, ErrorHandler
class CustomErrorHandler(ErrorHandler):
    """Error handler that prints every problem and re-raises only fatal errors."""

    def error(self, exception):
        """Print a recoverable error; parsing is allowed to continue."""
        print(f"XML error: {exception}")

    def fatalError(self, exception):
        """Print a fatal error and re-raise it to the caller."""
        print(f"Fatal XML error: {exception}")
        raise exception

    def warning(self, exception):
        """Print a warning; parsing is allowed to continue."""
        print(f"XML warning: {exception}")
class SafeContentHandler(ContentHandler):
    """Content handler that counts opening tags in ``elements_processed``."""

    def __init__(self):
        super().__init__()  # let ContentHandler set up its own state
        self.elements_processed = 0

    def startElement(self, name, attrs):
        """Count one more element; *name* and *attrs* are not inspected."""
        self.elements_processed += 1
def safe_sax_parse(xml_content):
    """Parse XML with comprehensive error handling.

    Every failure is reported with ``print`` instead of being propagated:
    the three defusedxml security exceptions are reported individually and
    any other SAX error is reported generically.

    Args:
        xml_content: XML document passed unchanged to ``sax.parseString``.
    """
    # SAXException is defined by the standard xml.sax package; importing it
    # from there does not rely on defusedxml.sax re-exporting it.
    from xml.sax import SAXException

    handler = SafeContentHandler()
    error_handler = CustomErrorHandler()
    try:
        sax.parseString(xml_content, handler, error_handler)
    except defusedxml.DTDForbidden as e:
        print(f"DTD processing forbidden: {e}")
    except defusedxml.EntitiesForbidden as e:
        print(f"Entity processing forbidden: {e}")
    except defusedxml.ExternalReferenceForbidden as e:
        print(f"External reference forbidden: {e}")
    except SAXException as e:
        print(f"SAX parsing error: {e}")
    else:
        print(f"Successfully processed {handler.elements_processed} elements")


import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class NamespaceHandler(ContentHandler):
    """Print namespace-aware SAX events and track the active prefix mappings.

    ``namespace_stack`` holds one ``(prefix, uri)`` pair per mapping that is
    currently in scope, most recent last.
    """

    def __init__(self):
        super().__init__()  # let ContentHandler set up its own state
        self.namespace_stack = []

    def startPrefixMapping(self, prefix, uri):
        """Report a new prefix mapping and push it on the stack."""
        print(f"Namespace mapping: {prefix} -> {uri}")
        self.namespace_stack.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Report the end of a mapping and drop the most recent stack entry."""
        print(f"End namespace mapping: {prefix}")
        self.namespace_stack.pop()

    def startElementNS(self, name, qname, attrs):
        """Report an opening tag; *name* is a ``(namespace_uri, local_name)`` pair."""
        namespace_uri, local_name = name
        print(f"Start element: {local_name} (namespace: {namespace_uri})")

    def endElementNS(self, name, qname):
        """Report a closing tag; *name* is a ``(namespace_uri, local_name)`` pair."""
        namespace_uri, local_name = name
        print(f"End element: {local_name} (namespace: {namespace_uri})")
# Parse XML with namespace support
import io
from xml.sax.handler import feature_namespaces

xml_with_ns = '''<?xml version="1.0"?>
<root xmlns="http://example.com/ns1" xmlns:ns2="http://example.com/ns2">
<item>value1</item>
<ns2:item>value2</ns2:item>
</root>'''
handler = NamespaceHandler()

# SAX namespace processing is off by default, in which case the parser only
# calls startElement/endElement. Enable it explicitly so that the *NS and
# prefix-mapping callbacks of NamespaceHandler are invoked.
parser = sax.make_parser()
parser.setFeature(feature_namespaces, True)
parser.setContentHandler(handler)
parser.parse(io.BytesIO(xml_with_ns.encode("utf-8")))

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler
class ConfigurableHandler(ContentHandler):
    """Content handler that prints the name of every element it encounters."""

    def startElement(self, name, attrs):
        """Print the element name; *attrs* is not used."""
        print(f"Element: {name}")
def parse_with_custom_security(xml_content, trust_level='untrusted'):
    """Parse XML with security settings based on trust level.

    Args:
        xml_content: XML document passed unchanged to ``sax.parseString``.
        trust_level (str): ``'trusted'`` allows DTDs and entities,
            ``'internal'`` allows DTDs only, and any other value (including
            the default ``'untrusted'``) forbids both. External references
            are forbidden at every level.
    """
    handler = ConfigurableHandler()
    if trust_level == 'trusted':
        # Relaxed security for trusted content
        sax.parseString(xml_content, handler, forbid_dtd=False, forbid_entities=False, forbid_external=True)
    elif trust_level == 'internal':
        # Moderate security for internal content
        sax.parseString(xml_content, handler, forbid_dtd=False, forbid_entities=True, forbid_external=True)
    else:
        # Maximum security for untrusted content
        sax.parseString(xml_content, handler, forbid_dtd=True, forbid_entities=True, forbid_external=True)


# DefusedXML SAX is designed as a drop-in replacement:
# Before (vulnerable)
import xml.sax as sax
sax.parseString(untrusted_xml, handler)
# After (secure)
import defusedxml.sax as sax
sax.parseString(untrusted_xml, handler)  # Now protected by default

The API is identical except for the addition of security parameters to parsing functions.
Install with Tessl CLI
npx tessl i tessl/pypi-defusedxml