XML bomb protection for Python stdlib modules
—
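Secure DOM-based XML parsing that builds complete document object models with configurable security restrictions. defusedxml provides hardened versions of the parse() and parseString() functions of xml.dom.minidom and xml.dom.pulldom, protecting against XML attacks while keeping the same call signatures plus additional security keyword arguments. Everything else (node classes, event constants such as START_ELEMENT, and the exception types raised for malformed XML) still comes from the standard library modules.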
Secure minidom parsing functions that build complete DOM trees with security restrictions.
def parse(file, parser=None, bufsize=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document from a file into a DOM tree (defusedxml.minidom).

    Args:
        file (str or file-like): File path or file-like object containing XML
        parser (optional): Custom parser instance (for pulldom compatibility)
        bufsize (int, optional): Buffer size for parsing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Document: DOM document object

    Raises:
        xml.parsers.expat.ExpatError: XML syntax errors. defusedxml.minidom
            does not define a ParseError of its own; malformed input
            surfaces as the standard library expat error.
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """
def parseString(string, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document from a string into a DOM tree (defusedxml.minidom).

    Args:
        string (str or bytes): XML content as string or bytes
        parser (optional): Custom parser instance (for pulldom compatibility)
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Document: DOM document object

    Raises:
        xml.parsers.expat.ExpatError: XML syntax errors. defusedxml.minidom
            does not define a ParseError of its own; malformed input
            surfaces as the standard library expat error.
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """


# Usage Examples:
import defusedxml.minidom as minidom

# Parse from file
doc = minidom.parse('document.xml')
root = doc.documentElement
print(f"Root element: {root.tagName}")

# Parse from string
xml_string = '<root><item id="1">value</item></root>'
doc = minidom.parseString(xml_string)

# Navigate DOM tree
for node in doc.getElementsByTagName('item'):
    print(f"Item ID: {node.getAttribute('id')}, Value: {node.firstChild.nodeValue}")

# Parse with custom security settings
doc = minidom.parseString(xml_string, forbid_dtd=True, forbid_entities=False)

# Secure pulldom parsing functions that provide event-driven parsing with DOM node creation on demand.
def parse(stream_or_string, parser=None, bufsize=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document using pull-based DOM processing (defusedxml.pulldom).

    Args:
        stream_or_string (str or file-like): File path or file-like object containing XML
        parser (optional): Custom parser instance
        bufsize (int, optional): Buffer size for parsing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Event stream: iterable yielding (event, node) tuples; call its
        expandNode(node) to build the DOM subtree of the current element.

    Raises:
        xml.sax.SAXParseException: XML syntax errors (the pull parser is
            SAX-based; defusedxml.pulldom defines no ParseError)
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden

    Note:
        Parsing is incremental, so these exceptions are typically raised
        while the returned stream is being iterated rather than by this call.
    """
def parseString(string, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document from a string using pull-based DOM processing
    (defusedxml.pulldom).

    Args:
        string (str or bytes): XML content as string or bytes
        parser (optional): Custom parser instance
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Event stream: iterable yielding (event, node) tuples; call its
        expandNode(node) to build the DOM subtree of the current element.

    Raises:
        xml.sax.SAXParseException: XML syntax errors (the pull parser is
            SAX-based; defusedxml.pulldom defines no ParseError)
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden

    Note:
        Parsing is incremental, so these exceptions are typically raised
        while the returned stream is being iterated rather than by this call.
    """


# Usage Examples:
# defusedxml.pulldom only provides parse() and parseString(); the event
# constants still live in the standard library module.
from xml.dom.pulldom import START_ELEMENT

import defusedxml.pulldom as pulldom

# Pull-based parsing for selective DOM building
doc = pulldom.parse('large_document.xml')
for event, node in doc:
    if event == START_ELEMENT and node.tagName == 'important':
        # Build DOM subtree for this element only
        doc.expandNode(node)
        print(f"Important element: {node.toxml()}")

# Parse string with pull DOM
xml_string = '<root><item>1</item><item>2</item></root>'
doc = pulldom.parseString(xml_string)
for event, node in doc:
    if event == START_ELEMENT and node.tagName == 'item':
        doc.expandNode(node)
        print(f"Item value: {node.firstChild.nodeValue}")

# Secure DOM builder classes that handle the actual construction of DOM trees with security restrictions.
class DefusedExpatBuilder:
    """
    Secure DOM builder using expat parser.

    Builds DOM trees with configurable security restrictions to prevent
    XML bomb attacks, DTD processing attacks, and external entity attacks.
    """

    def __init__(self, options=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
        """
        Initialize DefusedExpatBuilder with security settings.

        Args:
            options (optional): Builder options
            forbid_dtd (bool): Forbid DTD processing (default: False)
            forbid_entities (bool): Forbid entity expansion (default: True)
            forbid_external (bool): Forbid external references (default: True)
        """

    def parseFile(self, file):
        """Parse XML from file-like object and return Document"""

    def parseString(self, string):
        """Parse XML from string and return Document"""

    def install(self, parser):
        """Install security handlers on parser"""
class DefusedExpatBuilderNS(DefusedExpatBuilder):
    """
    Namespace-aware secure DOM builder using expat parser.

    Extends DefusedExpatBuilder with namespace processing capabilities
    while maintaining the same security restrictions.
    """

    def install(self, parser):
        """Install security and namespace handlers on parser"""

    def reset(self):
        """Reset builder state and namespace context"""


# Usage Examples:
from defusedxml.expatbuilder import DefusedExpatBuilder, DefusedExpatBuilderNS

# Custom DOM builder with specific security settings
builder = DefusedExpatBuilder(forbid_dtd=True, forbid_entities=False, forbid_external=True)
with open('document.xml', 'rb') as f:
    doc = builder.parseFile(f)

# Namespace-aware builder
# Sample input so the example is self-contained.
xml_with_namespaces = '<root xmlns:x="urn:example"><x:item>1</x:item></root>'
ns_builder = DefusedExpatBuilderNS(forbid_dtd=False, forbid_entities=True, forbid_external=True)
doc = ns_builder.parseString(xml_with_namespaces)

# Direct access to secure DOM building functions with namespace support.
def parse(file, namespaces=True, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML file using the secure expat builder (defusedxml.expatbuilder).

    Args:
        file (str or file-like): File path or file-like object
        namespaces (bool): Enable namespace processing (default: True)
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Document: DOM document object

    Raises:
        xml.parsers.expat.ExpatError: XML syntax errors
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """
def parseString(string, namespaces=True, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML string using the secure expat builder (defusedxml.expatbuilder).

    Args:
        string (str or bytes): XML content
        namespaces (bool): Enable namespace processing (default: True)
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Returns:
        Document: DOM document object

    Raises:
        xml.parsers.expat.ExpatError: XML syntax errors
        DTDForbidden: DTD processing attempted when forbidden
        EntitiesForbidden: Entity processing attempted when forbidden
        ExternalReferenceForbidden: External reference attempted when forbidden
    """


import defusedxml.minidom as minidom
def process_xml_document(xml_content):
    """Process XML document using secure DOM parsing.

    Prints the id and text of every <item> element under the root.

    Args:
        xml_content (str or bytes): XML document to parse.

    Returns:
        The parsed DOM document, or None when the input is not
        well-formed XML. defusedxml security exceptions (DTDForbidden,
        EntitiesForbidden, ExternalReferenceForbidden) are not caught
        here and propagate to the caller.
    """
    # defusedxml.minidom has no ParseError attribute; malformed XML is
    # reported by the underlying expat parser as ExpatError.
    from xml.parsers.expat import ExpatError

    try:
        doc = minidom.parseString(xml_content)
    except ExpatError as e:
        print(f"XML parsing error: {e}")
        return None

    root = doc.documentElement
    # Process elements
    for element in root.getElementsByTagName('item'):
        item_id = element.getAttribute('id')
        item_value = element.firstChild.nodeValue if element.firstChild else ''
        print(f"Item {item_id}: {item_value}")
    return doc


import defusedxml.pulldom as pulldom
def process_large_xml_selectively(filename):
    """Process large XML files efficiently using pulldom.

    Streams through the file and expands only <record> elements into DOM
    subtrees, printing the text of each nested <data> element.

    Args:
        filename (str or file-like): Path or file object of the XML to read.
    """
    # defusedxml.pulldom only provides parse()/parseString(); the event
    # constants must be taken from the standard library module.
    from xml.dom.pulldom import START_ELEMENT

    doc = pulldom.parse(filename)
    processed_count = 0
    for event, node in doc:
        if event != START_ELEMENT or node.tagName != 'record':
            continue
        # Only expand nodes we're interested in
        doc.expandNode(node)
        # Process the expanded DOM subtree
        record_id = node.getAttribute('id')
        for data_elem in node.getElementsByTagName('data'):
            if data_elem.firstChild:
                print(f"Record {record_id}: {data_elem.firstChild.nodeValue}")
        processed_count += 1
    print(f"Processed {processed_count} records")


import defusedxml.minidom as minidom
def modify_xml_document(xml_content):
    """Parse and modify XML document.

    Appends a new <item id="new"> element to the root and rewrites the
    text of any <item id="modify_me"> element.

    Args:
        xml_content (str or bytes): XML document to parse.

    Returns:
        str: The modified document serialized back to XML.
    """
    doc = minidom.parseString(xml_content)

    # Add new element
    root = doc.documentElement
    new_item = doc.createElement('item')
    new_item.setAttribute('id', 'new')
    new_item.appendChild(doc.createTextNode('new value'))
    root.appendChild(new_item)

    # Modify existing elements
    for item in root.getElementsByTagName('item'):
        if item.getAttribute('id') != 'modify_me':
            continue
        if item.firstChild is None:
            # Empty element (<item id="modify_me"/>): there is no text
            # node to update, so create one instead of failing on None.
            item.appendChild(doc.createTextNode('modified value'))
        else:
            item.firstChild.nodeValue = 'modified value'

    # Convert back to XML string
    return doc.toxml()


import defusedxml.minidom as minidom
def process_namespaced_xml(xml_content):
    """Process XML document with namespace support.

    Prints the local name and namespace URI of each element that is a
    direct child of the root, followed by each of its attributes.

    Args:
        xml_content (str or bytes): XML document to parse.
    """
    doc = minidom.parseString(xml_content)
    root = doc.documentElement

    # Process all elements regardless of namespace
    for node in root.childNodes:
        # Skip text, comment and other non-element children.
        if node.nodeType != node.ELEMENT_NODE:
            continue
        print(f"Element: {node.localName}, Namespace: {node.namespaceURI}")

        # Process attributes with namespaces
        if node.attributes:
            for i in range(node.attributes.length):
                attr = node.attributes.item(i)
                print(f" Attribute: {attr.localName}={attr.value} (NS: {attr.namespaceURI})")


import defusedxml.minidom as minidom
import defusedxml
def safe_dom_parse(xml_content):
    """Parse XML with comprehensive error handling.

    Each failure mode is reported with a distinct message instead of
    being raised.

    Args:
        xml_content (str or bytes): XML document to parse.

    Returns:
        The parsed DOM document, or None if parsing failed for any reason.
    """
    # defusedxml.minidom has no ParseError attribute; malformed XML is
    # reported by the underlying expat parser as ExpatError.
    from xml.parsers.expat import ExpatError

    try:
        doc = minidom.parseString(xml_content)
        print(f"Successfully parsed document with root: {doc.documentElement.tagName}")
        return doc
    except ExpatError as e:
        print(f"XML syntax error: {e}")
    except defusedxml.DTDForbidden as e:
        print(f"DTD processing forbidden: {e}")
    except defusedxml.EntitiesForbidden as e:
        print(f"Entity processing forbidden: {e}")
    except defusedxml.ExternalReferenceForbidden as e:
        print(f"External reference forbidden: {e}")
    except Exception as e:
        # Deliberate catch-all: this helper is a reporting boundary.
        print(f"Unexpected error: {e}")
    return None


import defusedxml.minidom as minidom
def parse_with_trust_level(xml_content, trust_level='untrusted'):
    """Parse XML with security settings based on trust level.

    Args:
        xml_content (str or bytes): XML document to parse.
        trust_level (str): 'trusted', 'internal', or anything else. Any
            unrecognized value is treated as untrusted and gets the
            strictest settings.

    Returns:
        Document: DOM document object.
    """
    if trust_level == 'trusted':
        # Allow DTDs and entities for trusted content
        return minidom.parseString(xml_content, forbid_dtd=False, forbid_entities=False, forbid_external=True)
    if trust_level == 'internal':
        # Allow DTDs but restrict entities
        return minidom.parseString(xml_content, forbid_dtd=False, forbid_entities=True, forbid_external=True)
    # Maximum security for untrusted content
    return minidom.parseString(xml_content, forbid_dtd=True, forbid_entities=True, forbid_external=True)


# DefusedXML DOM modules are designed as drop-in replacements:
# Before (vulnerable)
import xml.dom.minidom as minidom
import xml.dom.pulldom as pulldom

doc = minidom.parseString(untrusted_xml)
events = pulldom.parseString(untrusted_xml)

# After (secure)
import defusedxml.minidom as minidom
import defusedxml.pulldom as pulldom

doc = minidom.parseString(untrusted_xml)  # Now protected by default
events = pulldom.parseString(untrusted_xml)  # Now protected by default

# The API is identical except for the addition of security parameters to parsing functions.
# Only parse() and parseString() are replaced; names such as pulldom.START_ELEMENT
# must still be imported from the standard library xml.dom modules.
Install with Tessl CLI
npx tessl i tessl/pypi-defusedxml