CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-defusedxml

XML bomb protection for Python stdlib modules

Pending
Overview
Eval results
Files

docs/elementtree.md

ElementTree Processing

Secure ElementTree-based XML parsing with configurable security restrictions. DefusedXML provides drop-in replacements for xml.etree.ElementTree and xml.etree.cElementTree with comprehensive protection against XML attacks while maintaining API compatibility.

Capabilities

XML Parsing Functions

Core parsing functions that provide secure alternatives to standard ElementTree parsing operations.

def parse(source, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Read an XML document from a path or file-like object and build its tree.

    Args:
        source (str or file-like): Path of the XML file, or a file-like
            object that supplies the XML
        parser (XMLParser, optional): Parser instance to use in place of
            the default one
        forbid_dtd (bool): Refuse DTD processing when True (default: False)
        forbid_entities (bool): Refuse entity expansion when True
            (default: True)
        forbid_external (bool): Refuse external references when True
            (default: True)

    Returns:
        ElementTree: Tree representing the parsed document

    Raises:
        ParseError: The input contains XML syntax errors
        DTDForbidden: DTD processing was attempted while forbidden
        EntitiesForbidden: Entity processing was attempted while forbidden
        ExternalReferenceForbidden: An external reference was attempted
            while forbidden
    """

def iterparse(source, events=None, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document piece by piece, reporting parser events as it goes.

    Args:
        source (str or file-like): Path of the XML file, or a file-like
            object that supplies the XML
        events (tuple, optional): Which events to report; any of
            'start', 'end', 'start-ns', 'end-ns'
        parser (XMLParser, optional): Parser instance to use in place of
            the default one
        forbid_dtd (bool): Refuse DTD processing when True (default: False)
        forbid_entities (bool): Refuse entity expansion when True
            (default: True)
        forbid_external (bool): Refuse external references when True
            (default: True)

    Returns:
        iterator: Yields one (event, element) tuple per reported event

    Raises:
        ParseError: The input contains XML syntax errors
        DTDForbidden: DTD processing was attempted while forbidden
        EntitiesForbidden: Entity processing was attempted while forbidden
        ExternalReferenceForbidden: An external reference was attempted
            while forbidden
    """

def fromstring(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse XML held in memory and return its root element.

    Note that there is no parser argument: the only positional parameter
    besides the text itself is forbid_dtd.

    Args:
        text (str or bytes): The XML content to parse
        forbid_dtd (bool): Refuse DTD processing when True (default: False)
        forbid_entities (bool): Refuse entity expansion when True
            (default: True)
        forbid_external (bool): Refuse external references when True
            (default: True)

    Returns:
        Element: Root element of the parsed document

    Raises:
        ParseError: The input contains XML syntax errors
        DTDForbidden: DTD processing was attempted while forbidden
        EntitiesForbidden: Entity processing was attempted while forbidden
        ExternalReferenceForbidden: An external reference was attempted
            while forbidden
    """

def XML(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse XML held in memory and return its root element.

    Behaves as an alias for fromstring and takes the same arguments.

    Args:
        text (str or bytes): The XML content to parse
        forbid_dtd (bool): Refuse DTD processing when True (default: False)
        forbid_entities (bool): Refuse entity expansion when True
            (default: True)
        forbid_external (bool): Refuse external references when True
            (default: True)

    Returns:
        Element: Root element of the parsed document

    Raises:
        ParseError: The input contains XML syntax errors
        DTDForbidden: DTD processing was attempted while forbidden
        EntitiesForbidden: Entity processing was attempted while forbidden
        ExternalReferenceForbidden: An external reference was attempted
            while forbidden
    """

Usage Examples:

import defusedxml.ElementTree as ET

# Load a whole document from disk and take its top-level element
tree = ET.parse('document.xml')
root = tree.getroot()

# Parse an in-memory string while overriding the default security flags
xml_string = '<root><item>value</item></root>'
root = ET.fromstring(xml_string, forbid_dtd=True, forbid_entities=False)

# Stream a large document instead of loading it all at once
for event, elem in ET.iterparse('large_document.xml', events=('start', 'end')):
    # Only the closing event of a <record> is handled
    if event != 'end' or elem.tag != 'record':
        continue
    process_record(elem)
    elem.clear()  # Free memory

Secure XML Parser

DefusedXMLParser provides the core secure parsing functionality with configurable security restrictions.

class DefusedXMLParser:
    """
    XML parser whose security restrictions can be configured per instance.

    Described as inheriting from xml.etree.ElementTree.XMLParser, with extra
    handlers installed to stop XML bomb attacks, DTD processing attacks and
    external entity attacks.
    """

    def __init__(self, html=None, target=None, encoding=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
        """
        Create a parser with the given security settings.

        Args:
            html (deprecated): Former HTML parsing switch; passing True is
                rejected with TypeError
            target (TreeBuilder, optional): Tree builder that receives the
                parse events
            encoding (str, optional): Character encoding to parse with
            forbid_dtd (bool): Refuse DTD processing when True
                (default: False)
            forbid_entities (bool): Refuse entity expansion when True
                (default: True)
            forbid_external (bool): Refuse external references when True
                (default: True)

        Raises:
            TypeError: html=True was given (no longer supported)
            DeprecationWarning: The html parameter was used
        """

    def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Doctype handler: raises DTDForbidden if DTD processing is forbidden."""

    def defused_entity_decl(self, name, is_parameter_entity, value, base, sysid, pubid, notation_name):
        """Entity declaration handler: raises EntitiesForbidden if entity processing is forbidden."""

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Unparsed-entity handler: raises EntitiesForbidden if entities are forbidden."""

    def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
        """External-entity handler: raises ExternalReferenceForbidden if external references are forbidden."""

Usage Examples:

import defusedxml.ElementTree as ET
from xml.etree.ElementTree import TreeBuilder

# Custom parser with specific security settings
parser = ET.DefusedXMLParser(forbid_dtd=True, forbid_entities=False, forbid_external=True)
tree = ET.parse('document.xml', parser)

# Parser with custom TreeBuilder
custom_builder = TreeBuilder()
parser = ET.DefusedXMLParser(target=custom_builder, forbid_dtd=False)
# fromstring() has no parser argument: its second positional parameter is
# forbid_dtd, so passing the parser there would silently enable forbid_dtd
# and ignore the custom parser. Feed the string to the parser directly.
parser.feed(xml_content)
root = parser.close()  # close() hands back what the target built

Parser Aliases

DefusedXML provides several aliases for backward compatibility and convenience.

# All three names are bound to the same class. XMLParse is a historical
# misspelling that is kept only for backwards compatibility.
XMLParser = XMLTreeBuilder = XMLParse = DefusedXMLParser

Utility Functions

Additional utility functions re-exported from the standard library.

def tostring(element, encoding="us-ascii", method="xml"):
    """
    Serialize an Element into its XML text form.

    This is xml.etree.ElementTree.tostring, re-exported for convenience.
    It only writes out elements that already exist, so it is safe to use.

    Args:
        element (Element): The element to serialize
        encoding (str): Output character encoding (default: "us-ascii")
        method (str): Serialization method (default: "xml")

    Returns:
        str or bytes: The serialized XML for the element
    """

Exception Re-exports

class ParseError(SyntaxError):
    """
    XML parsing error exception.

    Re-exported from xml.etree.ElementTree.ParseError for convenience.
    Raised for XML syntax errors and malformed documents.

    The standard-library class derives from SyntaxError (and therefore
    still from Exception), so handlers written as ``except SyntaxError``
    or ``except Exception`` both catch it.
    """

Common Usage Patterns

Basic Secure Parsing

import defusedxml.ElementTree as ET

# Parse with secure defaults
try:
    # Secure defaults apply because no security flags are passed
    root = ET.fromstring(xml_content)
except ET.ParseError as e:
    print(f"XML parsing error: {e}")
else:
    # Reached only when parsing succeeded: list each direct child
    for child in root:
        print(f"{child.tag}: {child.text}")

File Parsing with Error Handling

import defusedxml.ElementTree as ET
import defusedxml

def parse_xml_file(filename):
    """Parse an XML file, reporting failures instead of raising.

    Args:
        filename: Path of the XML file to parse.

    Returns:
        The root element on success, or None when the file is missing,
        is not well-formed, or triggers a defusedxml security exception
        (a message is printed in each failure case).
    """
    try:
        document = ET.parse(filename)
    except FileNotFoundError:
        print(f"File not found: {filename}")
        return None
    except ET.ParseError as e:
        print(f"XML syntax error: {e}")
        return None
    except defusedxml.DefusedXmlException as e:
        print(f"XML security violation: {e}")
        return None
    return document.getroot()

Memory-Efficient Parsing for Large Files

import defusedxml.ElementTree as ET

def process_large_xml(filename):
    """Stream a large XML file with iterparse, handling one record at a time.

    Args:
        filename: Path of the XML file to process.
    """
    events = iter(ET.iterparse(filename, events=('start', 'end')))
    # Take the element from the first reported event; it is kept as the
    # root so that it can be emptied after each record.
    _, root = next(events)

    for kind, node in events:
        # Skip everything except the closing event of a <record>
        if kind != 'end' or node.tag != 'record':
            continue

        process_record(node)

        # Drop what has already been handled to keep memory use down
        node.clear()
        root.clear()

Custom Security Configuration

import defusedxml.ElementTree as ET

def parse_trusted_xml(xml_content):
    """Parse XML from a trusted source with the security flags spelled out.

    DTDs are allowed, while entities and external references stay
    forbidden. These values match the documented defaults of fromstring,
    so the call is explicit rather than actually relaxed.
    """
    root = ET.fromstring(
        xml_content,
        forbid_dtd=False,
        forbid_entities=True,
        forbid_external=True,
    )
    return root

def parse_internal_xml(xml_content):
    """Parse XML from internal systems with only external references blocked.

    DTDs and entities are both allowed; external references stay forbidden.
    """
    root = ET.fromstring(
        xml_content,
        forbid_dtd=False,
        forbid_entities=False,
        forbid_external=True,
    )
    return root

Migration from Standard Library

DefusedXML is designed as a drop-in replacement:

# Before (vulnerable): the standard-library module is used directly
import xml.etree.ElementTree as ET
root = ET.fromstring(untrusted_xml)

# After (secure): only the import changes; the call itself is untouched
import defusedxml.ElementTree as ET
root = ET.fromstring(untrusted_xml)  # Now protected by default (entities and external references forbidden)

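The API is identical except for the security parameters (forbid_dtd, forbid_entities, forbid_external) added to the parsing functions and the security exceptions (DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden) those functions can raise.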

Install with Tessl CLI

npx tessl i tessl/pypi-defusedxml

docs

dom.md

elementtree.md

exceptions.md

index.md

sax.md

stdlib-patching.md

xmlrpc.md

tile.json