CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-defusedxml

XML bomb protection for Python stdlib modules

Pending
Overview
Eval results
Files

docs/sax.md

SAX Processing

Secure SAX-based XML parsing with event-driven processing and configurable security restrictions. DefusedXML provides drop-in replacements for xml.sax with comprehensive protection against XML attacks while maintaining API compatibility for event-driven XML processing.

Capabilities

SAX Parsing Functions

Core SAX parsing functions that provide secure alternatives to standard SAX parsing operations.

def parse(source, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an XML document with SAX, delivering events to ``handler``.

    Args:
        source (str or file-like): File path or file-like object containing XML
        handler (ContentHandler): SAX content handler that receives parsing events
        errorHandler (ErrorHandler, optional): SAX error handler for error processing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Raises:
        xml.sax.SAXParseException: XML syntax errors. xml.sax defines no
            ``ParseError``; catch ``SAXParseException`` (or its base
            ``SAXException``) imported from ``xml.sax``.
        defusedxml.DTDForbidden: DTD processing attempted when forbidden
        defusedxml.EntitiesForbidden: Entity processing attempted when forbidden
        defusedxml.ExternalReferenceForbidden: External reference attempted when forbidden
    """

def parseString(string, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """
    Parse an in-memory XML document with SAX, delivering events to ``handler``.

    Args:
        string (str or bytes): XML content as string or bytes.
            NOTE(review): upstream defusedxml appears to wrap this value in
            ``io.BytesIO``, in which case a ``str`` must be encoded to bytes
            first - confirm against the installed version.
        handler (ContentHandler): SAX content handler that receives parsing events
        errorHandler (ErrorHandler, optional): SAX error handler for error processing
        forbid_dtd (bool): Forbid DTD processing (default: False)
        forbid_entities (bool): Forbid entity expansion (default: True)
        forbid_external (bool): Forbid external references (default: True)

    Raises:
        xml.sax.SAXParseException: XML syntax errors. xml.sax defines no
            ``ParseError``; catch ``SAXParseException`` (or its base
            ``SAXException``) imported from ``xml.sax``.
        defusedxml.DTDForbidden: DTD processing attempted when forbidden
        defusedxml.EntitiesForbidden: Entity processing attempted when forbidden
        defusedxml.ExternalReferenceForbidden: External reference attempted when forbidden
    """

def make_parser(parser_list=[]):
    """
    Build a SAX parser with the defusedxml protections installed.

    Args:
        parser_list (list): Parser names, accepted for signature
            compatibility only (ignored, always returns DefusedExpatParser)

    Returns:
        DefusedExpatParser: Secure SAX parser instance
    """

Usage Examples:

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class MyHandler(ContentHandler):
    """Content handler that echoes element boundaries and text to stdout."""

    def startElement(self, name, attrs):
        """Report an opening tag; attributes are not inspected."""
        print("Start element: {}".format(name))

    def endElement(self, name):
        """Report a closing tag."""
        print("End element: {}".format(name))

    def characters(self, content):
        """Report a chunk of character data with surrounding whitespace removed."""
        trimmed = content.strip()
        print("Character data: {}".format(trimmed))

# Parse from file
handler = MyHandler()
sax.parse('document.xml', handler)

# Parse from an in-memory document with custom security settings.
# The payload is a bytes literal because bytes are accepted however the
# library wraps the input.
# NOTE(review): upstream parseString appears to feed its argument to
# io.BytesIO, so a str literal would raise TypeError - confirm against the
# installed defusedxml version.
xml_string = b'<root><item>value</item></root>'
sax.parseString(xml_string, handler, forbid_dtd=True, forbid_entities=False)

# Create parser manually; the forbid_* flags are plain attributes that are
# set before parsing starts.
parser = sax.make_parser()
parser.setContentHandler(handler)
parser.forbid_dtd = True
parser.forbid_entities = True
parser.forbid_external = True
parser.parse('document.xml')

Secure SAX Parser

DefusedExpatParser provides the core secure SAX parsing functionality with configurable security restrictions.

class DefusedExpatParser:
    """
    pyexpat-backed SAX parser with switchable security restrictions.

    Derived from xml.sax.expatreader.ExpatParser, with extra handlers
    that guard against XML bombs, DTD-based attacks and external
    entity attacks.
    """

    def __init__(self, namespaceHandling=0, bufsize=65536-20, forbid_dtd=False, forbid_entities=True, forbid_external=True):
        """
        Set up the parser together with its security flags.

        Args:
            namespaceHandling (int): Namespace handling mode (0=disabled, 1=enabled)
            bufsize (int): Buffer size for parsing (default: 65516)
            forbid_dtd (bool): Forbid DTD processing (default: False)
            forbid_entities (bool): Forbid entity expansion (default: True)
            forbid_external (bool): Forbid external references (default: True)
        """

    def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Doctype handler: raises DTDForbidden when DTD processing is forbidden."""

    def defused_entity_decl(self, name, is_parameter_entity, value, base, sysid, pubid, notation_name):
        """Entity declaration handler: raises EntitiesForbidden when entity processing is forbidden."""

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Unparsed entity handler: raises EntitiesForbidden when entities are forbidden."""

    def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
        """External reference handler: raises ExternalReferenceForbidden when external references are forbidden."""

    def reset(self):
        """Reset the parser state and install the security handlers again."""

Usage Examples:

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class DataCollector(ContentHandler):
    """Collect ``(element_name, text)`` pairs for every non-blank text chunk.

    Attributes:
        data: list of ``(element_name, stripped_text)`` tuples in document order.
        current_element: name of the innermost element that is currently open,
            or ``None`` outside the document element.
    """

    def __init__(self):
        super().__init__()
        self.data = []
        self.current_element = None
        # Names of all currently open elements, outermost first. Needed so
        # that text following a closed child is attributed to the parent
        # instead of to the child that has already ended.
        self._open_elements = []

    def startElement(self, name, attrs):
        """Enter ``name``; attributes are ignored."""
        self._open_elements.append(name)
        self.current_element = name

    def endElement(self, name):
        """Leave the innermost element and fall back to its parent."""
        if self._open_elements:
            self._open_elements.pop()
        self.current_element = self._open_elements[-1] if self._open_elements else None

    def characters(self, content):
        """Record a text chunk against the element it belongs to.

        SAX may deliver one text node in several chunks; each non-blank
        chunk is recorded as its own entry.
        """
        text = content.strip()
        if self.current_element and text:
            self.data.append((self.current_element, text))

# Build the collector and the parser, then wire them together
handler = DataCollector()
parser = sax.make_parser()
parser.setContentHandler(handler)

# Switch on every restriction before any input is read
for restriction in ("forbid_dtd", "forbid_entities", "forbid_external"):
    setattr(parser, restriction, True)

# Run the parse and show what was collected
parser.parse('data.xml')
print(handler.data)

Common Usage Patterns

Basic SAX Processing

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class SimpleHandler(ContentHandler):
    """Record tags and non-blank text in ``elements``; print attributes as they appear."""

    def __init__(self):
        self.elements = []

    def startElement(self, name, attrs):
        """Store the opening tag and print each attribute as ``  key=value``."""
        opening = "<{}>".format(name)
        self.elements.append(opening)
        for key, value in attrs.items():
            print("  {}={}".format(key, value))

    def endElement(self, name):
        """Store the closing tag."""
        closing = "</{}>".format(name)
        self.elements.append(closing)

    def characters(self, content):
        """Store the stripped text chunk unless it is blank."""
        text = content.strip()
        if not text:
            return
        self.elements.append(text)

# Parse with secure defaults
# The SAX exception classes live in xml.sax; there is no ParseError in xml.sax.
# NOTE(review): defusedxml.sax does not appear to re-export them - confirm
# against the installed version.
from xml.sax import SAXParseException

# Sample payload; replace with the document to parse. Bytes are used because
# they are accepted however the library wraps the input.
xml_content = b'<root><item>value</item></root>'

handler = SimpleHandler()
try:
    sax.parseString(xml_content, handler)
except SAXParseException as e:
    print(f"XML parsing error: {e}")
else:
    print("Parsed elements:", handler.elements)

Memory-Efficient Processing for Large Files

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class RecordProcessor(ContentHandler):
    """Stream ``<record>`` elements one at a time without building a tree.

    Each child element of a record becomes a key in ``current_record`` whose
    value is the element's stripped text. ``process_record`` is called once
    per completed record.
    """

    def __init__(self):
        super().__init__()
        self.in_record = False
        self.current_record = {}
        self.current_field = None
        self.record_count = 0
        # Text chunks received for the field that is currently open.
        self._text_parts = []

    def startElement(self, name, attrs):
        """Open a new record, or start collecting text for a field inside one."""
        if name == 'record':
            self.in_record = True
            self.current_record = {}
        elif self.in_record:
            # Store any text already collected for the enclosing field
            # before switching to the nested one.
            self._flush_field()
            self.current_field = name

    def endElement(self, name):
        """Finish a field, or hand the completed record to ``process_record``."""
        if name == 'record' and self.in_record:
            self.process_record(self.current_record)
            self.in_record = False
            self.record_count += 1
        elif self.in_record:
            self._flush_field()
            self.current_field = None

    def characters(self, content):
        """Buffer a text chunk for the current field.

        SAX parsers may split one text node across several ``characters``
        calls, so chunks are accumulated and joined when the field ends
        rather than letting each chunk overwrite the previous one.
        """
        if self.in_record and self.current_field:
            self._text_parts.append(content)

    def _flush_field(self):
        """Store the buffered text for the current field if it is non-blank."""
        text = "".join(self._text_parts).strip()
        self._text_parts = []
        if self.current_field and text:
            self.current_record[self.current_field] = text

    def process_record(self, record):
        """Handle one completed record; ``record_count`` is its zero-based index."""
        print(f"Processing record {self.record_count}: {record}")

# Stream the large file record by record, then report the total
handler = RecordProcessor()
sax.parse('large_dataset.xml', handler)
total_records = handler.record_count
print(f"Processed {total_records} records")

Error Handling with SAX

import defusedxml.sax as sax
import defusedxml
from xml.sax.handler import ContentHandler, ErrorHandler

class CustomErrorHandler(ErrorHandler):
    """Error handler that prints every report and re-raises only fatal errors."""

    def error(self, exception):
        """Print a recoverable error and return."""
        print("XML error: {}".format(exception))

    def fatalError(self, exception):
        """Print a fatal error, then raise it to the caller."""
        print("Fatal XML error: {}".format(exception))
        raise exception

    def warning(self, exception):
        """Print a warning and return."""
        print("XML warning: {}".format(exception))

class SafeContentHandler(ContentHandler):
    """Content handler that only counts the elements it sees."""

    def __init__(self):
        self.elements_processed = 0

    def startElement(self, name, attrs):
        """Count one more opened element; name and attributes are unused."""
        self.elements_processed = self.elements_processed + 1

def safe_sax_parse(xml_content):
    """Parse XML with comprehensive error handling.

    Prints a success line with the element count, or one diagnostic line for
    the first security violation or SAX error encountered. Nothing is
    returned and none of the handled exceptions propagate.

    Args:
        xml_content: XML document passed straight to ``sax.parseString``.
    """
    # The SAX exception hierarchy lives in xml.sax.
    # NOTE(review): defusedxml.sax does not appear to re-export SAXException,
    # so it is taken from the standard library here - confirm against the
    # installed version.
    from xml.sax import SAXException

    handler = SafeContentHandler()
    error_handler = CustomErrorHandler()

    try:
        sax.parseString(xml_content, handler, error_handler)
    except defusedxml.DTDForbidden as e:
        print(f"DTD processing forbidden: {e}")
    except defusedxml.EntitiesForbidden as e:
        print(f"Entity processing forbidden: {e}")
    except defusedxml.ExternalReferenceForbidden as e:
        print(f"External reference forbidden: {e}")
    except SAXException as e:
        print(f"SAX parsing error: {e}")
    else:
        print(f"Successfully processed {handler.elements_processed} elements")

Namespace-Aware Processing

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class NamespaceHandler(ContentHandler):
    """Print namespace events and track the active prefix mappings on a stack."""

    def __init__(self):
        self.namespace_stack = []

    def startPrefixMapping(self, prefix, uri):
        """Announce a new prefix mapping and push it onto the stack."""
        print("Namespace mapping: {} -> {}".format(prefix, uri))
        mapping = (prefix, uri)
        self.namespace_stack.append(mapping)

    def endPrefixMapping(self, prefix):
        """Announce the end of a mapping and drop the most recent stack entry."""
        print("End namespace mapping: {}".format(prefix))
        self.namespace_stack.pop()

    def startElementNS(self, name, qname, attrs):
        """Print the local name and namespace URI of an opening tag."""
        uri, local = name
        print("Start element: {} (namespace: {})".format(local, uri))

    def endElementNS(self, name, qname):
        """Print the local name and namespace URI of a closing tag."""
        uri, local = name
        print("End element: {} (namespace: {})".format(local, uri))

# Parse XML with namespace support
import io
from xml.sax.handler import feature_namespaces

xml_with_ns = '''<?xml version="1.0"?>
<root xmlns="http://example.com/ns1" xmlns:ns2="http://example.com/ns2">
    <item>value1</item>
    <ns2:item>value2</ns2:item>
</root>'''

handler = NamespaceHandler()

# SAX namespace processing is off by default. Without feature_namespaces the
# parser calls startElement/endElement only, so startPrefixMapping and
# startElementNS on the handler would never be called. The parser is
# therefore created explicitly and the feature enabled before parsing.
parser = sax.make_parser()
parser.setFeature(feature_namespaces, True)
parser.setContentHandler(handler)
# Feed the document as a byte stream so the parser handles the decoding.
parser.parse(io.BytesIO(xml_with_ns.encode('utf-8')))

Custom Security Configuration

import defusedxml.sax as sax
from xml.sax.handler import ContentHandler

class ConfigurableHandler(ContentHandler):
    """Content handler that prints the name of each element it opens."""

    def startElement(self, name, attrs):
        """Print the element name; attributes are unused."""
        print("Element: {}".format(name))

def parse_with_custom_security(xml_content, trust_level='untrusted'):
    """Parse ``xml_content`` with restrictions chosen from ``trust_level``.

    ``'trusted'`` allows DTDs and entities, ``'internal'`` allows DTDs only,
    and any other value forbids both. External references are forbidden at
    every level.
    """
    handler = ConfigurableHandler()

    # DTDs are tolerated for trusted and internal content only.
    allow_dtd = trust_level == 'trusted' or trust_level == 'internal'
    # Entity expansion is tolerated for trusted content only.
    allow_entities = trust_level == 'trusted'

    sax.parseString(
        xml_content,
        handler,
        forbid_dtd=not allow_dtd,
        forbid_entities=not allow_entities,
        forbid_external=True,
    )

Migration from Standard Library

DefusedXML SAX is designed as a drop-in replacement:

# Before (vulnerable): the standard-library module is bound to the name `sax`
import xml.sax as sax
sax.parseString(untrusted_xml, handler)

# After (secure): only the import changes; the call itself stays the same
import defusedxml.sax as sax  
sax.parseString(untrusted_xml, handler)  # Now protected by default

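The parsing functions (parse, parseString, make_parser) keep the same call shape as their xml.sax counterparts, with the addition of the forbid_dtd, forbid_entities, and forbid_external security parameters. Handler base classes and SAX exception types (ContentHandler, ErrorHandler, SAXException, SAXParseException) are still imported from xml.sax.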

Install with Tessl CLI

npx tessl i tessl/pypi-defusedxml

docs

dom.md

elementtree.md

exceptions.md

index.md

sax.md

stdlib-patching.md

xmlrpc.md

tile.json