XML bomb protection for Python stdlib modules
—
Experimental system-wide XML protection by monkey-patching all standard library XML modules with defused alternatives. This provides comprehensive protection across an entire Python application without requiring individual module imports to be changed.
System-wide monkey patching function that replaces all standard library XML modules with defused alternatives.
def defuse_stdlib():
    """
    Monkey patch and defuse all standard library XML packages.

    Replaces the following standard library modules with defused alternatives:
    - xml.etree.ElementTree -> defusedxml.ElementTree
    - xml.etree.cElementTree -> defusedxml.cElementTree
    - xml.sax -> defusedxml.sax
    - xml.dom.minidom -> defusedxml.minidom
    - xml.dom.pulldom -> defusedxml.pulldom
    - xml.dom.expatbuilder -> defusedxml.expatbuilder
    - xml.sax.expatreader -> defusedxml.expatreader
    - xmlrpc.client/xmlrpclib -> defusedxml.xmlrpc (via monkey_patch)

    Returns:
        dict: Mapping of defused modules to original stdlib modules

    Warning:
        This is an EXPERIMENTAL feature. The monkey patch is global
        and affects all XML processing in the current Python process.
        Use with caution in production environments.
    """

Usage Examples:
import defusedxml
# Apply global XML security patches
defused_modules = defusedxml.defuse_stdlib()
print(f"Defused {len(defused_modules)} XML modules")
# Now all XML processing uses defused implementations automatically
import xml.etree.ElementTree as ET # Actually uses defusedxml.ElementTree
import xml.sax as sax # Actually uses defusedxml.sax
import xml.dom.minidom as minidom # Actually uses defusedxml.minidom
# Parse XML normally - it's automatically secured
root = ET.fromstring('<root><item>value</item></root>')
print(f"Root element: {root.tag}")
# SAX parsing is also automatically secured
from xml.sax.handler import ContentHandler
class MyHandler(ContentHandler):
    """SAX content handler that prints the name of every element it enters."""

    def startElement(self, name, attrs):
        """Print the element name when the parser reports a start tag.

        Args:
            name: Name of the element being opened.
            attrs: Attributes of the element; not used by this handler.
        """
        print(f"Element: {name}")
handler = MyHandler()
sax.parseString('<root><item>test</item></root>', handler)

The defuse_stdlib() function works by:
xmlrpc.monkey_patch()

import defusedxml
import logging
def initialize_security():
    """Initialize application-wide XML security.

    Calls ``defusedxml.defuse_stdlib()`` and logs how many entries the
    returned mapping contains, followed by one debug line per entry.

    Returns:
        bool: ``True`` when patching succeeded, ``False`` when the patch
        call raised. The failure is logged and not propagated, so the
        caller decides whether to continue without protection.
    """
    try:
        # Keep the guarded region to the one call that can legitimately fail.
        defused_modules = defusedxml.defuse_stdlib()
    except Exception as e:
        # Startup boundary: keep the traceback so the cause can be diagnosed.
        logging.exception("Failed to initialize XML security: %s", e)
        return False

    logging.info(
        "XML security initialized: defused %d modules", len(defused_modules)
    )
    # Log which modules were defused
    for defused_mod, stdlib_mod in defused_modules.items():
        if stdlib_mod:
            logging.debug(
                "Defused: %s -> %s", defused_mod.__name__, stdlib_mod.__name__
            )
        else:
            # No stdlib counterpart recorded for this entry.
            logging.debug("Patched: %s", defused_mod.__name__)
    return True
# Call during application startup
if __name__ == "__main__":
    # The application starts either way; only the status message differs.
    if initialize_security():
        print("Application starting with XML security enabled")
    else:
        print("Warning: XML security initialization failed")
    main()

import os
import defusedxml
def apply_xml_security():
    """Apply XML security based on configuration.

    Reads the ``ENABLE_XML_SECURITY`` environment variable, which defaults
    to ``'true'`` when unset. The value is compared case-insensitively and
    with surrounding whitespace ignored; ``1``, ``true``, ``yes`` and ``on``
    enable patching, and any other value disables it.

    Returns:
        None. Progress is reported on standard output.
    """
    # Check environment variable. Accept the common truthy spellings so that
    # e.g. ENABLE_XML_SECURITY=1 does not silently turn protection off.
    raw_setting = os.getenv('ENABLE_XML_SECURITY', 'true')
    enable_security = raw_setting.strip().lower() in {'1', 'true', 'yes', 'on'}

    if not enable_security:
        print("XML security patches disabled by configuration")
        return

    print("Applying system-wide XML security patches...")
    defused_modules = defusedxml.defuse_stdlib()
    print(f"Successfully defused {len(defused_modules)} XML modules")
# Apply security conditionally
apply_xml_security()
# Rest of application continues normally
import xml.etree.ElementTree as ET
# This uses defused implementation if security was applied

import unittest
import defusedxml
class TestWithXMLSecurity(unittest.TestCase):
    """Test case that applies XML security patches."""

    @classmethod
    def setUpClass(cls):
        """Apply XML security patches before running tests."""
        cls.defused_modules = defusedxml.defuse_stdlib()
        print(f"Test suite using defused XML modules: {len(cls.defused_modules)}")

    def test_xml_parsing(self):
        """Test that XML parsing works with security patches."""
        import xml.etree.ElementTree as ET

        # This should work normally
        xml_content = '<root><item>test</item></root>'
        root = ET.fromstring(xml_content)
        self.assertEqual(root.tag, 'root')
        self.assertEqual(root[0].text, 'test')

    def test_malicious_xml_blocked(self):
        """Test that malicious XML is blocked."""
        import xml.etree.ElementTree as ET

        # XML with external entity (should be blocked)
        malicious_xml = '''<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY external SYSTEM "file:///etc/passwd">
]>
<root>&external;</root>'''
        # With defusedxml's default settings the entity *declaration* is
        # rejected (EntitiesForbidden) before the external reference is ever
        # resolved, so asserting ExternalReferenceForbidden would fail.
        # Assert the shared base class so either rejection satisfies the test.
        with self.assertRaises(defusedxml.DefusedXmlException):
            ET.fromstring(malicious_xml)
if __name__ == '__main__':
    # Run the test case above when this file is executed as a script.
    unittest.main()

import defusedxml
class SecureXMLProcessor:
    """XML processor that ensures security is applied."""

    def __init__(self, auto_defuse=True):
        """Initialize processor with optional auto-defusing.

        Args:
            auto_defuse: When true, apply the system-wide patches immediately.
        """
        self.auto_defuse = auto_defuse
        # Mapping returned by defusedxml.defuse_stdlib(); None until patched.
        self.defused_modules = None
        if auto_defuse:
            self.enable_security()

    def enable_security(self):
        """Enable XML security patches; does nothing once already applied."""
        if self.defused_modules is not None:
            return
        self.defused_modules = defusedxml.defuse_stdlib()
        print(f"XML security enabled: {len(self.defused_modules)} modules defused")

    def process_xml(self, xml_content):
        """Process XML content with security enabled.

        Args:
            xml_content: XML document text to parse.

        Returns:
            dict | None: The structure built by ``_extract_data``, or ``None``
            when parsing raised (the error is printed, not propagated).
        """
        import xml.etree.ElementTree as ET

        try:
            root = ET.fromstring(xml_content)
        except Exception as e:
            # Deliberately best-effort: report the problem and signal failure.
            print(f"XML processing failed: {e}")
            return None
        return self._extract_data(root)

    def _extract_data(self, root):
        """Extract data from XML root element.

        Returns:
            dict: ``tag`` and ``attributes`` of ``root`` plus ``children``,
            a list with ``tag``, ``text`` and ``attributes`` for each direct
            child element.
        """
        data = {'tag': root.tag, 'attributes': dict(root.attrib)}
        # Extract child elements
        data['children'] = [
            {
                'tag': child.tag,
                'text': child.text,
                'attributes': dict(child.attrib),
            }
            for child in root
        ]
        return data
# Usage
processor = SecureXMLProcessor()
result = processor.process_xml('<root><item id="1">value</item></root>')
print(result)

import defusedxml
import logging
import sys
class XMLSecurityMonitor:
    """Monitor XML security status and violations."""

    def __init__(self):
        """Set up counters and the logger; security starts disabled."""
        self.security_enabled = False
        self.defused_modules = None
        self.violation_count = 0
        # Guards against wrapping sys.excepthook more than once.
        self._hook_installed = False
        # Set up logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def enable_security(self):
        """Enable XML security with monitoring.

        Any exception raised while patching or installing the hook is logged
        and leaves ``security_enabled`` set to ``False``.
        """
        try:
            self.defused_modules = defusedxml.defuse_stdlib()
            self.security_enabled = True
            self.logger.info(
                "XML security enabled: %d modules defused",
                len(self.defused_modules),
            )
            # Install exception handler to monitor violations
            self._install_violation_handler()
        except Exception as e:
            self.logger.error("Failed to enable XML security: %s", e)
            self.security_enabled = False

    def _install_violation_handler(self):
        """Install handler to monitor security violations.

        Wraps ``sys.excepthook`` so that uncaught defusedxml exceptions bump
        ``violation_count`` before the previous hook runs. The hook is
        installed at most once per monitor; wrapping it again on a repeated
        ``enable_security()`` call would count every violation several times.

        Note: ``sys.excepthook`` only sees exceptions that reach the top
        level, so violations handled by application code are not counted.
        """
        if self._hook_installed:
            return

        original_excepthook = sys.excepthook

        def security_aware_excepthook(exctype, value, tb):
            if issubclass(exctype, defusedxml.DefusedXmlException):
                self.violation_count += 1
                self.logger.warning(
                    "XML security violation #%d: %s", self.violation_count, value
                )
            # Always defer to the previous hook so normal reporting still happens.
            original_excepthook(exctype, value, tb)

        sys.excepthook = security_aware_excepthook
        self._hook_installed = True

    def get_status(self):
        """Get current security status.

        Returns:
            dict: ``security_enabled`` flag, ``defused_modules`` count
            (0 when nothing has been patched) and ``violation_count``.
        """
        return {
            'security_enabled': self.security_enabled,
            'defused_modules': len(self.defused_modules) if self.defused_modules else 0,
            'violation_count': self.violation_count,
        }
# Usage
monitor = XMLSecurityMonitor()
monitor.enable_security()
# Later in application
status = monitor.get_status()
print(f"XML Security Status: {status}")

Instead of system-wide patching, consider:
# Selective module replacement
import defusedxml.ElementTree as ET # Explicitly use defused version
import defusedxml.sax as sax # Explicitly use defused version
# Library-specific protection
import defusedxml.xmlrpc as xmlrpc_defused
xmlrpc_defused.monkey_patch() # Only patch XML-RPC
# Application-level wrappers
def safe_parse_xml(xml_content):
    import defusedxml.ElementTree as ET
    return ET.fromstring(xml_content)

When migrating from individual defused imports to system-wide patching:
# Before: Individual imports
import defusedxml.ElementTree as ET
import defusedxml.sax as sax
root = ET.fromstring(xml_content)
# After: System-wide patching
import defusedxml
defusedxml.defuse_stdlib()
import xml.etree.ElementTree as ET # Now automatically defused
import xml.sax as sax # Now automatically defused
root = ET.fromstring(xml_content) # Same API, but secured

The main advantage is that existing code and third-party libraries automatically benefit from security protections without modification.
Install with Tessl CLI
npx tessl i tessl/pypi-defusedxml