or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dom.md · elementtree.md · exceptions.md · index.md · sax.md · stdlib-patching.md · xmlrpc.md

sax.md · docs/

0

# SAX Processing

1

2

Secure SAX-based XML parsing with event-driven processing and configurable security restrictions. DefusedXML provides drop-in replacements for xml.sax with comprehensive protection against XML attacks while maintaining API compatibility for event-driven XML processing.

3

4

## Capabilities

5

6

### SAX Parsing Functions

7

8

Core SAX parsing functions that provide secure alternatives to the standard library's `xml.sax.parse`, `xml.sax.parseString`, and `xml.sax.make_parser`.

9

10

```python { .api }

11

def parse(source, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

12

"""

13

Parse XML document using SAX event-driven processing.

14

15

Args:

16

source (str or file-like): File path or file-like object containing XML

17

handler (ContentHandler): SAX content handler to receive parsing events

18

errorHandler (ErrorHandler, optional): SAX error handler for error processing

19

forbid_dtd (bool): Forbid DTD processing (default: False)

20

forbid_entities (bool): Forbid entity expansion (default: True)

21

forbid_external (bool): Forbid external references (default: True)

22

23

Raises:

24

xml.sax.SAXParseException: XML syntax errors

25

DTDForbidden: DTD processing attempted when forbidden

26

EntitiesForbidden: Entity processing attempted when forbidden

27

ExternalReferenceForbidden: External reference attempted when forbidden

28

"""

29

30

def parseString(string, handler, errorHandler=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

31

"""

32

Parse XML document from string using SAX event-driven processing.

33

34

Args:

35

string (str or bytes): XML content as string or bytes

36

handler (ContentHandler): SAX content handler to receive parsing events

37

errorHandler (ErrorHandler, optional): SAX error handler for error processing

38

forbid_dtd (bool): Forbid DTD processing (default: False)

39

forbid_entities (bool): Forbid entity expansion (default: True)

40

forbid_external (bool): Forbid external references (default: True)

41

42

Raises:

43

xml.sax.SAXParseException: XML syntax errors

44

DTDForbidden: DTD processing attempted when forbidden

45

EntitiesForbidden: Entity processing attempted when forbidden

46

ExternalReferenceForbidden: External reference attempted when forbidden

47

"""

48

49

def make_parser(parser_list=[]):

50

"""

51

Create a secure SAX parser instance.

52

53

Args:

54

parser_list (list): List of parser names (ignored, always returns DefusedExpatParser)

55

56

Returns:

57

DefusedExpatParser: Secure SAX parser instance

58

"""

59

```

60

61

**Usage Examples:**

62

63

```python

64

import defusedxml.sax as sax

65

from xml.sax.handler import ContentHandler

66

67

class MyHandler(ContentHandler):

68

def startElement(self, name, attrs):

69

print(f"Start element: {name}")

70

71

def endElement(self, name):

72

print(f"End element: {name}")

73

74

def characters(self, content):

75

print(f"Character data: {content.strip()}")

76

77

# Parse from file

78

handler = MyHandler()

79

sax.parse('document.xml', handler)

80

81

# Parse from string with custom security settings

82

xml_string = '<root><item>value</item></root>'

83

sax.parseString(xml_string, handler, forbid_dtd=True, forbid_entities=False)

84

85

# Create parser manually

86

parser = sax.make_parser()

87

parser.setContentHandler(handler)

88

parser.forbid_dtd = True

89

parser.forbid_entities = True

90

parser.forbid_external = True

91

parser.parse('document.xml')

92

```

93

94

### Secure SAX Parser

95

96

DefusedExpatParser provides the core secure SAX parsing functionality with configurable security restrictions.

97

98

```python { .api }

99

class DefusedExpatParser:

100

"""

101

Secure SAX parser using pyexpat with configurable security restrictions.

102

103

Inherits from xml.sax.expatreader.ExpatParser but adds security

104

handlers to prevent XML bomb attacks, DTD processing attacks,

105

and external entity attacks.

106

"""

107

108

def __init__(self, namespaceHandling=0, bufsize=65536-20, forbid_dtd=False, forbid_entities=True, forbid_external=True):

109

"""

110

Initialize DefusedExpatParser with security settings.

111

112

Args:

113

namespaceHandling (int): Namespace handling mode (0=disabled, 1=enabled)

114

bufsize (int): Buffer size for parsing (default: 65516)

115

forbid_dtd (bool): Forbid DTD processing (default: False)

116

forbid_entities (bool): Forbid entity expansion (default: True)

117

forbid_external (bool): Forbid external references (default: True)

118

"""

119

120

def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):

121

"""Handler that raises DTDForbidden when DTD processing is forbidden"""

122

123

def defused_entity_decl(self, name, is_parameter_entity, value, base, sysid, pubid, notation_name):

124

"""Handler that raises EntitiesForbidden when entity processing is forbidden"""

125

126

def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):

127

"""Handler that raises EntitiesForbidden for unparsed entities when forbidden"""

128

129

def defused_external_entity_ref_handler(self, context, base, sysid, pubid):

130

"""Handler that raises ExternalReferenceForbidden when external references are forbidden"""

131

132

def reset(self):

133

"""Reset parser state and reinstall security handlers"""

134

```

135

136

**Usage Examples:**

137

138

```python

139

import defusedxml.sax as sax

140

from xml.sax.handler import ContentHandler

141

142

class DataCollector(ContentHandler):

143

def __init__(self):

144

self.data = []

145

self.current_element = None

146

147

def startElement(self, name, attrs):

148

self.current_element = name

149

150

def characters(self, content):

151

if self.current_element and content.strip():

152

self.data.append((self.current_element, content.strip()))

153

154

# Manual parser creation with custom settings

155

parser = sax.make_parser()

156

handler = DataCollector()

157

parser.setContentHandler(handler)

158

159

# Configure security settings

160

parser.forbid_dtd = True

161

parser.forbid_entities = True

162

parser.forbid_external = True

163

164

# Parse document

165

parser.parse('data.xml')

166

print(handler.data)

167

```

168

169

## Common Usage Patterns

170

171

### Basic SAX Processing

172

173

```python

174

import defusedxml.sax as sax

175

from xml.sax.handler import ContentHandler

176

177

class SimpleHandler(ContentHandler):

178

def __init__(self):

179

self.elements = []

180

181

def startElement(self, name, attrs):

182

self.elements.append(f"<{name}>")

183

# Process attributes

184

for attr_name, attr_value in attrs.items():

185

print(f" {attr_name}={attr_value}")

186

187

def endElement(self, name):

188

self.elements.append(f"</{name}>")

189

190

def characters(self, content):

191

if content.strip():

192

self.elements.append(content.strip())

193

194

# Parse with secure defaults

195

handler = SimpleHandler()

196

try:

197

sax.parseString(xml_content, handler)

198

print("Parsed elements:", handler.elements)

199

except sax.ParseError as e:

200

print(f"XML parsing error: {e}")

201

```

202

203

### Memory-Efficient Processing for Large Files

204

205

```python

206

import defusedxml.sax as sax

207

from xml.sax.handler import ContentHandler

208

209

class RecordProcessor(ContentHandler):

210

def __init__(self):

211

self.in_record = False

212

self.current_record = {}

213

self.current_field = None

214

self.record_count = 0

215

216

def startElement(self, name, attrs):

217

if name == 'record':

218

self.in_record = True

219

self.current_record = {}

220

elif self.in_record:

221

self.current_field = name

222

223

def endElement(self, name):

224

if name == 'record' and self.in_record:

225

self.process_record(self.current_record)

226

self.in_record = False

227

self.record_count += 1

228

elif self.in_record:

229

self.current_field = None

230

231

def characters(self, content):

232

if self.in_record and self.current_field and content.strip():

233

self.current_record[self.current_field] = content.strip()

234

235

def process_record(self, record):

236

# Process individual record

237

print(f"Processing record {self.record_count}: {record}")

238

239

# Process large XML file efficiently

240

handler = RecordProcessor()

241

sax.parse('large_dataset.xml', handler)

242

print(f"Processed {handler.record_count} records")

243

```

244

245

### Error Handling with SAX

246

247

```python

248

import defusedxml.sax as sax

249

import defusedxml

250

from xml.sax.handler import ContentHandler, ErrorHandler

251

252

class CustomErrorHandler(ErrorHandler):

253

def error(self, exception):

254

print(f"XML error: {exception}")

255

256

def fatalError(self, exception):

257

print(f"Fatal XML error: {exception}")

258

raise exception

259

260

def warning(self, exception):

261

print(f"XML warning: {exception}")

262

263

class SafeContentHandler(ContentHandler):

264

def __init__(self):

265

self.elements_processed = 0

266

267

def startElement(self, name, attrs):

268

self.elements_processed += 1

269

270

def safe_sax_parse(xml_content):

271

"""Parse XML with comprehensive error handling."""

272

handler = SafeContentHandler()

273

error_handler = CustomErrorHandler()

274

275

try:

276

sax.parseString(xml_content, handler, error_handler)

277

print(f"Successfully processed {handler.elements_processed} elements")

278

except defusedxml.DTDForbidden as e:

279

print(f"DTD processing forbidden: {e}")

280

except defusedxml.EntitiesForbidden as e:

281

print(f"Entity processing forbidden: {e}")

282

except defusedxml.ExternalReferenceForbidden as e:

283

print(f"External reference forbidden: {e}")

284

except sax.SAXException as e:

285

print(f"SAX parsing error: {e}")

286

```

287

288

### Namespace-Aware Processing

289

290

```python

291

import defusedxml.sax as sax

292

from xml.sax.handler import ContentHandler

293

294

class NamespaceHandler(ContentHandler):

295

def __init__(self):

296

self.namespace_stack = []

297

298

def startPrefixMapping(self, prefix, uri):

299

print(f"Namespace mapping: {prefix} -> {uri}")

300

self.namespace_stack.append((prefix, uri))

301

302

def endPrefixMapping(self, prefix):

303

print(f"End namespace mapping: {prefix}")

304

self.namespace_stack.pop()

305

306

def startElementNS(self, name, qname, attrs):

307

namespace_uri, local_name = name

308

print(f"Start element: {local_name} (namespace: {namespace_uri})")

309

310

def endElementNS(self, name, qname):

311

namespace_uri, local_name = name

312

print(f"End element: {local_name} (namespace: {namespace_uri})")

313

314

# Parse XML with namespace support

315

xml_with_ns = '''<?xml version="1.0"?>

316

<root xmlns="http://example.com/ns1" xmlns:ns2="http://example.com/ns2">

317

<item>value1</item>

318

<ns2:item>value2</ns2:item>

319

</root>'''

320

321

handler = NamespaceHandler()

322

sax.parseString(xml_with_ns, handler)

323

```

324

325

### Custom Security Configuration

326

327

```python

328

import defusedxml.sax as sax

329

from xml.sax.handler import ContentHandler

330

331

class ConfigurableHandler(ContentHandler):

332

def startElement(self, name, attrs):

333

print(f"Element: {name}")

334

335

def parse_with_custom_security(xml_content, trust_level='untrusted'):

336

"""Parse XML with security settings based on trust level."""

337

handler = ConfigurableHandler()

338

339

if trust_level == 'trusted':

340

# Relaxed security for trusted content

341

sax.parseString(xml_content, handler, forbid_dtd=False, forbid_entities=False, forbid_external=True)

342

elif trust_level == 'internal':

343

# Moderate security for internal content

344

sax.parseString(xml_content, handler, forbid_dtd=False, forbid_entities=True, forbid_external=True)

345

else:

346

# Maximum security for untrusted content

347

sax.parseString(xml_content, handler, forbid_dtd=True, forbid_entities=True, forbid_external=True)

348

```

349

350

## Migration from Standard Library

351

352

DefusedXML SAX is designed as a drop-in replacement for the standard library's `xml.sax` module:

353

354

```python

355

# Before (vulnerable)

356

import xml.sax as sax

357

sax.parseString(untrusted_xml, handler)

358

359

# After (secure)

360

import defusedxml.sax as sax

361

sax.parseString(untrusted_xml, handler) # Now protected by default

362

```

363

364

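The API is identical except that the parsing functions `parse` and `parseString` accept three additional security keyword parameters: `forbid_dtd`, `forbid_entities`, and `forbid_external`.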