or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dom.md elementtree.md exceptions.md index.md sax.md stdlib-patching.md xmlrpc.md

dom.md docs/

0

# DOM Processing

1

2

Secure DOM-based XML parsing that builds complete document object models with configurable security restrictions. DefusedXML provides drop-in replacements for xml.dom.minidom and xml.dom.pulldom with comprehensive protection against XML attacks while maintaining API compatibility.

3

4

## Capabilities

5

6

### MiniDOM Processing

7

8

Secure minidom parsing functions that build complete DOM trees with security restrictions.

9

10

```python { .api }

11

def parse(file, parser=None, bufsize=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

12

"""

13

Parse XML document from file into a DOM tree.

14

15

Args:

16

file (str or file-like): File path or file-like object containing XML

17

parser (optional): Custom parser instance (for pulldom compatibility)

18

bufsize (int, optional): Buffer size for parsing

19

forbid_dtd (bool): Forbid DTD processing (default: False)

20

forbid_entities (bool): Forbid entity expansion (default: True)

21

forbid_external (bool): Forbid external references (default: True)

22

23

Returns:

24

Document: DOM document object

25

26

Raises:

27

ExpatError (xml.parsers.expat): XML syntax errors

28

DTDForbidden: DTD processing attempted when forbidden

29

EntitiesForbidden: Entity processing attempted when forbidden

30

ExternalReferenceForbidden: External reference attempted when forbidden

31

"""

32

33

def parseString(string, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

34

"""

35

Parse XML document from string into a DOM tree.

36

37

Args:

38

string (str or bytes): XML content as string or bytes

39

parser (optional): Custom parser instance (for pulldom compatibility)

40

forbid_dtd (bool): Forbid DTD processing (default: False)

41

forbid_entities (bool): Forbid entity expansion (default: True)

42

forbid_external (bool): Forbid external references (default: True)

43

44

Returns:

45

Document: DOM document object

46

47

Raises:

48

ExpatError (xml.parsers.expat): XML syntax errors

49

DTDForbidden: DTD processing attempted when forbidden

50

EntitiesForbidden: Entity processing attempted when forbidden

51

ExternalReferenceForbidden: External reference attempted when forbidden

52

"""

53

```

54

55

**Usage Examples:**

56

57

```python

58

import defusedxml.minidom as minidom

59

60

# Parse from file

61

doc = minidom.parse('document.xml')

62

root = doc.documentElement

63

print(f"Root element: {root.tagName}")

64

65

# Parse from string

66

xml_string = '<root><item id="1">value</item></root>'

67

doc = minidom.parseString(xml_string)

68

69

# Navigate DOM tree

70

for node in doc.getElementsByTagName('item'):

71

print(f"Item ID: {node.getAttribute('id')}, Value: {node.firstChild.nodeValue}")

72

73

# Parse with custom security settings

74

doc = minidom.parseString(xml_string, forbid_dtd=True, forbid_entities=False)

75

```

76

77

### PullDOM Processing

78

79

Secure pulldom parsing functions that provide event-driven parsing with DOM node creation on demand.

80

81

```python { .api }

82

def parse(stream_or_string, parser=None, bufsize=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

83

"""

84

Parse XML document using pull-based DOM processing.

85

86

Args:

87

stream_or_string (str or file-like): File path or file-like object containing XML

88

parser (optional): Custom parser instance

89

bufsize (int, optional): Buffer size for parsing

90

forbid_dtd (bool): Forbid DTD processing (default: False)

91

forbid_entities (bool): Forbid entity expansion (default: True)

92

forbid_external (bool): Forbid external references (default: True)

93

94

Returns:

95

iterator: Iterator yielding (event, node) tuples

96

97

Raises:

98

SAXParseException (xml.sax): XML syntax errors, raised while iterating events

99

DTDForbidden: DTD processing attempted when forbidden

100

EntitiesForbidden: Entity processing attempted when forbidden

101

ExternalReferenceForbidden: External reference attempted when forbidden

102

"""

103

104

def parseString(string, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

105

"""

106

Parse XML document from string using pull-based DOM processing.

107

108

Args:

109

string (str or bytes): XML content as string or bytes

110

parser (optional): Custom parser instance

111

forbid_dtd (bool): Forbid DTD processing (default: False)

112

forbid_entities (bool): Forbid entity expansion (default: True)

113

forbid_external (bool): Forbid external references (default: True)

114

115

Returns:

116

iterator: Iterator yielding (event, node) tuples

117

118

Raises:

119

SAXParseException (xml.sax): XML syntax errors, raised while iterating events

120

DTDForbidden: DTD processing attempted when forbidden

121

EntitiesForbidden: Entity processing attempted when forbidden

122

ExternalReferenceForbidden: External reference attempted when forbidden

123

"""

124

```

125

126

**Usage Examples:**

127

128

```python

129

import defusedxml.pulldom as pulldom

130

131

# Pull-based parsing for selective DOM building

132

doc = pulldom.parse('large_document.xml')

133

for event, node in doc:

134

if event == pulldom.START_ELEMENT and node.tagName == 'important':

135

# Build DOM subtree for this element only

136

doc.expandNode(node)

137

print(f"Important element: {node.toxml()}")

138

139

# Parse string with pull DOM

140

xml_string = '<root><item>1</item><item>2</item></root>'

141

doc = pulldom.parseString(xml_string)

142

for event, node in doc:

143

if event == pulldom.START_ELEMENT and node.tagName == 'item':

144

doc.expandNode(node)

145

print(f"Item value: {node.firstChild.nodeValue}")

146

```

147

148

### DOM Builder Classes

149

150

Secure DOM builder classes that handle the actual construction of DOM trees with security restrictions.

151

152

```python { .api }

153

class DefusedExpatBuilder:

154

"""

155

Secure DOM builder using expat parser.

156

157

Builds DOM trees with configurable security restrictions to prevent

158

XML bomb attacks, DTD processing attacks, and external entity attacks.

159

"""

160

161

def __init__(self, options=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):

162

"""

163

Initialize DefusedExpatBuilder with security settings.

164

165

Args:

166

options (optional): Builder options

167

forbid_dtd (bool): Forbid DTD processing (default: False)

168

forbid_entities (bool): Forbid entity expansion (default: True)

169

forbid_external (bool): Forbid external references (default: True)

170

"""

171

172

def parseFile(self, file):

173

"""Parse XML from file-like object and return Document"""

174

175

def parseString(self, string):

176

"""Parse XML from string and return Document"""

177

178

def install(self, parser):

179

"""Install security handlers on parser"""

180

181

class DefusedExpatBuilderNS(DefusedExpatBuilder):

182

"""

183

Namespace-aware secure DOM builder using expat parser.

184

185

Extends DefusedExpatBuilder with namespace processing capabilities

186

while maintaining the same security restrictions.

187

"""

188

189

def install(self, parser):

190

"""Install security and namespace handlers on parser"""

191

192

def reset(self):

193

"""Reset builder state and namespace context"""

194

```

195

196

**Usage Examples:**

197

198

```python

199

from defusedxml.expatbuilder import DefusedExpatBuilder, DefusedExpatBuilderNS

200

201

# Custom DOM builder with specific security settings

202

builder = DefusedExpatBuilder(forbid_dtd=True, forbid_entities=False, forbid_external=True)

203

with open('document.xml', 'rb') as f:

204

doc = builder.parseFile(f)

205

206

# Namespace-aware builder

207

ns_builder = DefusedExpatBuilderNS(forbid_dtd=False, forbid_entities=True, forbid_external=True)

208

doc = ns_builder.parseString('<root xmlns:ns="http://example.com/ns"><ns:item>value</ns:item></root>')

209

```

210

211

### Lower-Level Builder Functions

212

213

Direct access to secure DOM building functions with namespace support.

214

215

```python { .api }

216

def parse(file, namespaces=True, forbid_dtd=False, forbid_entities=True, forbid_external=True):

217

"""

218

Parse XML file using secure expat builder.

219

220

Args:

221

file (str or file-like): File path or file-like object

222

namespaces (bool): Enable namespace processing (default: True)

223

forbid_dtd (bool): Forbid DTD processing (default: False)

224

forbid_entities (bool): Forbid entity expansion (default: True)

225

forbid_external (bool): Forbid external references (default: True)

226

227

Returns:

228

Document: DOM document object

229

"""

230

231

def parseString(string, namespaces=True, forbid_dtd=False, forbid_entities=True, forbid_external=True):

232

"""

233

Parse XML string using secure expat builder.

234

235

Args:

236

string (str or bytes): XML content

237

namespaces (bool): Enable namespace processing (default: True)

238

forbid_dtd (bool): Forbid DTD processing (default: False)

239

forbid_entities (bool): Forbid entity expansion (default: True)

240

forbid_external (bool): Forbid external references (default: True)

241

242

Returns:

243

Document: DOM document object

244

"""

245

```

246

247

## Common Usage Patterns

248

249

### Basic DOM Processing

250

251

```python

252

import defusedxml.minidom as minidom
from xml.parsers.expat import ExpatError

253

254

def process_xml_document(xml_content):

255

"""Process XML document using secure DOM parsing."""

256

try:

257

doc = minidom.parseString(xml_content)

258

root = doc.documentElement

259

260

# Process elements

261

for element in root.getElementsByTagName('item'):

262

item_id = element.getAttribute('id')

263

item_value = element.firstChild.nodeValue if element.firstChild else ''

264

print(f"Item {item_id}: {item_value}")

265

266

return doc

267

except ExpatError as e:

268

print(f"XML parsing error: {e}")

269

return None

270

```

271

272

### Memory-Efficient Processing with PullDOM

273

274

```python

275

import defusedxml.pulldom as pulldom

276

277

def process_large_xml_selectively(filename):

278

"""Process large XML files efficiently using pulldom."""

279

doc = pulldom.parse(filename)

280

processed_count = 0

281

282

for event, node in doc:

283

if event == pulldom.START_ELEMENT:

284

if node.tagName == 'record':

285

# Only expand nodes we're interested in

286

doc.expandNode(node)

287

288

# Process the expanded DOM subtree

289

record_id = node.getAttribute('id')

290

data_elements = node.getElementsByTagName('data')

291

292

for data_elem in data_elements:

293

if data_elem.firstChild:

294

print(f"Record {record_id}: {data_elem.firstChild.nodeValue}")

295

296

processed_count += 1

297

298

print(f"Processed {processed_count} records")

299

```

300

301

### DOM Tree Manipulation

302

303

```python

304

import defusedxml.minidom as minidom

305

306

def modify_xml_document(xml_content):

307

"""Parse and modify XML document."""

308

doc = minidom.parseString(xml_content)

309

310

# Add new element

311

root = doc.documentElement

312

new_item = doc.createElement('item')

313

new_item.setAttribute('id', 'new')

314

new_text = doc.createTextNode('new value')

315

new_item.appendChild(new_text)

316

root.appendChild(new_item)

317

318

# Modify existing elements

319

for item in root.getElementsByTagName('item'):

320

if item.getAttribute('id') == 'modify_me':

321

item.firstChild.nodeValue = 'modified value'

322

323

# Convert back to XML string

324

return doc.toxml()

325

```

326

327

### Namespace-Aware DOM Processing

328

329

```python

330

import defusedxml.minidom as minidom

331

332

def process_namespaced_xml(xml_content):

333

"""Process XML document with namespace support."""

334

doc = minidom.parseString(xml_content)

335

336

# Find elements by namespace and local name

337

root = doc.documentElement

338

339

# Process all elements regardless of namespace

340

for node in root.childNodes:

341

if node.nodeType == node.ELEMENT_NODE:

342

print(f"Element: {node.localName}, Namespace: {node.namespaceURI}")

343

344

# Process attributes with namespaces

345

if node.attributes:

346

for i in range(node.attributes.length):

347

attr = node.attributes.item(i)

348

print(f" Attribute: {attr.localName}={attr.value} (NS: {attr.namespaceURI})")

349

```

350

351

### Error Handling with DOM

352

353

```python

354

import defusedxml.minidom as minidom

355

import defusedxml
from xml.parsers.expat import ExpatError

356

357

def safe_dom_parse(xml_content):

358

"""Parse XML with comprehensive error handling."""

359

try:

360

doc = minidom.parseString(xml_content)

361

print(f"Successfully parsed document with root: {doc.documentElement.tagName}")

362

return doc

363

except ExpatError as e:

364

print(f"XML syntax error: {e}")

365

except defusedxml.DTDForbidden as e:

366

print(f"DTD processing forbidden: {e}")

367

except defusedxml.EntitiesForbidden as e:

368

print(f"Entity processing forbidden: {e}")

369

except defusedxml.ExternalReferenceForbidden as e:

370

print(f"External reference forbidden: {e}")

371

except Exception as e:

372

print(f"Unexpected error: {e}")

373

374

return None

375

```

376

377

### Custom Security Configuration

378

379

```python

380

import defusedxml.minidom as minidom

381

382

def parse_with_trust_level(xml_content, trust_level='untrusted'):

383

"""Parse XML with security settings based on trust level."""

384

385

if trust_level == 'trusted':

386

# Allow DTDs and entities for trusted content

387

return minidom.parseString(xml_content, forbid_dtd=False, forbid_entities=False, forbid_external=True)

388

elif trust_level == 'internal':

389

# Allow DTDs but restrict entities

390

return minidom.parseString(xml_content, forbid_dtd=False, forbid_entities=True, forbid_external=True)

391

else:

392

# Maximum security for untrusted content

393

return minidom.parseString(xml_content, forbid_dtd=True, forbid_entities=True, forbid_external=True)

394

```

395

396

## Migration from Standard Library

397

398

DefusedXML DOM modules are designed as drop-in replacements:

399

400

```python

401

# Before (vulnerable)

402

import xml.dom.minidom as minidom

403

import xml.dom.pulldom as pulldom

404

405

doc = minidom.parseString(untrusted_xml)

406

events = pulldom.parseString(untrusted_xml)

407

408

# After (secure)

409

import defusedxml.minidom as minidom

410

import defusedxml.pulldom as pulldom

411

412

doc = minidom.parseString(untrusted_xml) # Now protected by default

413

events = pulldom.parseString(untrusted_xml) # Now protected by default

414

```

415

416

The API is identical except for the addition of security parameters to parsing functions.