or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

etree-core.md · html-processing.md · index.md · objectify-api.md · utility-modules.md · validation.md · xpath-xslt.md

utility-modules.md · docs/

0

# Utility Modules

1

2

This page covers additional lxml functionality, including SAX interface compatibility, CSS selector support, element builders, XInclude processing, and namespace management. These modules provide specialized capabilities for integrating with other XML tools and for advanced XML processing workflows.

3

4

## Capabilities

5

6

### SAX Interface Compatibility

7

8

Bridge between lxml and Python's SAX (Simple API for XML) for integration with SAX-based applications.

9

10

```python { .api }

11

class ElementTreeContentHandler:

12

"""SAX ContentHandler that builds lxml ElementTree."""

13

14

def __init__(self, makeelement=None):

15

"""

16

Create SAX content handler for building ElementTree.

17

18

Args:

19

makeelement: Custom element factory function (optional)

20

"""

21

22

def etree(self):

23

"""Built ElementTree, available after parsing completes. Exposed as a read-only property: use `handler.etree`, not `handler.etree()`."""

24

25

# SAX ContentHandler interface methods

26

def setDocumentLocator(self, locator): ...

27

def startDocument(self): ...

28

def endDocument(self): ...

29

def startPrefixMapping(self, prefix, uri): ...

30

def endPrefixMapping(self, prefix): ...

31

def startElement(self, name, attrs): ...

32

def endElement(self, name): ...

33

def startElementNS(self, name, qname, attrs): ...

34

def endElementNS(self, name, qname): ...

35

def characters(self, data): ...

36

def ignorableWhitespace(self, whitespace): ...

37

def processingInstruction(self, target, data): ...

38

def skippedEntity(self, name): ...

39

40

class ElementTreeProducer:

41

"""Generate SAX events from lxml ElementTree."""

42

43

def __init__(self, element_or_tree, content_handler):

44

"""

45

Create SAX event producer.

46

47

Args:

48

element_or_tree: Element or ElementTree to process

49

content_handler: SAX ContentHandler to receive events

50

"""

51

52

def saxify(self):

53

"""Generate SAX events for the element tree."""

54

55

def saxify(element_or_tree, content_handler):

56

"""

57

Generate SAX events from lxml tree.

58

59

Args:

60

element_or_tree: Element or ElementTree to process

61

content_handler: SAX ContentHandler to receive events

62

"""

63

64

class SaxError(LxmlError):

65

"""SAX processing error."""

66

```

67

68

### CSS Selectors

69

70

CSS selector support for finding elements using CSS syntax instead of XPath.

71

72

```python { .api }

73

class CSSSelector:

74

"""CSS selector that compiles to XPath for element matching."""

75

76

def __init__(self, css, namespaces=None, translator='xml'):

77

"""

78

Create CSS selector.

79

80

Args:

81

css: CSS selector string

82

namespaces: dict mapping prefixes to namespace URIs

83

translator: Selector translator ('xml' or 'html')

84

"""

85

86

def __call__(self, element):

87

"""

88

Find elements matching CSS selector.

89

90

Args:

91

element: Element or ElementTree to search

92

93

Returns:

94

list: Matching elements

95

"""

96

97

@property

98

def css(self):

99

"""CSS selector string."""

100

101

@property

102

def path(self):

103

"""Compiled XPath expression."""

104

105

class LxmlTranslator:

106

"""CSS to XPath translator with lxml-specific extensions."""

107

108

def css_to_xpath(self, css, prefix='descendant-or-self::'):

109

"""Convert CSS selector to XPath expression."""

110

111

class LxmlHTMLTranslator(LxmlTranslator):

112

"""HTML-specific CSS to XPath translator."""

113

114

# CSS selector error classes

115

class SelectorSyntaxError(Exception):

116

"""CSS selector syntax error."""

117

118

class ExpressionError(Exception):

119

"""CSS expression error."""

120

121

class SelectorError(Exception):

122

"""General CSS selector error."""

123

```

124

125

### Element Builders

126

127

Factory classes for programmatically creating XML elements with fluent APIs.

128

129

```python { .api }

130

class ElementMaker:

131

"""Factory for creating XML elements with builder pattern."""

132

133

def __init__(self, typemap=None, namespace=None, nsmap=None,

134

makeelement=None, **default_attributes):

135

"""

136

Create element factory.

137

138

Args:

139

typemap: dict mapping Python types to conversion functions

140

namespace: Default namespace URI for created elements

141

nsmap: Namespace prefix mapping

142

makeelement: Custom element factory function

143

**default_attributes: Default attributes for all elements

144

"""

145

146

def __call__(self, tag, *children, **attributes):

147

"""

148

Create element with tag, children, and attributes.

149

150

Args:

151

tag: Element tag name

152

*children: Child elements, text, or other content

153

**attributes: Element attributes

154

155

Returns:

156

Element: Created element with children and attributes

157

"""

158

159

def __getattr__(self, tag):

160

"""Create element factory method for specific tag."""

161

162

# Default element maker instance

163

E = ElementMaker()

164

```

165

166

### XInclude Processing

167

168

XML Inclusions (XInclude) processing for modular XML documents.

169

170

```python { .api }

171

def include(elem, loader=None, base_url=None, max_depth=6):

172

"""

173

Process XInclude directives in element tree.

174

175

Args:

176

elem: Element containing XInclude directives

177

loader: Custom resource loader function

178

base_url: Base URL for resolving relative hrefs

179

max_depth: Maximum inclusion recursion depth

180

181

Raises:

182

FatalIncludeError: Fatal inclusion error

183

LimitedRecursiveIncludeError: Recursion limit exceeded

184

"""

185

186

def default_loader(href, parse, encoding=None):

187

"""

188

Default XInclude resource loader.

189

190

Args:

191

href: Resource URI to load

192

parse: Parse mode ('xml' or 'text')

193

encoding: Character encoding for text resources

194

195

Returns:

196

Element or str: Loaded resource content

197

"""

198

199

class FatalIncludeError(LxmlError):

200

"""Fatal XInclude processing error."""

201

202

class LimitedRecursiveIncludeError(FatalIncludeError):

203

"""XInclude recursion limit exceeded."""

204

205

# XInclude constants

206

DEFAULT_MAX_INCLUSION_DEPTH = 6

207

XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude"

208

```

209

210

### ElementPath Support

211

212

Simple XPath-like expressions for element tree navigation (similar to ElementTree).

213

214

```python { .api }

215

def find(element, path, namespaces=None):

216

"""

217

Find first element matching simple path expression.

218

219

Args:

220

element: Element to search from

221

path: Simple path expression (e.g., 'child/grandchild')

222

namespaces: Namespace prefix mapping

223

224

Returns:

225

Element or None: First matching element

226

"""

227

228

def findall(element, path, namespaces=None):

229

"""

230

Find all elements matching simple path expression.

231

232

Args:

233

element: Element to search from

234

path: Simple path expression

235

namespaces: Namespace prefix mapping

236

237

Returns:

238

list: All matching elements

239

"""

240

241

def iterfind(element, path, namespaces=None):

242

"""

243

Iterate over elements matching simple path expression.

244

245

Args:

246

element: Element to search from

247

path: Simple path expression

248

namespaces: Namespace prefix mapping

249

250

Yields:

251

Element: Matching elements

252

"""

253

254

def findtext(element, path, default=None, namespaces=None):

255

"""

256

Find text content of first element matching path.

257

258

Args:

259

element: Element to search from

260

path: Simple path expression

261

default: Default value if no match found

262

namespaces: Namespace prefix mapping

263

264

Returns:

265

str or default: Text content or default value

266

"""

267

```

268

269

### Document Testing Utilities

270

271

Enhanced utilities for testing XML documents and doctests.

272

273

```python { .api }

274

# lxml.usedoctest - doctest support

275

def temp_install(modules=None, verbose=None):

276

"""Temporarily install lxml doctests."""

277

278

# lxml.doctestcompare - enhanced doctest comparison

279

class LXMLOutputChecker:

280

"""Enhanced output checker for XML doctests."""

281

282

def check_output(self, want, got, optionflags):

283

"""Compare expected and actual XML output."""

284

285

class LHTMLOutputChecker:

286

"""Enhanced output checker for HTML doctests."""

287

288

# Test options

289

PARSE_HTML = ...

290

PARSE_XML = ...

291

NOPARSE_MARKUP = ...

292

```

293

294

### Python Class Lookup

295

296

Custom element class assignment based on Python logic.

297

298

```python { .api }

299

# lxml.pyclasslookup - Python-based element class lookup

300

class PythonElementClassLookup:

301

"""Element class lookup using Python callback functions."""

302

303

def __init__(self, fallback=None):

304

"""

305

Create Python-based class lookup.

306

307

Args:

308

fallback: Fallback class lookup for unhandled cases

309

"""

310

311

def lookup(self, doc, element):

312

"""

313

Lookup element class based on document and element.

314

315

Args:

316

doc: Document containing element

317

element: Element to assign class for

318

319

Returns:

320

type or None: Element class or None for default

321

"""

322

```

323

324

### Development Utilities

325

326

Helper functions for development and compilation workflows.

327

328

```python { .api }

329

def get_include():

330

"""

331

Returns header include paths for compiling C code against lxml.

332

333

Returns paths for lxml itself, libxml2, and libxslt headers when lxml

334

was built with statically linked libraries.

335

336

Returns:

337

list: List of include directory paths

338

"""

339

```

340

341

## Usage Examples

342

343

### SAX Interface Integration

344

345

```python

346

from lxml import etree

347

from lxml.sax import ElementTreeContentHandler, saxify

348

from xml.sax import make_parser

349

import xml.sax.handler

350

351

# Build ElementTree from SAX events

352

class MyContentHandler(ElementTreeContentHandler):

353

def __init__(self):

354

super().__init__()

355

self.elements_seen = []

356

357

def startElement(self, name, attrs):

358

super().startElement(name, attrs)

359

self.elements_seen.append(name)

360

361

# Parse XML using SAX, build with lxml

362

xml_data = '''<?xml version="1.0"?>

363

<catalog>

364

<book id="1">

365

<title>Python Guide</title>

366

<author>John Doe</author>

367

</book>

368

<book id="2">

369

<title>XML Processing</title>

370

<author>Jane Smith</author>

371

</book>

372

</catalog>'''

373

374

handler = MyContentHandler()

375

parser = make_parser()

376

parser.setContentHandler(handler)

377

378

# Parse and get resulting ElementTree

379

from io import StringIO

380

parser.parse(StringIO(xml_data))

381

tree = handler.etree  # `etree` is a property, not a method

382

383

print(f"Elements seen: {handler.elements_seen}")

384

print(f"Root tag: {tree.getroot().tag}")

385

386

# Generate SAX events from lxml tree

387

class LoggingHandler(xml.sax.handler.ContentHandler):

388

def startElement(self, name, attrs):

389

print(f"Start: {name} {dict(attrs)}")

390

391

def endElement(self, name):

392

print(f"End: {name}")

393

394

def characters(self, content):

395

content = content.strip()

396

if content:

397

print(f"Text: {content}")

398

399

# Send lxml tree to SAX handler

400

root = etree.fromstring(xml_data)

401

logging_handler = LoggingHandler()

402

saxify(root, logging_handler)

403

```

404

405

### CSS Selectors

406

407

```python

408

from lxml import html

409

from lxml.cssselect import CSSSelector

410

411

# HTML document for CSS selection

412

html_content = '''

413

<html>

414

<head>

415

<title>CSS Selector Example</title>

416

</head>

417

<body>

418

<div id="header" class="main-header">

419

<h1>Welcome</h1>

420

<nav class="navigation">

421

<a href="/home" class="nav-link active">Home</a>

422

<a href="/about" class="nav-link">About</a>

423

<a href="/contact" class="nav-link">Contact</a>

424

</nav>

425

</div>

426

<div id="content" class="main-content">

427

<article class="post featured">

428

<h2>Featured Article</h2>

429

<p>This is a featured article.</p>

430

</article>

431

<article class="post">

432

<h2>Regular Article</h2>

433

<p>This is a regular article.</p>

434

</article>

435

</div>

436

<footer id="footer">

437

<p>&copy; 2023 Example Site</p>

438

</footer>

439

</body>

440

</html>

441

'''

442

443

doc = html.fromstring(html_content)

444

445

# Create CSS selectors

446

header_selector = CSSSelector('#header')

447

nav_links_selector = CSSSelector('nav.navigation a.nav-link')

448

featured_post_selector = CSSSelector('article.post.featured')

449

all_headings_selector = CSSSelector('h1, h2, h3, h4, h5, h6')

450

451

# Use selectors to find elements

452

header = header_selector(doc)

453

print(f"Header element: {header[0].get('class') if header else 'Not found'}")

454

455

nav_links = nav_links_selector(doc)

456

print(f"Navigation links: {len(nav_links)}")

457

for link in nav_links:

458

print(f" {link.text}: {link.get('href')}")

459

460

featured = featured_post_selector(doc)

461

if featured:

462

print(f"Featured article title: {featured[0].find('.//h2').text}")

463

464

headings = all_headings_selector(doc)

465

print(f"All headings:")

466

for heading in headings:

467

print(f" {heading.tag}: {heading.text}")

468

469

# Advanced CSS selectors

470

active_link_selector = CSSSelector('a.nav-link.active')

471

first_paragraph_selector = CSSSelector('article p:first-child')

472

not_featured_selector = CSSSelector('article.post:not(.featured)')

473

474

active_links = active_link_selector(doc)

475

print(f"Active navigation links: {len(active_links)}")

476

477

first_paragraphs = first_paragraph_selector(doc)

478

print(f"First paragraphs in articles: {len(first_paragraphs)}")

479

480

regular_posts = not_featured_selector(doc)

481

print(f"Regular (non-featured) posts: {len(regular_posts)}")

482

```

483

484

### Element Builders

485

486

```python

487

from lxml import etree

488

from lxml.builder import ElementMaker

489

490

# Create element maker with namespace

491

E = ElementMaker(namespace="http://example.com/catalog",

492

nsmap={None: "http://example.com/catalog"})

493

494

# Build XML structure using element maker

495

catalog = E.catalog(

496

E.metadata(

497

E.title("Book Catalog"),

498

E.created("2023-12-07"),

499

E.version("1.0")

500

),

501

E.books(

502

E.book(

503

E.title("Python Programming"),

504

E.author("John Smith"),

505

E.isbn("978-0123456789"),

506

E.price("29.99", currency="USD"),

507

E.categories(

508

E.category("Programming"),

509

E.category("Python"),

510

E.category("Computers")

511

),

512

id="1",

513

available="true"

514

),

515

E.book(

516

E.title("Web Development"),

517

E.author("Jane Doe"),

518

E.isbn("978-0987654321"),

519

E.price("34.95", currency="USD"),

520

E.categories(

521

E.category("Web"),

522

E.category("HTML"),

523

E.category("CSS")

524

),

525

id="2",

526

available="false"

527

)

528

)

529

)

530

531

print("Generated XML:")

532

print(etree.tostring(catalog, pretty_print=True, encoding='unicode'))

533

534

# Custom element maker with type mapping

535

def format_price(elem, value):

536

"""Custom price formatter (typemap callables receive the parent element and the item)."""

537

return f"${float(value):.2f}"

538

539

def bool_to_string(elem, value):

540

"""Convert boolean to string."""

541

return "yes" if value else "no"

542

543

custom_typemap = {

544

float: format_price,

545

bool: bool_to_string

546

}

547

548

CustomE = ElementMaker(typemap=custom_typemap)

549

550

# Use custom element maker

551

product = CustomE.product(

552

CustomE.name("Widget"),

553

CustomE.price(19.99), # Will be formatted as currency

554

CustomE.available(True), # Will be converted to "yes"

555

CustomE.features(

556

CustomE.feature("Lightweight"),

557

CustomE.feature("Durable"),

558

CustomE.feature("Affordable")

559

)

560

)

561

562

print("\nCustom formatted XML:")

563

print(etree.tostring(product, pretty_print=True, encoding='unicode'))

564

```

565

566

### XInclude Processing

567

568

```python

569

from lxml import etree

570

from lxml.ElementInclude import include, default_loader

571

import tempfile

572

import os

573

574

# Create temporary files for XInclude example

575

temp_dir = tempfile.mkdtemp()

576

577

# Create included content files

578

header_content = '''<?xml version="1.0"?>

579

<header>

580

<title>Document Title</title>

581

<author>John Doe</author>

582

<date>2023-12-07</date>

583

</header>'''

584

585

footer_content = '''<?xml version="1.0"?>

586

<footer>

587

<copyright>&#169; 2023 Example Corp</copyright>

588

<contact>contact@example.com</contact>

589

</footer>'''

590

591

# Write include files

592

header_file = os.path.join(temp_dir, 'header.xml')

593

footer_file = os.path.join(temp_dir, 'footer.xml')

594

595

with open(header_file, 'w') as f:

596

f.write(header_content)

597

598

with open(footer_file, 'w') as f:

599

f.write(footer_content)

600

601

# Main document with XInclude directives

602

main_doc_content = f'''<?xml version="1.0"?>

603

<document xmlns:xi="http://www.w3.org/2001/XInclude">

604

<xi:include href="{header_file}"/>

605

606

<content>

607

<section>

608

<h1>Introduction</h1>

609

<p>This is the main content of the document.</p>

610

</section>

611

<section>

612

<h1>Details</h1>

613

<p>More detailed information goes here.</p>

614

</section>

615

</content>

616

617

<xi:include href="{footer_file}"/>

618

</document>'''

619

620

# Parse document with XInclude processing

621

root = etree.fromstring(main_doc_content)

622

print("Before XInclude processing:")

623

print(etree.tostring(root, pretty_print=True, encoding='unicode'))

624

625

# Process XInclude directives

626

include(root)

627

print("\nAfter XInclude processing:")

628

print(etree.tostring(root, pretty_print=True, encoding='unicode'))

629

630

# Custom loader for XInclude

631

def custom_loader(href, parse, encoding=None):

632

"""Custom XInclude loader with logging."""

633

print(f"Loading: {href} (parse={parse}, encoding={encoding})")

634

return default_loader(href, parse, encoding)

635

636

# Use custom loader

637

root2 = etree.fromstring(main_doc_content)

638

include(root2, loader=custom_loader)

639

640

# Clean up temporary files

641

os.unlink(header_file)

642

os.unlink(footer_file)

643

os.rmdir(temp_dir)

644

```

645

646

### ElementPath Simple Queries

647

648

```python

649

from lxml import etree

650

from lxml._elementpath import find, findall, iterfind, findtext

651

652

# XML document for path queries

653

xml_data = '''<?xml version="1.0"?>

654

<library>

655

<section name="fiction">

656

<book id="1">

657

<title>The Great Gatsby</title>

658

<author>F. Scott Fitzgerald</author>

659

<metadata>

660

<genre>Classic Literature</genre>

661

<year>1925</year>

662

</metadata>

663

</book>

664

<book id="2">

665

<title>To Kill a Mockingbird</title>

666

<author>Harper Lee</author>

667

<metadata>

668

<genre>Classic Literature</genre>

669

<year>1960</year>

670

</metadata>

671

</book>

672

</section>

673

<section name="science">

674

<book id="3">

675

<title>A Brief History of Time</title>

676

<author>Stephen Hawking</author>

677

<metadata>

678

<genre>Science</genre>

679

<year>1988</year>

680

</metadata>

681

</book>

682

</section>

683

</library>'''

684

685

root = etree.fromstring(xml_data)

686

687

# Simple path queries (ElementTree-style)

688

fiction_section = find(root, 'section[@name="fiction"]')

689

print(f"Fiction section: {fiction_section.get('name') if fiction_section is not None else 'Not found'}")

690

691

# Find all books in any section

692

all_books = findall(root, './/book')

693

print(f"Total books: {len(all_books)}")

694

695

# Find specific book by ID

696

book1 = find(root, './/book[@id="1"]')

697

if book1 is not None:

698

title = findtext(book1, 'title')

699

author = findtext(book1, 'author')

700

print(f"Book 1: {title} by {author}")

701

702

# Iterate over books in fiction section

703

fiction_books = iterfind(root, 'section[@name="fiction"]/book')

704

print("Fiction books:")

705

for book in fiction_books:

706

title = findtext(book, 'title')

707

year = findtext(book, 'metadata/year')

708

print(f" {title} ({year})")

709

710

# Find text with default value

711

unknown_book = findtext(root, 'section/book[@id="999"]/title', 'Unknown Book')

712

print(f"Unknown book title: {unknown_book}")

713

714

# Complex paths

715

classic_books = root.xpath('.//book[metadata/genre="Classic Literature"]')  # nested-path predicates need full XPath; ElementPath rejects them

716

print(f"Classic literature books: {len(classic_books)}")

717

718

recent_books = root.xpath('.//book[metadata/year > 1950]')  # numeric comparisons need full XPath; ElementPath rejects them

719

print(f"Books after 1950: {len(recent_books)}")

720

```

721

722

### Custom Element Classes

723

724

```python

725

from lxml import etree

726

727

# Define custom element classes

728

class BookElement(etree.ElementBase):

729

"""Custom element class for book elements."""

730

731

@property

732

def title(self):

733

"""Get book title."""

734

title_elem = self.find('title')

735

return title_elem.text if title_elem is not None else None

736

737

@property

738

def author(self):

739

"""Get book author."""

740

author_elem = self.find('author')

741

return author_elem.text if author_elem is not None else None

742

743

@property

744

def year(self):

745

"""Get publication year as integer."""

746

year_elem = self.find('metadata/year')

747

if year_elem is not None:

748

try:

749

return int(year_elem.text)

750

except (ValueError, TypeError):

751

return None

752

return None

753

754

def is_classic(self):

755

"""Check if book is classic literature."""

756

genre_elem = self.find('metadata/genre')

757

return genre_elem is not None and genre_elem.text == 'Classic Literature'

758

759

class SectionElement(etree.ElementBase):

760

"""Custom element class for section elements."""

761

762

@property

763

def name(self):

764

"""Get section name."""

765

return self.get('name', 'Unnamed Section')

766

767

def get_books(self):

768

"""Get all books in this section."""

769

return self.findall('book')

770

771

def count_books(self):

772

"""Count books in this section."""

773

return len(self.findall('book'))

774

775

# Create element class lookup

776

class CustomElementClassLookup(etree.PythonElementClassLookup):

777

def lookup(self, document, element):

778

if element.tag == 'book':

779

return BookElement

780

elif element.tag == 'section':

781

return SectionElement

782

return None

783

784

# Set up parser with custom lookup

785

lookup = CustomElementClassLookup()

786

parser = etree.XMLParser()

787

parser.set_element_class_lookup(lookup)

788

789

# Parse with custom element classes

790

xml_data = '''<?xml version="1.0"?>

791

<library>

792

<section name="fiction">

793

<book id="1">

794

<title>The Great Gatsby</title>

795

<author>F. Scott Fitzgerald</author>

796

<metadata>

797

<genre>Classic Literature</genre>

798

<year>1925</year>

799

</metadata>

800

</book>

801

<book id="2">

802

<title>Modern Fiction</title>

803

<author>Contemporary Author</author>

804

<metadata>

805

<genre>Modern Literature</genre>

806

<year>2020</year>

807

</metadata>

808

</book>

809

</section>

810

</library>'''

811

812

root = etree.fromstring(xml_data, parser)

813

814

# Use custom element methods

815

fiction_section = root.find('section')

816

print(f"Section: {fiction_section.name}")

817

print(f"Books in section: {fiction_section.count_books()}")

818

819

for book in fiction_section.get_books():

820

print(f" {book.title} by {book.author} ({book.year})")

821

print(f" Is classic: {book.is_classic()}")

822

```