or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

etree-core.md, html-processing.md, index.md, objectify-api.md, utility-modules.md, validation.md, xpath-xslt.md

xpath-xslt.mddocs/

0

# XPath and XSLT Processing

1

2

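Advanced XML querying and transformation capabilities with XPath 1.0 evaluation, XSLT 1.0 stylesheets, extension functions, namespace handling, and XML canonicalization. These features enable powerful XML processing workflows for data extraction, transformation, and analysis. Note that lxml builds on libxml2/libxslt, which implement XPath 1.0 and XSLT 1.0 only; XPath 2.0 constructs such as `for ... return`, `min()`, or `distinct-values()` are not available.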

3

4

## Capabilities

5

6

### XPath Evaluation

7

8

Compile and evaluate XPath expressions with variables, extension functions, and namespace support.

9

10

```python { .api }

11

class XPath:

12

"""Compiled XPath expression for efficient repeated evaluation."""

13

14

def __init__(self, path, namespaces=None, extensions=None,

15

regexp=True, smart_strings=True):

16

"""

17

Compile XPath expression.

18

19

Args:

20

path: XPath expression string

21

namespaces: dict mapping prefixes to namespace URIs

22

extensions: dict of extension function modules

23

regexp: Enable EXSLT regular expression functions

24

smart_strings: Return string results as "smart" strings (str subclasses with a getparent() method); pass False to get plain Python str objects

25

"""

26

27

def __call__(self, _etree_or_element, **_variables):

28

"""

29

Evaluate XPath on element or document.

30

31

Args:

32

_etree_or_element: Element or ElementTree to evaluate on

33

**_variables: XPath variables as keyword arguments

34

35

Returns:

36

list: XPath evaluation results (elements, strings, numbers, or booleans depending on expression)

37

"""

38

39

@property

40

def path(self):

41

"""XPath expression string."""

42

43

class XPathEvaluator:

44

"""XPath evaluation context with persistent variables and functions."""

45

46

def __init__(self, etree_or_element, namespaces=None, extensions=None,

47

regexp=True, smart_strings=True):

48

"""

49

Create XPath evaluator for specific element/document.

50

51

Args:

52

etree_or_element: Element or ElementTree to evaluate on

53

namespaces: dict mapping prefixes to namespace URIs

54

extensions: dict of extension function modules

55

regexp: Enable EXSLT regular expression functions

56

smart_strings: Return string results as "smart" strings (str subclasses with a getparent() method); pass False to get plain Python str objects

57

"""

58

59

def __call__(self, _path, **_variables):

60

"""Evaluate XPath expression with variables."""

61

62

def evaluate(self, _path, **_variables):

63

"""Evaluate XPath expression with variables."""

64

65

def register_namespace(self, prefix, uri):

66

"""Register namespace prefix for this evaluator."""

67

68

def register_namespaces(self, namespaces):

69

"""Register multiple namespace prefixes."""

70

71

class XPathDocumentEvaluator:

72

"""Document-level XPath evaluator with document context."""

73

74

def __init__(self, etree, namespaces=None, extensions=None,

75

regexp=True, smart_strings=True):

76

"""Create document-level XPath evaluator."""

77

78

def __call__(self, _path, **_variables):

79

"""Evaluate XPath expression on document."""

80

81

# Element XPath methods

82

class Element:

83

def xpath(self, _path, namespaces=None, extensions=None,

84

smart_strings=True, **_variables):

85

"""Evaluate XPath expression on element."""

86

```

87

88

### XSLT Transformation

89

90

Apply XSLT stylesheets to transform XML documents with parameters and extension functions.

91

92

```python { .api }

93

class XSLT:

94

"""XSLT stylesheet processor."""

95

96

def __init__(self, xslt_input, extensions=None, regexp=True,

97

access_control=None):

98

"""

99

Create XSLT processor from stylesheet.

100

101

Args:

102

xslt_input: Element, ElementTree, or file containing XSLT

103

extensions: dict of extension function modules

104

regexp: Enable EXSLT regular expression functions

105

access_control: XSLTAccessControl for security restrictions

106

"""

107

108

def __call__(self, _input, profile_run=False, **kwargs):

109

"""

110

Transform XML document using stylesheet.

111

112

Args:

113

_input: Element or ElementTree to transform

114

profile_run: Enable XSLT profiling

115

**kwargs: XSLT parameters as keyword arguments

116

117

Returns:

118

ElementTree: Transformation result

119

"""

120

121

def apply(self, _input, **kwargs):

122

"""Apply transformation and return result tree."""

123

124

def transform(self, _input, **kwargs):

125

"""Transform document (same as __call__)."""

126

127

@property

128

def error_log(self):

129

"""XSLT processing error log."""

130

131

@staticmethod

132

def strparam(s):

133

"""Convert Python string to XSLT string parameter."""

134

135

class XSLTAccessControl:

136

"""Security access control for XSLT processing to prevent unauthorized file/network access."""

137

138

DENY_ALL = None # Deny all external access (most secure)

139

DENY_WRITE = None # Deny write operations but allow reads

140

DENY_READ = None # Deny read operations but allow writes (rarely used)

141

142

def __init__(self, *, read_file=True, write_file=True, create_dir=True,

143

read_network=True, write_network=True):

144

"""

145

Create access control configuration for XSLT security.

146

147

Args:

148

read_file: Allow XSLT to read files from filesystem

149

write_file: Allow XSLT to write files to filesystem (security risk)

150

create_dir: Allow XSLT to create directories (security risk)

151

read_network: Allow XSLT to fetch resources via HTTP/HTTPS (security risk)

152

write_network: Allow XSLT to send data over network (security risk)

153

"""

154

```

155

156

### XML Canonicalization

157

158

XML canonicalization (C14N) for consistent XML representation and digital signatures.

159

160

```python { .api }

161

def canonicalize(xml_data=None, *, out=None, from_file=None, **options):

162

"""

163

Canonicalize XML document using the C14N 2.0 algorithm.

164

165

Args:

166

xml_data: XML string, Element, or ElementTree (mutually exclusive with from_file)

167

out: File-like object opened in text mode that receives the output via write() (optional)

168

from_file: Filename or file-like object to read the XML from instead of xml_data

169

**options: C14N 2.0 options (same as C14NWriterTarget) including:

170

- with_comments: bool - Include comments (default False)

171

- strip_text: bool - Strip leading/trailing whitespace from text content (default False)

172

- rewrite_prefixes: bool - Replace namespace prefixes with generated "n{number}" prefixes (default False)

173

- exclude_attrs / exclude_tags: iterables - Attribute / tag names to leave out of the output

174

175

Returns:

176

str: Canonicalized XML text (None if out is specified)

177

"""

178

179

class C14NWriterTarget:

180

"""Writer target for canonical XML output during parsing."""

181

182

def __init__(self, write, **c14n_options):

183

"""

184

Create C14N writer target.

185

186

Args:

187

write: Function to write canonicalized output

188

**c14n_options: C14N canonicalization options

189

"""

190

```

191

192

### Extension Functions

193

194

Create custom XPath and XSLT extension functions.

195

196

```python { .api }

197

class Extension:

198

"""Base class for XSLT extensions."""

199

200

class XSLTExtension:

201

"""XSLT extension function handler."""

202

203

class FunctionNamespace:

204

"""XPath extension function namespace."""

205

206

def __init__(self, namespace_uri):

207

"""

208

Create function namespace.

209

210

Args:

211

namespace_uri: Namespace URI for extension functions

212

"""

213

214

def __setitem__(self, function_name, function):

215

"""Register extension function."""

216

217

def __getitem__(self, function_name):

218

"""Get registered extension function."""

219

220

def __delitem__(self, function_name):

221

"""Unregister extension function."""

222

```

223

224

### XPath Error Handling

225

226

Comprehensive error classes for XPath and XSLT processing.

227

228

```python { .api }

229

class XPathError(LxmlError):

230

"""Base class for XPath-related errors."""

231

232

class XPathEvalError(XPathError):

233

"""XPath evaluation error."""

234

235

class XPathSyntaxError(XPathError):

236

"""XPath syntax error."""

237

238

class XPathResultError(XPathError):

239

"""XPath result type error."""

240

241

class XPathFunctionError(XPathError):

242

"""XPath function call error."""

243

244

class XSLTError(LxmlError):

245

"""Base class for XSLT-related errors."""

246

247

class XSLTParseError(XSLTError):

248

"""XSLT stylesheet parsing error."""

249

250

class XSLTApplyError(XSLTError):

251

"""XSLT transformation error."""

252

253

class XSLTSaveError(XSLTError):

254

"""XSLT result saving error."""

255

256

class XSLTExtensionError(XSLTError):

257

"""XSLT extension function error."""

258

259

class C14NError(LxmlError):

260

"""XML canonicalization error."""

261

```

262

263

## Usage Examples

264

265

### Basic XPath Queries

266

267

```python

268

from lxml import etree

269

270

# Sample XML document

271

xml_data = '''<?xml version="1.0"?>

272

<library xmlns:book="http://example.com/book">

273

<book:catalog>

274

<book:item id="1" category="fiction">

275

<book:title>The Great Gatsby</book:title>

276

<book:author>F. Scott Fitzgerald</book:author>

277

<book:year>1925</book:year>

278

<book:price currency="USD">12.99</book:price>

279

</book:item>

280

<book:item id="2" category="science">

281

<book:title>A Brief History of Time</book:title>

282

<book:author>Stephen Hawking</book:author>

283

<book:year>1988</book:year>

284

<book:price currency="USD">15.99</book:price>

285

</book:item>

286

<book:item id="3" category="fiction">

287

<book:title>To Kill a Mockingbird</book:title>

288

<book:author>Harper Lee</book:author>

289

<book:year>1960</book:year>

290

<book:price currency="USD">11.99</book:price>

291

</book:item>

292

</book:catalog>

293

</library>'''

294

295

root = etree.fromstring(xml_data)

296

297

# Define namespace mapping

298

namespaces = {'b': 'http://example.com/book'}

299

300

# Basic XPath queries

301

all_books = root.xpath('//b:item', namespaces=namespaces)

302

print(f"Found {len(all_books)} books")

303

304

fiction_books = root.xpath('//b:item[@category="fiction"]', namespaces=namespaces)

305

print(f"Fiction books: {len(fiction_books)}")

306

307

# Extract text content

308

titles = root.xpath('//b:title/text()', namespaces=namespaces)

309

print(f"Book titles: {titles}")

310

311

# Extract attributes

312

book_ids = root.xpath('//b:item/@id', namespaces=namespaces)

313

print(f"Book IDs: {book_ids}")

314

315

# Complex queries with predicates

316

expensive_books = root.xpath('//b:item[number(b:price) > 13]', namespaces=namespaces)

317

recent_books = root.xpath('//b:item[b:year > 1950]', namespaces=namespaces)

318

319

print(f"Expensive books: {len(expensive_books)}")

320

print(f"Recent books: {len(recent_books)}")

321

322

# XPath functions

323

oldest_book = root.xpath('//b:item[not(b:year > //b:item/b:year)]/b:title/text()', namespaces=namespaces)

324

print(f"Oldest book: {oldest_book[0] if oldest_book else 'None'}")

325

```

326

327

### Compiled XPath Expressions

328

329

```python

330

from lxml import etree

331

332

xml_data = '''

333

<products>

334

<product id="1" price="19.99" category="electronics">

335

<name>Widget</name>

336

<stock>15</stock>

337

</product>

338

<product id="2" price="29.99" category="electronics">

339

<name>Gadget</name>

340

<stock>8</stock>

341

</product>

342

<product id="3" price="9.99" category="books">

343

<name>Manual</name>

344

<stock>25</stock>

345

</product>

346

</products>

347

'''

348

349

root = etree.fromstring(xml_data)

350

351

# Compile XPath expressions for reuse

352

find_by_category = etree.XPath('//product[@category=$cat]')

353

find_by_price_range = etree.XPath('//product[number(@price) >= $min and number(@price) <= $max]')

354

count_in_stock = etree.XPath('sum(//product[@category=$cat]/stock)')

355

356

# Use compiled expressions with variables

357

electronics = find_by_category(root, cat='electronics')

358

print(f"Electronics products: {len(electronics)}")

359

360

affordable = find_by_price_range(root, min=10, max=25)

361

print(f"Affordable products: {len(affordable)}")

362

363

electronics_stock = count_in_stock(root, cat='electronics')

364

print(f"Total electronics in stock: {electronics_stock}")

365

366

# XPath evaluator for persistent context

367

evaluator = etree.XPathEvaluator(root)

368

evaluator.register_namespace('p', 'http://example.com/products')

369

370

# Evaluate multiple expressions with same context

371

product_count = evaluator('count(//product)')

372

avg_price = evaluator('sum(//product/@price) div count(//product)')

373

categories = evaluator('//product[not(@category = preceding-sibling::product/@category)]/@category')

374

375

print(f"Products: {product_count}, Average price: ${avg_price:.2f}")

376

```

377

378

### XSLT Transformations

379

380

```python

381

from lxml import etree

382

383

# XML data to transform

384

xml_data = '''<?xml version="1.0"?>

385

<catalog>

386

<book id="1">

387

<title>Python Programming</title>

388

<author>John Smith</author>

389

<year>2023</year>

390

<price>29.99</price>

391

</book>

392

<book id="2">

393

<title>Web Development</title>

394

<author>Jane Doe</author>

395

<year>2022</year>

396

<price>34.95</price>

397

</book>

398

</catalog>'''

399

400

# XSLT stylesheet

401

xslt_stylesheet = '''<?xml version="1.0"?>

402

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

403

<xsl:param name="format" select="'html'"/>

404

<xsl:param name="title" select="'Book Catalog'"/>

405

406

<xsl:template match="/">

407

<xsl:choose>

408

<xsl:when test="$format='html'">

409

<html>

410

<head><title><xsl:value-of select="$title"/></title></head>

411

<body>

412

<h1><xsl:value-of select="$title"/></h1>

413

<table border="1">

414

<tr>

415

<th>Title</th>

416

<th>Author</th>

417

<th>Year</th>

418

<th>Price</th>

419

</tr>

420

<xsl:for-each select="catalog/book">

421

<xsl:sort select="year" order="descending"/>

422

<tr>

423

<td><xsl:value-of select="title"/></td>

424

<td><xsl:value-of select="author"/></td>

425

<td><xsl:value-of select="year"/></td>

426

<td>$<xsl:value-of select="price"/></td>

427

</tr>

428

</xsl:for-each>

429

</table>

430

</body>

431

</html>

432

</xsl:when>

433

<xsl:otherwise>

434

<book-list>

435

<xsl:for-each select="catalog/book">

436

<item>

437

<xsl:value-of select="title"/> by <xsl:value-of select="author"/> (<xsl:value-of select="year"/>)

438

</item>

439

</xsl:for-each>

440

</book-list>

441

</xsl:otherwise>

442

</xsl:choose>

443

</xsl:template>

444

</xsl:stylesheet>'''

445

446

# Parse XML and XSLT

447

xml_doc = etree.fromstring(xml_data)

448

xslt_doc = etree.fromstring(xslt_stylesheet)

449

450

# Create XSLT processor

451

transform = etree.XSLT(xslt_doc)

452

453

# Transform with parameters

454

html_result = transform(xml_doc, format="'html'", title="'My Book Collection'")

455

print("HTML transformation:")

456

print(etree.tostring(html_result, pretty_print=True, encoding='unicode'))

457

458

# Transform with different parameters

459

text_result = transform(xml_doc, format="'text'")

460

print("\nText transformation:")

461

print(etree.tostring(text_result, pretty_print=True, encoding='unicode'))

462

463

# Check for transformation errors

464

if transform.error_log:

465

print("XSLT errors:")

466

for error in transform.error_log:

467

print(f" {error}")

468

```

469

470

### Extension Functions

471

472

```python

473

from lxml import etree

474

475

# Define custom extension functions

476

def custom_format_price(context, price_list, currency='USD'):

477

"""Format price with currency symbol."""

478

if not price_list:

479

return ''

480

price = float(price_list[0].text)  # node-set arguments arrive as a list of elements

481

symbols = {'USD': '$', 'EUR': '€', 'GBP': '£'}

482

symbol = symbols.get(currency, currency)

483

return f"{symbol}{price:.2f}"

484

485

def custom_word_count(context, text_list):

486

"""Count words in text."""

487

if not text_list:

488

return 0

489

text = text_list[0].text or ''  # node-set arguments arrive as a list of elements

490

return len(text.split())

491

492

# Create extension namespace

493

ns = etree.FunctionNamespace('http://example.com/functions')

494

ns['format-price'] = custom_format_price

495

ns['word-count'] = custom_word_count

496

497

# XML with custom processing

498

xml_data = '''

499

<products>

500

<product>

501

<name>Programming Guide</name>

502

<description>A comprehensive guide to Python programming for beginners and experts</description>

503

<price>29.99</price>

504

</product>

505

<product>

506

<name>Quick Reference</name>

507

<description>Essential commands and functions</description>

508

<price>15.50</price>

509

</product>

510

</products>

511

'''

512

513

# XSLT using extension functions

514

xslt_with_extensions = '''<?xml version="1.0"?>

515

<xsl:stylesheet version="1.0"

516

xmlns:xsl="http://www.w3.org/1999/XSL/Transform"

517

xmlns:custom="http://example.com/functions">

518

519

<xsl:template match="/">

520

<product-report>

521

<xsl:for-each select="products/product">

522

<item>

523

<name><xsl:value-of select="name"/></name>

524

<formatted-price>

525

<xsl:value-of select="custom:format-price(price, 'USD')"/>

526

</formatted-price>

527

<description-length>

528

<xsl:value-of select="custom:word-count(description)"/> words

529

</description-length>

530

</item>

531

</xsl:for-each>

532

</product-report>

533

</xsl:template>

534

</xsl:stylesheet>

535

'''

536

537

# Transform using extensions

538

xml_doc = etree.fromstring(xml_data)

539

xslt_doc = etree.fromstring(xslt_with_extensions)

540

541

# Create transform with extensions enabled

542

extensions = {('http://example.com/functions', 'format-price'): custom_format_price,

543

('http://example.com/functions', 'word-count'): custom_word_count}

544

545

transform = etree.XSLT(xslt_doc, extensions=extensions)

546

result = transform(xml_doc)

547

548

print("Result with extension functions:")

549

print(etree.tostring(result, pretty_print=True, encoding='unicode'))

550

```

551

552

### XML Canonicalization

553

554

```python

555

from lxml import etree

556

557

# XML document with varying whitespace and attribute order

558

xml_data = '''<?xml version="1.0"?>

559

<root xmlns:a="http://example.com/a"

560

xmlns:b="http://example.com/b">

561

562

<element b:attr="value2" a:attr="value1" >

563

<child> text content </child>

564

<!-- This is a comment -->

565

<another-child/>

566

</element>

567

568

</root>'''

569

570

# Parse document

571

doc = etree.fromstring(xml_data)

572

573

# Basic canonicalization

574

canonical_xml = etree.canonicalize(xml_data)

575

print("Canonical XML (default):")

576

print(canonical_xml)  # canonicalize() returns a text string, not bytes

577

578

# Canonicalization without comments

579

canonical_no_comments = etree.canonicalize(xml_data, with_comments=False)

580

print("\nCanonical XML (no comments):")

581

print(canonical_no_comments)  # canonicalize() returns a text string, not bytes

582

583

# Exclusive canonicalization

584

canonical_exclusive = etree.tostring(doc, method='c14n', exclusive=True)

585

print("\nExclusive canonical XML:")

586

print(canonical_exclusive.decode('utf-8'))

587

588

# Canonicalize to file

589

with open('/tmp/canonical.xml', 'w', encoding='utf-8') as f:

590

etree.canonicalize(xml_data, out=f)

591

592

# Using C14N writer target during parsing

593

output_parts = []

594

def write_canonical(data):

595

output_parts.append(data)

596

597

target = etree.C14NWriterTarget(write_canonical, with_comments=False)

598

parser = etree.XMLParser(target=target)

599

etree.fromstring(xml_data, parser)

600

601

print("\nCanonical XML via writer target:")

602

print(''.join(output_parts))

603

```

604

605

### Advanced XPath with Namespaces

606

607

```python

608

from lxml import etree

609

610

# Complex XML with multiple namespaces

611

xml_data = '''<?xml version="1.0"?>

612

<root xmlns="http://example.com/default"

613

xmlns:meta="http://example.com/metadata"

614

xmlns:content="http://example.com/content">

615

616

<meta:info>

617

<meta:created>2023-12-07</meta:created>

618

<meta:author>John Doe</meta:author>

619

</meta:info>

620

621

<content:document>

622

<content:section id="intro">

623

<content:title>Introduction</content:title>

624

<content:paragraph>This is the introduction.</content:paragraph>

625

</content:section>

626

<content:section id="main">

627

<content:title>Main Content</content:title>

628

<content:paragraph>This is the main content.</content:paragraph>

629

<content:subsection>

630

<content:title>Subsection</content:title>

631

<content:paragraph>Subsection content.</content:paragraph>

632

</content:subsection>

633

</content:section>

634

</content:document>

635

636

</root>'''

637

638

root = etree.fromstring(xml_data)

639

640

# Define comprehensive namespace mappings

641

namespaces = {

642

'default': 'http://example.com/default',

643

'meta': 'http://example.com/metadata',

644

'content': 'http://example.com/content'

645

}

646

647

# Complex XPath queries with namespaces

648

author = root.xpath('//meta:author/text()', namespaces=namespaces)

649

print(f"Author: {author[0] if author else 'Unknown'}")

650

651

# Find all sections and subsections

652

sections = root.xpath('//content:section | //content:subsection', namespaces=namespaces)

653

print(f"Found {len(sections)} sections")

654

655

# Extract titles with context

656

titles_with_id = root.xpath('//content:section[@id]/content:title/text()', namespaces=namespaces)

657

for title in titles_with_id:

658

print(f"Section title: {title}")

659

660

# Count paragraphs in main section

661

main_paragraphs = root.xpath('count(//content:section[@id="main"]//content:paragraph)', namespaces=namespaces)

662

print(f"Paragraphs in main section: {main_paragraphs}")

663

664

# Build document outline

665

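sections_xpath = etree.XPath('//content:section', namespaces=namespaces)

666

title_xpath = etree.XPath('string(content:title)', namespaces=namespaces)

667

# XPath 1.0 (the only version lxml supports) has no "for ... return" expression,

668

# so iterate over the matched sections in Python instead

669

670

outline = [f"{section.get('id')}: {title_xpath(section)}" for section in sections_xpath(root)]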

671

print("Document outline:")

672

for item in outline:

673

print(f" {item}")

674

```