Tessl Tile for pypi/lxml@6.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

etree-core.md html-processing.md index.md objectify-api.md utility-modules.md validation.md xpath-xslt.md

xpath-xslt.mddocs/

0
# XPath and XSLT Processing
1

2
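Advanced XML querying and transformation capabilities with XPath 1.0 evaluation, XSLT 1.0 stylesheets, extension functions, namespace handling, and XML canonicalization. lxml builds on libxml2/libxslt, which implement XPath 1.0 and XSLT 1.0 only, so XPath 2.0 constructs such as `for ... return`, `min()`, or `distinct-values()` are not available. These features enable powerful XML processing workflows for data extraction, transformation, and analysis.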
3

4
## Capabilities
5

6
### XPath Evaluation
7

8
Compile and evaluate XPath expressions with variables, extension functions, and namespace support.
9

10
```python { .api }
11
class XPath:
12
    """Compiled XPath expression for efficient repeated evaluation."""
13
    
14
    def __init__(self, path, namespaces=None, extensions=None, 
15
                 regexp=True, smart_strings=True):
16
        """
17
        Compile XPath expression.
18
        
19
        Args:
20
            path: XPath expression string
21
            namespaces: dict mapping prefixes to namespace URIs
22
            extensions: dict of extension function modules
23
            regexp: Enable EXSLT regular expression functions
24
            smart_strings: Return string results as "smart" strings (str subclasses such as lxml.etree._ElementUnicodeResult that provide getparent()); pass False to get plain Python str objects
25
        """
26
    
27
    def __call__(self, _etree_or_element, **_variables):
28
        """
29
        Evaluate XPath on element or document.
30
        
31
        Args:
32
            _etree_or_element: Element or ElementTree to evaluate on
33
            **_variables: XPath variables as keyword arguments
34
        
35
        Returns:
36
            A list for node-set expressions (elements, attribute values, or text strings); a float, bool, or str when the expression itself evaluates to a number, boolean, or string
37
        """
38
    
39
    @property
40
    def path(self):
41
        """XPath expression string."""
42

43
class XPathEvaluator:
    """XPath evaluation context bound to one element or document.

    Namespaces and extension functions are configured once and reused for
    every expression evaluated through this object.
    """

    def __init__(self, etree_or_element, namespaces=None, extensions=None,
                 regexp=True, smart_strings=True):
        """
        Create XPath evaluator for specific element/document.

        Args:
            etree_or_element: Element or ElementTree to evaluate on
            namespaces: dict mapping prefixes to namespace URIs
            extensions: dict of extension function modules
            regexp: Enable EXSLT regular expression functions
            smart_strings: Return string results as "smart" strings (str
                subclasses that provide getparent()); pass False to get
                plain Python str objects
        """

    def __call__(self, _path, **_variables):
        """Evaluate XPath expression; keyword arguments become XPath variables."""

    def evaluate(self, _path, **_variables):
        """Evaluate XPath expression with variables (same as calling the object)."""

    def register_namespace(self, prefix, uri):
        """Register namespace prefix for this evaluator."""

    def register_namespaces(self, namespaces):
        """Register multiple namespace prefixes."""
70

71
class XPathDocumentEvaluator:
    """Document-level XPath evaluator with document context."""

    def __init__(self, etree, namespaces=None, extensions=None,
                 regexp=True, smart_strings=True):
        """
        Create document-level XPath evaluator.

        Args:
            etree: ElementTree to evaluate on
            namespaces: dict mapping prefixes to namespace URIs
            extensions: dict of extension function modules
            regexp: Enable EXSLT regular expression functions
            smart_strings: Return string results as "smart" strings (str
                subclasses that provide getparent()); pass False to get
                plain Python str objects
        """

    def __call__(self, _path, **_variables):
        """Evaluate XPath expression on document."""
80

81
# Element XPath methods
82
class Element:
83
    def xpath(self, _path, namespaces=None, extensions=None, 
84
              smart_strings=True, **_variables):
85
        """Evaluate XPath expression on element."""
86
```
87

88
### XSLT Transformation
89

90
Apply XSLT stylesheets to transform XML documents with parameters and extension functions.
91

92
```python { .api }
93
class XSLT:
94
    """XSLT stylesheet processor."""
95
    
96
    def __init__(self, xslt_input, extensions=None, regexp=True, 
97
                 access_control=None):
98
        """
99
        Create XSLT processor from stylesheet.
100
        
101
        Args:
102
            xslt_input: Element, ElementTree, or file containing XSLT
103
            extensions: dict of extension function modules
104
            regexp: Enable EXSLT regular expression functions
105
            access_control: XSLTAccessControl for security restrictions
106
        """
107
    
108
    def __call__(self, _input, profile_run=False, **kwargs):
109
        """
110
        Transform XML document using stylesheet.
111
        
112
        Args:
113
            _input: Element or ElementTree to transform
114
            profile_run: Enable XSLT profiling
115
            **kwargs: XSLT parameters as keyword arguments
116
        
117
        Returns:
118
            ElementTree: Transformation result
119
        """
120
    
121
    def apply(self, _input, **kwargs):
122
        """Apply transformation and return result tree."""
123
    
124
    def transform(self, _input, **kwargs):
125
        """Transform document (same as __call__)."""
126
    
127
    @property
128
    def error_log(self):
129
        """XSLT processing error log."""
130
    
131
    @staticmethod
132
    def strparam(s):
133
        """Convert Python string to XSLT string parameter."""
134

135
class XSLTAccessControl:
    """Access control for XSLT: reading/writing files, directories and network I/O.

    Every permission defaults to True (access allowed). Pass False for each
    kind of access that a stylesheet must not perform, or use one of the
    pre-configured class members below.
    """

    DENY_ALL = None        # Pre-configured instance that denies all access (most secure)
    DENY_WRITE = None      # Pre-configured instance that denies writes but allows reads
    # lxml provides only these two presets; there is no DENY_READ member.

    def __init__(self, read_file=True, write_file=True, create_dir=True,
                 read_network=True, write_network=True):
        """
        Create access control configuration for XSLT security.

        All arguments default to True, so restrictions must be requested
        explicitly, e.g. XSLTAccessControl(read_network=False, write_file=False).

        Args:
            read_file: Allow XSLT to read files from filesystem
            write_file: Allow XSLT to write files to filesystem (security risk)
            create_dir: Allow XSLT to create directories (security risk)
            read_network: Allow XSLT to fetch resources over the network (security risk)
            write_network: Allow XSLT to send data over network (security risk)
        """
154
```
155

156
### XML Canonicalization
157

158
XML canonicalization (C14N) for consistent XML representation and digital signatures.
159

160
```python { .api }
161
def canonicalize(xml_data=None, out=None, from_file=None, **options):
    """
    Convert XML to its canonical C14N 2.0 serialised form.

    Either xml_data or from_file must be provided as input.

    Args:
        xml_data: XML string, Element, or ElementTree to canonicalize
        out: File or file-like object that receives the output through its
            write() method. The output is text, not bytes, so open files in
            text mode with encoding "utf-8" (optional)
        from_file: File path or file-like object to read the XML input from
        **options: C14N 2.0 options (the same as for C14NWriterTarget):
            - with_comments: bool - Include comments (default False)
            - strip_text: bool - Strip whitespace around text content (default False)
            - rewrite_prefixes: bool - Replace namespace prefixes by "n{number}" (default False)
            - qname_aware_tags: set - Tag names whose text content holds QNames
            - qname_aware_attrs: set - Attribute names whose values hold QNames
            - exclude_attrs: set - Attribute names that should not be serialised
            - exclude_tags: set - Tag names that should not be serialised

    Returns:
        str: Canonicalized XML text if out is not specified, otherwise None.

    Note:
        Exclusive canonicalization (C14N 1.0) is not an option of this
        function; use tostring(tree, method="c14n", exclusive=True) instead.
    """
178

179
class C14NWriterTarget:
180
    """Writer target for canonical XML output during parsing."""
181
    
182
    def __init__(self, write, **c14n_options):
183
        """
184
        Create C14N writer target.
185
        
186
        Args:
187
            write: Function to write canonicalized output
188
            **c14n_options: C14N canonicalization options
189
        """
190
```
191

192
### Extension Functions
193

194
Create custom XPath and XSLT extension functions.
195

196
```python { .api }
197
class Extension:
198
    """Base class for XSLT extensions."""
199

200
class XSLTExtension:
201
    """XSLT extension function handler."""
202

203
class FunctionNamespace:
204
    """XPath extension function namespace."""
205
    
206
    def __init__(self, namespace_uri):
207
        """
208
        Create function namespace.
209
        
210
        Args:
211
            namespace_uri: Namespace URI for extension functions
212
        """
213
    
214
    def __setitem__(self, function_name, function):
215
        """Register extension function."""
216
    
217
    def __getitem__(self, function_name):
218
        """Get registered extension function."""
219
    
220
    def __delitem__(self, function_name):
221
        """Unregister extension function."""
222
```
223

224
### XPath Error Handling
225

226
Comprehensive error classes for XPath and XSLT processing.
227

228
```python { .api }
229
class XPathError(LxmlError):
230
    """Base class for XPath-related errors."""
231

232
class XPathEvalError(XPathError):
233
    """XPath evaluation error."""
234

235
class XPathSyntaxError(XPathError):
236
    """XPath syntax error."""
237

238
class XPathResultError(XPathError):
239
    """XPath result type error."""
240

241
class XPathFunctionError(XPathError):
242
    """XPath function call error."""
243

244
class XSLTError(LxmlError):
245
    """Base class for XSLT-related errors."""
246

247
class XSLTParseError(XSLTError):
248
    """XSLT stylesheet parsing error."""
249

250
class XSLTApplyError(XSLTError):
251
    """XSLT transformation error."""
252

253
class XSLTSaveError(XSLTError):
254
    """XSLT result saving error."""
255

256
class XSLTExtensionError(XSLTError):
257
    """XSLT extension function error."""
258

259
class C14NError(LxmlError):
260
    """XML canonicalization error."""
261
```
262

263
## Usage Examples
264

265
### Basic XPath Queries
266

267
```python
268
from lxml import etree
269

270
# Sample XML document
271
xml_data = '''<?xml version="1.0"?>
272
<library xmlns:book="http://example.com/book">
273
    <book:catalog>
274
        <book:item id="1" category="fiction">
275
            <book:title>The Great Gatsby</book:title>
276
            <book:author>F. Scott Fitzgerald</book:author>
277
            <book:year>1925</book:year>
278
            <book:price currency="USD">12.99</book:price>
279
        </book:item>
280
        <book:item id="2" category="science">
281
            <book:title>A Brief History of Time</book:title>
282
            <book:author>Stephen Hawking</book:author>
283
            <book:year>1988</book:year>
284
            <book:price currency="USD">15.99</book:price>
285
        </book:item>
286
        <book:item id="3" category="fiction">
287
            <book:title>To Kill a Mockingbird</book:title>
288
            <book:author>Harper Lee</book:author>
289
            <book:year>1960</book:year>
290
            <book:price currency="USD">11.99</book:price>
291
        </book:item>
292
    </book:catalog>
293
</library>'''
294

295
root = etree.fromstring(xml_data)
296

297
# Define namespace mapping
298
namespaces = {'b': 'http://example.com/book'}
299

300
# Basic XPath queries
301
all_books = root.xpath('//b:item', namespaces=namespaces)
302
print(f"Found {len(all_books)} books")
303

304
fiction_books = root.xpath('//b:item[@category="fiction"]', namespaces=namespaces)
305
print(f"Fiction books: {len(fiction_books)}")
306

307
# Extract text content
308
titles = root.xpath('//b:title/text()', namespaces=namespaces)
309
print(f"Book titles: {titles}")
310

311
# Extract attributes
312
book_ids = root.xpath('//b:item/@id', namespaces=namespaces)
313
print(f"Book IDs: {book_ids}")
314

315
# Complex queries with predicates
316
expensive_books = root.xpath('//b:item[number(b:price) > 13]', namespaces=namespaces)
317
recent_books = root.xpath('//b:item[b:year > 1950]', namespaces=namespaces)
318

319
print(f"Expensive books: {len(expensive_books)}")
320
print(f"Recent books: {len(recent_books)}")
321

322
# XPath functions
323
# XPath 1.0 has no min()/max(): select the item whose year is not greater than any other year
oldest_book = root.xpath('//b:item[not(b:year > //b:year)]/b:title/text()', namespaces=namespaces)
324
print(f"Oldest book: {oldest_book[0] if oldest_book else 'None'}")
325
```
326

327
### Compiled XPath Expressions
328

329
```python
330
from lxml import etree
331

332
xml_data = '''
333
<products>
334
    <product id="1" price="19.99" category="electronics">
335
        <name>Widget</name>
336
        <stock>15</stock>
337
    </product>
338
    <product id="2" price="29.99" category="electronics">
339
        <name>Gadget</name>
340
        <stock>8</stock>
341
    </product>
342
    <product id="3" price="9.99" category="books">
343
        <name>Manual</name>
344
        <stock>25</stock>
345
    </product>
346
</products>
347
'''
348

349
root = etree.fromstring(xml_data)
350

351
# Compile XPath expressions for reuse
352
find_by_category = etree.XPath('//product[@category=$cat]')
353
find_by_price_range = etree.XPath('//product[number(@price) >= $min and number(@price) <= $max]')
354
count_in_stock = etree.XPath('sum(//product[@category=$cat]/stock)')
355

356
# Use compiled expressions with variables
357
electronics = find_by_category(root, cat='electronics')
358
print(f"Electronics products: {len(electronics)}")
359

360
affordable = find_by_price_range(root, min=10, max=25)
361
print(f"Affordable products: {len(affordable)}")
362

363
electronics_stock = count_in_stock(root, cat='electronics')
364
print(f"Total electronics in stock: {electronics_stock}")
365

366
# XPath evaluator for persistent context
367
evaluator = etree.XPathEvaluator(root)
368
evaluator.register_namespace('p', 'http://example.com/products')
369

370
# Evaluate multiple expressions with same context
371
product_count = evaluator('count(//product)')
372
avg_price = evaluator('sum(//product/@price) div count(//product)')
373
# distinct-values() is XPath 2.0 and unavailable in lxml; de-duplicate the attribute values in Python
categories = sorted(set(evaluator('//product/@category')))
374

375
print(f"Products: {product_count}, Average price: ${avg_price:.2f}")
376
```
377

378
### XSLT Transformations
379

380
```python
381
from lxml import etree
382

383
# XML data to transform
384
xml_data = '''<?xml version="1.0"?>
385
<catalog>
386
    <book id="1">
387
        <title>Python Programming</title>
388
        <author>John Smith</author>
389
        <year>2023</year>
390
        <price>29.99</price>
391
    </book>
392
    <book id="2">
393
        <title>Web Development</title>
394
        <author>Jane Doe</author>
395
        <year>2022</year>
396
        <price>34.95</price>
397
    </book>
398
</catalog>'''
399

400
# XSLT stylesheet
401
xslt_stylesheet = '''<?xml version="1.0"?>
402
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
403
    <xsl:param name="format" select="'html'"/>
404
    <xsl:param name="title" select="'Book Catalog'"/>
405
    
406
    <xsl:template match="/">
407
        <xsl:choose>
408
            <xsl:when test="$format='html'">
409
                <html>
410
                    <head><title><xsl:value-of select="$title"/></title></head>
411
                    <body>
412
                        <h1><xsl:value-of select="$title"/></h1>
413
                        <table border="1">
414
                            <tr>
415
                                <th>Title</th>
416
                                <th>Author</th>
417
                                <th>Year</th>
418
                                <th>Price</th>
419
                            </tr>
420
                            <xsl:for-each select="catalog/book">
421
                                <xsl:sort select="year" order="descending"/>
422
                                <tr>
423
                                    <td><xsl:value-of select="title"/></td>
424
                                    <td><xsl:value-of select="author"/></td>
425
                                    <td><xsl:value-of select="year"/></td>
426
                                    <td>$<xsl:value-of select="price"/></td>
427
                                </tr>
428
                            </xsl:for-each>
429
                        </table>
430
                    </body>
431
                </html>
432
            </xsl:when>
433
            <xsl:otherwise>
434
                <book-list>
435
                    <xsl:for-each select="catalog/book">
436
                        <item>
437
                            <xsl:value-of select="title"/> by <xsl:value-of select="author"/> (<xsl:value-of select="year"/>)
438
                        </item>
439
                    </xsl:for-each>
440
                </book-list>
441
            </xsl:otherwise>
442
        </xsl:choose>
443
    </xsl:template>
444
</xsl:stylesheet>'''
445

446
# Parse XML and XSLT
447
xml_doc = etree.fromstring(xml_data)
448
xslt_doc = etree.fromstring(xslt_stylesheet)
449

450
# Create XSLT processor
451
transform = etree.XSLT(xslt_doc)
452

453
# Transform with parameters
454
html_result = transform(xml_doc, format="'html'", title="'My Book Collection'")
455
print("HTML transformation:")
456
print(etree.tostring(html_result, pretty_print=True, encoding='unicode'))
457

458
# Transform with different parameters
459
text_result = transform(xml_doc, format="'text'")
460
print("\nText transformation:")
461
print(etree.tostring(text_result, pretty_print=True, encoding='unicode'))
462

463
# Check for transformation errors
464
if transform.error_log:
465
    print("XSLT errors:")
466
    for error in transform.error_log:
467
        print(f"  {error}")
468
```
469

470
### Extension Functions
471

472
```python
473
from lxml import etree
474

475
# Define custom extension functions
476
def custom_format_price(context, price_list, currency='USD'):
    """Format a price with its currency symbol.

    Args:
        context: Evaluation context supplied by lxml (unused).
        price_list: The XPath argument. A node-set such as ``price`` arrives
            as a list of elements; a string or number arrives as a scalar.
        currency: Currency code; unknown codes are used verbatim as prefix.

    Returns:
        str: e.g. ``"$29.99"``, or ``''`` when there is no price to format.

    Raises:
        ValueError: If the price text is not a valid number.
    """
    if not price_list:
        return ''
    first = price_list[0] if isinstance(price_list, (list, tuple)) else price_list
    if isinstance(first, (str, int, float)):
        raw = first
    else:
        # Element node: use its XPath string-value (all descendant text);
        # float() cannot convert an element object directly.
        raw = ''.join(first.itertext())
    if isinstance(raw, str) and not raw.strip():
        return ''
    price = float(raw)
    symbols = {'USD': '$', 'EUR': '€', 'GBP': '£'}
    symbol = symbols.get(currency, currency)
    return f"{symbol}{price:.2f}"
484

485
def custom_word_count(context, text_list):
    """Count whitespace-separated words in the first item of an XPath argument.

    Args:
        context: Evaluation context supplied by lxml (unused).
        text_list: The XPath argument. A node-set such as ``description``
            arrives as a list of elements; a string arrives as a scalar.

    Returns:
        int: Number of words, or 0 when the argument is empty.
    """
    if not text_list:
        return 0
    first = text_list[0] if isinstance(text_list, (list, tuple)) else text_list
    if isinstance(first, str):
        text = first
    else:
        # Element node: str(element) would give its repr, not its content,
        # so collect the element's text instead.
        text = ''.join(first.itertext())
    return len(text.split())
491

492
# Create extension namespace
493
ns = etree.FunctionNamespace('http://example.com/functions')
494
ns['format-price'] = custom_format_price
495
ns['word-count'] = custom_word_count
496

497
# XML with custom processing
498
xml_data = '''
499
<products>
500
    <product>
501
        <name>Programming Guide</name>
502
        <description>A comprehensive guide to Python programming for beginners and experts</description>
503
        <price>29.99</price>
504
    </product>
505
    <product>
506
        <name>Quick Reference</name>
507
        <description>Essential commands and functions</description>
508
        <price>15.50</price>
509
    </product>
510
</products>
511
'''
512

513
# XSLT using extension functions
514
xslt_with_extensions = '''<?xml version="1.0"?>
515
<xsl:stylesheet version="1.0" 
516
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
517
                xmlns:custom="http://example.com/functions">
518
    
519
    <xsl:template match="/">
520
        <product-report>
521
            <xsl:for-each select="products/product">
522
                <item>
523
                    <name><xsl:value-of select="name"/></name>
524
                    <formatted-price>
525
                        <xsl:value-of select="custom:format-price(price, 'USD')"/>
526
                    </formatted-price>
527
                    <description-length>
528
                        <xsl:value-of select="custom:word-count(description)"/> words
529
                    </description-length>
530
                </item>
531
            </xsl:for-each>
532
        </product-report>
533
    </xsl:template>
534
</xsl:stylesheet>
535
'''
536

537
# Transform using extensions
538
xml_doc = etree.fromstring(xml_data)
539
xslt_doc = etree.fromstring(xslt_with_extensions)
540

541
# Create transform with extensions enabled
542
extensions = {('http://example.com/functions', 'format-price'): custom_format_price,
543
              ('http://example.com/functions', 'word-count'): custom_word_count}
544

545
transform = etree.XSLT(xslt_doc, extensions=extensions)
546
result = transform(xml_doc)
547

548
print("Result with extension functions:")
549
print(etree.tostring(result, pretty_print=True, encoding='unicode'))
550
```
551

552
### XML Canonicalization
553

554
```python
555
from lxml import etree
556

557
# XML document with varying whitespace and attribute order
558
xml_data = '''<?xml version="1.0"?>
559
<root    xmlns:a="http://example.com/a"  
560
         xmlns:b="http://example.com/b">
561
    
562
    <element   b:attr="value2"   a:attr="value1"  >
563
        <child>   text content   </child>
564
        <!-- This is a comment -->
565
        <another-child/>
566
    </element>
567
    
568
</root>'''
569

570
# Parse document
571
doc = etree.fromstring(xml_data)
572

573
# Basic canonicalization (C14N 2.0): returns a str and drops comments by default
canonical_xml = etree.canonicalize(xml_data)
print("Canonical XML (default):")
print(canonical_xml)

# Canonicalization without comments (explicit; this is also the default)
canonical_no_comments = etree.canonicalize(xml_data, with_comments=False)
print("\nCanonical XML (no comments):")
print(canonical_no_comments)

# Exclusive canonicalization (C14N 1.0) is not a canonicalize() option;
# it is provided by tostring(), which returns bytes
canonical_exclusive = etree.tostring(doc, method='c14n', exclusive=True)
print("\nExclusive canonical XML:")
print(canonical_exclusive.decode('utf-8'))

# Canonicalize to file: canonicalize() writes text, so open the file in text mode
with open('/tmp/canonical.xml', 'w', encoding='utf-8') as f:
    etree.canonicalize(xml_data, out=f)

# Using C14N writer target during parsing
output_parts = []
def write_canonical(data):
    # The writer target passes str chunks, not bytes
    output_parts.append(data)

target = etree.C14NWriterTarget(write_canonical, with_comments=False)
parser = etree.XMLParser(target=target)
etree.fromstring(xml_data, parser)

print("\nCanonical XML via writer target:")
print(''.join(output_parts))
603
```
604

605
### Advanced XPath with Namespaces
606

607
```python
608
from lxml import etree
609

610
# Complex XML with multiple namespaces
611
xml_data = '''<?xml version="1.0"?>
612
<root xmlns="http://example.com/default"
613
      xmlns:meta="http://example.com/metadata"
614
      xmlns:content="http://example.com/content">
615
    
616
    <meta:info>
617
        <meta:created>2023-12-07</meta:created>
618
        <meta:author>John Doe</meta:author>
619
    </meta:info>
620
    
621
    <content:document>
622
        <content:section id="intro">
623
            <content:title>Introduction</content:title>
624
            <content:paragraph>This is the introduction.</content:paragraph>
625
        </content:section>
626
        <content:section id="main">
627
            <content:title>Main Content</content:title>
628
            <content:paragraph>This is the main content.</content:paragraph>
629
            <content:subsection>
630
                <content:title>Subsection</content:title>
631
                <content:paragraph>Subsection content.</content:paragraph>
632
            </content:subsection>
633
        </content:section>
634
    </content:document>
635
    
636
</root>'''
637

638
root = etree.fromstring(xml_data)
639

640
# Define comprehensive namespace mappings
641
namespaces = {
642
    'default': 'http://example.com/default',
643
    'meta': 'http://example.com/metadata', 
644
    'content': 'http://example.com/content'
645
}
646

647
# Complex XPath queries with namespaces
648
author = root.xpath('//meta:author/text()', namespaces=namespaces)
649
print(f"Author: {author[0] if author else 'Unknown'}")
650

651
# Find all sections and subsections
652
sections = root.xpath('//content:section | //content:subsection', namespaces=namespaces)
653
print(f"Found {len(sections)} sections")
654

655
# Extract titles with context
656
titles_with_id = root.xpath('//content:section[@id]/content:title/text()', namespaces=namespaces)
657
for title in titles_with_id:
658
    print(f"Section title: {title}")
659

660
# Count paragraphs in main section
661
main_paragraphs = root.xpath('count(//content:section[@id="main"]//content:paragraph)', namespaces=namespaces)
662
print(f"Paragraphs in main section: {main_paragraphs}")
663

664
# Build document outline
665
# XPath 1.0 has no "for ... return" expression, so select the sections first
# and evaluate a compiled per-section expression relative to each of them.
outline_xpath = etree.XPath('concat(@id, ": ", content:title)', namespaces=namespaces)
outline_sections = root.xpath('//content:section', namespaces=namespaces)

outline = [outline_xpath(section) for section in outline_sections]
671
print("Document outline:")
672
for item in outline:
673
    print(f"  {item}")
674
```

Version

Tile

Files

xpath-xslt.md — docs/

xpath-xslt.mddocs/