0
# Schema Validation
1
2
Comprehensive XML document validation using multiple schema languages including DTD, RelaxNG, W3C XML Schema, and ISO Schematron. The validation framework provides detailed error reporting, custom validation rules, and integration with parsing workflows.
3
4
## Capabilities
5
6
### DTD Validation
7
8
Document Type Definition validation for XML documents with entity and attribute declarations.
9
10
```python { .api }
11
class DTD:
12
"""Document Type Definition validator."""
13
14
def __init__(self, file=None, external_id=None):
15
"""
16
Create DTD validator.
17
18
Args:
19
file: Path to DTD file or file-like object
20
external_id: External DTD identifier (PUBLIC/SYSTEM)
21
"""
22
23
def validate(self, etree):
24
"""
25
Validate document against DTD.
26
27
Args:
28
etree: Element or ElementTree to validate
29
30
Returns:
31
bool: True if valid, False if invalid
32
"""
33
34
@property
35
def error_log(self):
36
"""Validation error log."""
37
38
def assertValid(self, etree):
39
"""Assert document is valid, raise DocumentInvalid if not."""
40
41
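# Constructor call form (takes a file path, file-like object, or external ID; wrap DTD text in StringIO to load it from a string)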
42
def DTD(file=None, external_id=None):
43
"""Create DTD validator from file or external identifier."""
44
```
45
46
### RelaxNG Validation
47
48
RELAX NG schema validation. The XML syntax is supported natively; the compact syntax (RNC) requires the optional `rnc2rng` package.
49
50
```python { .api }
51
class RelaxNG:
52
"""RELAX NG schema validator."""
53
54
def __init__(self, etree=None, file=None):
55
"""
56
Create RelaxNG validator.
57
58
Args:
59
etree: Element or ElementTree containing schema
60
file: Path to schema file or file-like object
61
"""
62
63
def validate(self, etree):
64
"""
65
Validate document against RelaxNG schema.
66
67
Args:
68
etree: Element or ElementTree to validate
69
70
Returns:
71
bool: True if valid, False if invalid
72
"""
73
74
@property
75
def error_log(self):
76
"""Validation error log."""
77
78
def assertValid(self, etree):
79
"""Assert document is valid, raise DocumentInvalid if not."""
80
81
# Factory function
82
def RelaxNG(etree=None, file=None):
83
"""Create RelaxNG validator from schema document or file."""
84
```
85
86
### XML Schema Validation
87
88
W3C XML Schema (XSD 1.0) validation, as implemented by the underlying libxml2 library.
89
90
```python { .api }
91
class XMLSchema:
92
"""W3C XML Schema validator."""
93
94
def __init__(self, etree=None, file=None):
95
"""
96
Create XMLSchema validator.
97
98
Args:
99
etree: Element or ElementTree containing schema
100
file: Path to schema file or file-like object
101
"""
102
103
def validate(self, etree):
104
"""
105
Validate document against XML Schema.
106
107
Args:
108
etree: Element or ElementTree to validate
109
110
Returns:
111
bool: True if valid, False if invalid
112
"""
113
114
@property
115
def error_log(self):
116
"""Validation error log."""
117
118
def assertValid(self, etree):
119
"""Assert document is valid, raise DocumentInvalid if not."""
120
121
# Factory function
122
def XMLSchema(etree=None, file=None):
123
"""Create XMLSchema validator from schema document or file."""
124
```
125
126
### Schematron Validation
127
128
ISO Schematron rule-based validation with XPath assertions.
129
130
```python { .api }
131
class Schematron:
132
"""ISO Schematron validator."""
133
134
def __init__(self, etree=None, file=None, include=True, expand=True,
135
include_params=None, expand_params=None, compile_params=None,
136
store_schematron=False, store_xslt=False, store_report=False,
137
phase=None, error_finder=None):
138
"""
139
Create Schematron validator.
140
141
Args:
142
etree: Element or ElementTree containing schema
143
file: Path to schema file or file-like object
144
include: Process schematron includes (step 1)
145
expand: Expand abstract patterns (step 2)
146
include_params: Parameters for include step
147
expand_params: Parameters for expand step
148
compile_params: Parameters for compile step
149
store_schematron: Keep processed schematron document
150
store_xslt: Keep compiled XSLT stylesheet
151
store_report: Keep validation report
152
phase: Schematron validation phase
153
error_finder: Custom error finder XPath
154
"""
155
156
def validate(self, etree):
157
"""
158
Validate document against Schematron rules.
159
160
Args:
161
etree: Element or ElementTree to validate
162
163
Returns:
164
bool: True if valid, False if invalid
165
"""
166
167
@property
168
def error_log(self):
169
"""Validation error log."""
170
171
@property
172
def schematron(self):
173
"""Processed schematron document (if stored)."""
174
175
@property
176
def validator_xslt(self):
177
"""Compiled XSLT validator (if stored)."""
178
179
@property
180
def validation_report(self):
181
"""SVRL validation report (if stored)."""
182
183
def assertValid(self, etree):
184
"""Assert document is valid, raise DocumentInvalid if not."""
185
186
# Class constants for error handling
187
ASSERTS_ONLY = None # Report failed assertions only (default)
188
ASSERTS_AND_REPORTS = None # Report assertions and successful reports
189
190
# Schematron processing functions
191
def extract_xsd(schema_doc):
192
"""Extract embedded schematron from XML Schema."""
193
194
def extract_rng(schema_doc):
195
"""Extract embedded schematron from RelaxNG schema."""
196
197
def iso_dsdl_include(schematron_doc, **params):
198
"""Process schematron include directives."""
199
200
def iso_abstract_expand(schematron_doc, **params):
201
"""Expand abstract patterns in schematron."""
202
203
def iso_svrl_for_xslt1(schematron_doc, **params):
204
"""Compile schematron to XSLT validation stylesheet."""
205
206
def stylesheet_params(**kwargs):
207
"""Convert keyword arguments to XSLT stylesheet parameters."""
208
```
209
210
### Validation Error Handling
211
212
Comprehensive error classes for different validation failures.
213
214
```python { .api }
215
class DocumentInvalid(LxmlError):
216
"""Base class for document validation errors."""
217
218
class DTDError(LxmlError):
219
"""Base class for DTD-related errors."""
220
221
class DTDParseError(DTDError):
222
"""DTD parsing error."""
223
224
class DTDValidateError(DTDError, DocumentInvalid):
225
"""DTD validation error."""
226
227
class RelaxNGError(LxmlError):
228
"""Base class for RelaxNG-related errors."""
229
230
class RelaxNGParseError(RelaxNGError):
231
"""RelaxNG schema parsing error."""
232
233
class RelaxNGValidateError(RelaxNGError, DocumentInvalid):
234
"""RelaxNG validation error."""
235
236
class XMLSchemaError(LxmlError):
237
"""Base class for XML Schema-related errors."""
238
239
class XMLSchemaParseError(XMLSchemaError):
240
"""XML Schema parsing error."""
241
242
class XMLSchemaValidateError(XMLSchemaError, DocumentInvalid):
243
"""XML Schema validation error."""
244
245
class SchematronError(LxmlError):
246
"""Base class for Schematron-related errors."""
247
248
class SchematronParseError(SchematronError):
249
"""Schematron schema parsing error."""
250
251
class SchematronValidateError(SchematronError, DocumentInvalid):
252
"""Schematron validation error."""
253
```
254
255
### Parser Integration
256
257
Integrate validation directly into parsing workflow.
258
259
```python { .api }
260
class XMLParser:
261
"""XML parser with validation support."""
262
263
def __init__(self, dtd_validation=False, schema=None, **kwargs):
264
"""
265
Create parser with validation options.
266
267
Args:
268
dtd_validation: Enable DTD validation during parsing
269
schema: XMLSchema instance to validate against during parsing (other validator types are not accepted here)
270
**kwargs: Other parser options
271
"""
272
273
# Validation during parsing
274
def parse(source, parser=None, base_url=None):
275
"""Parse with validation if parser configured."""
276
277
def fromstring(text, parser=None, base_url=None):
278
"""Parse string with validation if parser configured."""
279
```
280
281
## Usage Examples
282
283
### DTD Validation
284
285
```python
286
from lxml import etree
287
288
# DTD schema
289
dtd_content = '''
290
<!ELEMENT catalog (book+)>
291
<!ELEMENT book (title, author, year, price)>
292
<!ATTLIST book id CDATA #REQUIRED
293
category (fiction|science|mystery) #REQUIRED>
294
<!ELEMENT title (#PCDATA)>
295
<!ELEMENT author (#PCDATA)>
296
<!ELEMENT year (#PCDATA)>
297
<!ELEMENT price (#PCDATA)>
298
<!ATTLIST price currency CDATA #IMPLIED>
299
'''
300
301
# XML document
302
xml_content = '''<?xml version="1.0"?>
303
<!DOCTYPE catalog [
304
''' + dtd_content + '''
305
]>
306
<catalog>
307
<book id="1" category="fiction">
308
<title>The Great Gatsby</title>
309
<author>F. Scott Fitzgerald</author>
310
<year>1925</year>
311
<price currency="USD">12.99</price>
312
</book>
313
<book id="2" category="science">
314
<title>A Brief History of Time</title>
315
<author>Stephen Hawking</author>
316
<year>1988</year>
317
<price>15.99</price>
318
</book>
319
</catalog>'''
320
321
# Parse and validate
322
parser = etree.XMLParser(dtd_validation=True)
323
try:
324
doc = etree.fromstring(xml_content, parser)
325
print("Document is valid according to DTD")
326
except etree.XMLSyntaxError as e:
327
print(f"DTD validation failed: {e}")
328
329
# Separate DTD validation
330
doc = etree.fromstring(xml_content)
331
dtd = doc.getroottree().docinfo.internalDTD  # DTD built from the DOCTYPE internal subset
332
if dtd.validate(doc):
333
print("Document is valid")
334
else:
335
print("Validation errors:")
336
for error in dtd.error_log:
337
print(f" Line {error.line}: {error.message}")
338
```
339
340
### RelaxNG Validation
341
342
```python
343
from lxml import etree
344
345
# RelaxNG schema
346
relaxng_schema = '''
347
<element name="catalog" xmlns="http://relaxng.org/ns/structure/1.0">
348
<oneOrMore>
349
<element name="book">
350
<attribute name="id"/>
351
<attribute name="category">
352
<choice>
353
<value>fiction</value>
354
<value>science</value>
355
<value>mystery</value>
356
</choice>
357
</attribute>
358
<element name="title"><text/></element>
359
<element name="author"><text/></element>
360
<element name="year"><text/></element>
361
<element name="price">
362
<optional>
363
<attribute name="currency"/>
364
</optional>
365
<text/>
366
</element>
367
</element>
368
</oneOrMore>
369
</element>
370
'''
371
372
# Create validator
373
relaxng_doc = etree.fromstring(relaxng_schema)
374
relaxng = etree.RelaxNG(relaxng_doc)
375
376
# XML to validate
377
xml_content = '''
378
<catalog>
379
<book id="1" category="fiction">
380
<title>The Great Gatsby</title>
381
<author>F. Scott Fitzgerald</author>
382
<year>1925</year>
383
<price currency="USD">12.99</price>
384
</book>
385
</catalog>
386
'''
387
388
# Validate
389
doc = etree.fromstring(xml_content)
390
if relaxng.validate(doc):
391
print("Document is valid according to RelaxNG")
392
else:
393
print("RelaxNG validation errors:")
394
for error in relaxng.error_log:
395
print(f" Line {error.line}: {error.message}")
396
397
# Raise an exception on failure instead of returning False.
397
# (XMLParser's schema= option only accepts XMLSchema, not RelaxNG.)
398
try:
399
    relaxng.assertValid(doc)
400
    print("Document validated successfully")
401
except etree.DocumentInvalid as e:
402
    print(f"Validation failed: {e}")
404
```
405
406
### XML Schema Validation
407
408
```python
409
from lxml import etree
410
411
# XML Schema (XSD)
412
xsd_schema = '''<?xml version="1.0"?>
413
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
414
<xs:element name="catalog">
415
<xs:complexType>
416
<xs:sequence>
417
<xs:element name="book" maxOccurs="unbounded">
418
<xs:complexType>
419
<xs:sequence>
420
<xs:element name="title" type="xs:string"/>
421
<xs:element name="author" type="xs:string"/>
422
<xs:element name="year" type="xs:gYear"/>
423
<xs:element name="price">
424
<xs:complexType>
425
<xs:simpleContent>
426
<xs:extension base="xs:decimal">
427
<xs:attribute name="currency" type="xs:string"/>
428
</xs:extension>
429
</xs:simpleContent>
430
</xs:complexType>
431
</xs:element>
432
</xs:sequence>
433
<xs:attribute name="id" type="xs:string" use="required"/>
434
<xs:attribute name="category" use="required">
435
<xs:simpleType>
436
<xs:restriction base="xs:string">
437
<xs:enumeration value="fiction"/>
438
<xs:enumeration value="science"/>
439
<xs:enumeration value="mystery"/>
440
</xs:restriction>
441
</xs:simpleType>
442
</xs:attribute>
443
</xs:complexType>
444
</xs:element>
445
</xs:sequence>
446
</xs:complexType>
447
</xs:element>
448
</xs:schema>
449
'''
450
451
# Create XML Schema validator
452
xsd_doc = etree.fromstring(xsd_schema)
453
xmlschema = etree.XMLSchema(xsd_doc)
454
455
# Validate document
456
xml_content = '''
457
<catalog>
458
<book id="1" category="fiction">
459
<title>The Great Gatsby</title>
460
<author>F. Scott Fitzgerald</author>
461
<year>1925</year>
462
<price currency="USD">12.99</price>
463
</book>
464
</catalog>
465
'''
466
467
doc = etree.fromstring(xml_content)
468
if xmlschema.validate(doc):
469
print("Document is valid according to XML Schema")
470
else:
471
print("XML Schema validation errors:")
472
for error in xmlschema.error_log:
473
print(f" Line {error.line}: {error.message}")
474
```
475
476
### Schematron Validation
477
478
```python
479
from lxml import etree
480
from lxml.isoschematron import Schematron
481
482
# Schematron schema with business rules
483
schematron_schema = '''<?xml version="1.0"?>
484
<schema xmlns="http://purl.oclc.org/dsdl/schematron">
485
<title>Book Catalog Validation</title>
486
487
<pattern id="price-rules">
488
<title>Price validation rules</title>
489
490
<rule context="book">
491
<assert test="price[@currency]">
492
Books should have currency specified for price
493
</assert>
494
<assert test="number(price) > 0">
495
Book price must be positive: <value-of select="title"/>
496
</assert>
497
<assert test="number(price) &lt; 100">
498
Book price seems too high: <value-of select="title"/> costs <value-of select="price"/>
499
</assert>
500
</rule>
501
502
<rule context="book[@category='fiction']">
503
<assert test="number(year) >= 1800">
504
Fiction books should be from 1800 or later
505
</assert>
506
</rule>
507
508
<rule context="book[@category='science']">
509
<assert test="number(year) >= 1900">
510
Science books should be relatively recent (1900+)
511
</assert>
512
</rule>
513
</pattern>
514
</schema>
515
'''
516
517
# Create Schematron validator
518
schematron_doc = etree.fromstring(schematron_schema)
519
schematron = Schematron(schematron_doc)
520
521
# Test valid document
522
valid_xml = '''
523
<catalog>
524
<book id="1" category="fiction">
525
<title>The Great Gatsby</title>
526
<author>F. Scott Fitzgerald</author>
527
<year>1925</year>
528
<price currency="USD">12.99</price>
529
</book>
530
</catalog>
531
'''
532
533
doc = etree.fromstring(valid_xml)
534
if schematron.validate(doc):
535
print("Document passes Schematron validation")
536
else:
537
print("Schematron validation errors:")
538
for error in schematron.error_log:
539
print(f" {error.message}")
540
541
# Test invalid document
542
invalid_xml = '''
543
<catalog>
544
<book id="1" category="science">
545
<title>Ancient Science</title>
546
<author>Old Author</author>
547
<year>1850</year>
548
<price>-5.99</price>
549
</book>
550
</catalog>
551
'''
552
553
doc = etree.fromstring(invalid_xml)
554
if not schematron.validate(doc):
555
print("\nSchematron validation failed as expected:")
556
for error in schematron.error_log:
557
print(f" {error.message}")
558
```
559
560
### Combined Validation
561
562
```python
563
from lxml import etree
564
from lxml.isoschematron import Schematron
565
566
# Multi-step validation: structure + business rules
567
def validate_document(xml_content, relaxng_schema, schematron_schema):
568
"""Validate document against both structural and business rules."""
569
570
doc = etree.fromstring(xml_content)
571
572
# Step 1: Structural validation with RelaxNG
573
relaxng = etree.RelaxNG(etree.fromstring(relaxng_schema))
574
if not relaxng.validate(doc):
575
return False, "Structural validation failed", relaxng.error_log
576
577
# Step 2: Business rules validation with Schematron
578
schematron = Schematron(etree.fromstring(schematron_schema))
579
if not schematron.validate(doc):
580
return False, "Business rules validation failed", schematron.error_log
581
582
return True, "Document is fully valid", None
583
584
# Use combined validation
585
xml_to_test = '''
586
<catalog>
587
<book id="1" category="fiction">
588
<title>Test Book</title>
589
<author>Test Author</author>
590
<year>2023</year>
591
<price currency="USD">25.99</price>
592
</book>
593
</catalog>
594
'''
595
596
is_valid, message, errors = validate_document(
597
xml_to_test, relaxng_schema, schematron_schema
598
)
599
600
print(f"Validation result: {message}")
601
if errors:
602
for error in errors:
603
print(f" {error.message}")
604
```