or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

etree-core.md html-processing.md index.md objectify-api.md utility-modules.md validation.md xpath-xslt.md

validation.mddocs/

0

# Schema Validation

1

2

Comprehensive XML document validation using multiple schema languages including DTD, RelaxNG, W3C XML Schema, and ISO Schematron. The validation framework provides detailed error reporting, custom validation rules, and integration with parsing workflows.

3

4

## Capabilities

5

6

### DTD Validation

7

8

Document Type Definition validation for XML documents with entity and attribute declarations.

9

10

```python { .api }

11

class DTD:

12

"""Document Type Definition validator."""

13

14

def __init__(self, file=None, external_id=None):

15

"""

16

Create DTD validator.

17

18

Args:

19

file: Path to DTD file or file-like object

20

external_id: External DTD identifier (PUBLIC/SYSTEM)

21

"""

22

23

def validate(self, etree):

24

"""

25

Validate document against DTD.

26

27

Args:

28

etree: Element or ElementTree to validate

29

30

Returns:

31

bool: True if valid, False if invalid

32

"""

33

34

@property

35

def error_log(self):

36

"""Validation error log."""

37

38

def assertValid(self, etree):

39

"""Assert document is valid, raise DocumentInvalid if not."""

40

41

# Factory function

42

def DTD(file=None, external_id=None):

43

"""Create DTD validator from file or external identifier."""

44

```

45

46

### RelaxNG Validation

47

48

RELAX NG schema validation using the XML syntax; the compact syntax (.rnc) is supported when the optional rnc2rng package is installed.

49

50

```python { .api }

51

class RelaxNG:

52

"""RELAX NG schema validator."""

53

54

def __init__(self, etree=None, file=None):

55

"""

56

Create RelaxNG validator.

57

58

Args:

59

etree: Element or ElementTree containing schema

60

file: Path to schema file or file-like object

61

"""

62

63

def validate(self, etree):

64

"""

65

Validate document against RelaxNG schema.

66

67

Args:

68

etree: Element or ElementTree to validate

69

70

Returns:

71

bool: True if valid, False if invalid

72

"""

73

74

@property

75

def error_log(self):

76

"""Validation error log."""

77

78

def assertValid(self, etree):

79

"""Assert document is valid, raise DocumentInvalid if not."""

80

81

# Factory function

82

def RelaxNG(etree=None, file=None):

83

"""Create RelaxNG validator from schema document or file."""

84

```

85

86

### XML Schema Validation

87

88

W3C XML Schema (XSD 1.0) validation, backed by libxml2's schema implementation.

89

90

```python { .api }

91

class XMLSchema:

92

"""W3C XML Schema validator."""

93

94

def __init__(self, etree=None, file=None):

95

"""

96

Create XMLSchema validator.

97

98

Args:

99

etree: Element or ElementTree containing schema

100

file: Path to schema file or file-like object

101

"""

102

103

def validate(self, etree):

104

"""

105

Validate document against XML Schema.

106

107

Args:

108

etree: Element or ElementTree to validate

109

110

Returns:

111

bool: True if valid, False if invalid

112

"""

113

114

@property

115

def error_log(self):

116

"""Validation error log."""

117

118

def assertValid(self, etree):

119

"""Assert document is valid, raise DocumentInvalid if not."""

120

121

# Factory function

122

def XMLSchema(etree=None, file=None):

123

"""Create XMLSchema validator from schema document or file."""

124

```

125

126

### Schematron Validation

127

128

ISO Schematron rule-based validation with XPath assertions.

129

130

```python { .api }

131

class Schematron:

132

"""ISO Schematron validator."""

133

134

def __init__(self, etree=None, file=None, include=True, expand=True,

135

include_params=None, expand_params=None, compile_params=None,

136

store_schematron=False, store_xslt=False, store_report=False,

137

phase=None, error_finder=None):

138

"""

139

Create Schematron validator.

140

141

Args:

142

etree: Element or ElementTree containing schema

143

file: Path to schema file or file-like object

144

include: Process schematron includes (step 1)

145

expand: Expand abstract patterns (step 2)

146

include_params: Parameters for include step

147

expand_params: Parameters for expand step

148

compile_params: Parameters for compile step

149

store_schematron: Keep processed schematron document

150

store_xslt: Keep compiled XSLT stylesheet

151

store_report: Keep validation report

152

phase: Schematron validation phase

153

error_finder: Custom error finder XPath

154

"""

155

156

def validate(self, etree):

157

"""

158

Validate document against Schematron rules.

159

160

Args:

161

etree: Element or ElementTree to validate

162

163

Returns:

164

bool: True if valid, False if invalid

165

"""

166

167

@property

168

def error_log(self):

169

"""Validation error log."""

170

171

@property

172

def schematron(self):

173

"""Processed schematron document (if stored)."""

174

175

@property

176

def validator_xslt(self):

177

"""Compiled XSLT validator (if stored)."""

178

179

@property

180

def validation_report(self):

181

"""SVRL validation report (if stored)."""

182

183

def assertValid(self, etree):

184

"""Assert document is valid, raise DocumentInvalid if not."""

185

186

# Class constants for error handling

187

ASSERTS_ONLY = None # Report failed assertions only (default)

188

ASSERTS_AND_REPORTS = None # Report assertions and successful reports

189

190

# Schematron processing functions

191

def extract_xsd(schema_doc):

192

"""Extract embedded schematron from XML Schema."""

193

194

def extract_rng(schema_doc):

195

"""Extract embedded schematron from RelaxNG schema."""

196

197

def iso_dsdl_include(schematron_doc, **params):

198

"""Process schematron include directives."""

199

200

def iso_abstract_expand(schematron_doc, **params):

201

"""Expand abstract patterns in schematron."""

202

203

def iso_svrl_for_xslt1(schematron_doc, **params):

204

"""Compile schematron to XSLT validation stylesheet."""

205

206

def stylesheet_params(**kwargs):

207

"""Convert keyword arguments to XSLT stylesheet parameters."""

208

```

209

210

### Validation Error Handling

211

212

Comprehensive error classes for different validation failures.

213

214

```python { .api }

215

class DocumentInvalid(LxmlError):

216

"""Validation error raised by a validator's assertValid() when the document does not comply."""

217

218

class DTDError(LxmlError):

219

"""Base class for DTD-related errors."""

220

221

class DTDParseError(DTDError):

222

"""DTD parsing error."""

223

224

class DTDValidateError(DTDError, DocumentInvalid):

225

"""DTD validation error."""

226

227

class RelaxNGError(LxmlError):

228

"""Base class for RelaxNG-related errors."""

229

230

class RelaxNGParseError(RelaxNGError):

231

"""RelaxNG schema parsing error."""

232

233

class RelaxNGValidateError(RelaxNGError, DocumentInvalid):

234

"""RelaxNG validation error."""

235

236

class XMLSchemaError(LxmlError):

237

"""Base class for XML Schema-related errors."""

238

239

class XMLSchemaParseError(XMLSchemaError):

240

"""XML Schema parsing error."""

241

242

class XMLSchemaValidateError(XMLSchemaError, DocumentInvalid):

243

"""XML Schema validation error."""

244

245

class SchematronError(LxmlError):

246

"""Base class for Schematron-related errors."""

247

248

class SchematronParseError(SchematronError):

249

"""Schematron schema parsing error."""

250

251

class SchematronValidateError(SchematronError, DocumentInvalid):

252

"""Schematron validation error."""

253

```

254

255

### Parser Integration

256

257

Integrate validation directly into the parsing workflow.

258

259

```python { .api }

260

class XMLParser:

261

"""XML parser with validation support."""

262

263

def __init__(self, dtd_validation=False, schema=None, **kwargs):

264

"""

265

Create parser with validation options.

266

267

Args:

268

dtd_validation: Enable DTD validation during parsing

269

schema: XMLSchema instance to validate against during parsing (other validator types are not accepted here)

270

**kwargs: Other parser options

271

"""

272

273

# Validation during parsing

274

def parse(source, parser=None, base_url=None):

275

"""Parse with validation if parser configured."""

276

277

def fromstring(text, parser=None, base_url=None):

278

"""Parse string with validation if parser configured."""

279

```

280

281

## Usage Examples

282

283

### DTD Validation

284

285

```python

286

from lxml import etree

287

288

# DTD schema

289

dtd_content = '''

290

<!ELEMENT catalog (book+)>

291

<!ELEMENT book (title, author, year, price)>

292

<!ATTLIST book id CDATA #REQUIRED

293

category (fiction|science|mystery) #REQUIRED>

294

<!ELEMENT title (#PCDATA)>

295

<!ELEMENT author (#PCDATA)>

296

<!ELEMENT year (#PCDATA)>

297

<!ELEMENT price (#PCDATA)>

298

<!ATTLIST price currency CDATA #IMPLIED>

299

'''

300

301

# XML document

302

xml_content = '''<?xml version="1.0"?>

303

<!DOCTYPE catalog [

304

''' + dtd_content + '''

305

]>

306

<catalog>

307

<book id="1" category="fiction">

308

<title>The Great Gatsby</title>

309

<author>F. Scott Fitzgerald</author>

310

<year>1925</year>

311

<price currency="USD">12.99</price>

312

</book>

313

<book id="2" category="science">

314

<title>A Brief History of Time</title>

315

<author>Stephen Hawking</author>

316

<year>1988</year>

317

<price>15.99</price>

318

</book>

319

</catalog>'''

320

321

# Parse and validate

322

parser = etree.XMLParser(dtd_validation=True)

323

try:

324

doc = etree.fromstring(xml_content, parser)

325

print("Document is valid according to DTD")

326

except etree.XMLSyntaxError as e:  # a validating parser reports DTD violations as XMLSyntaxError

327

print(f"DTD validation failed: {e}")

328

329

# Separate DTD validation

330

dtd = etree.fromstring(xml_content).getroottree().docinfo.internalDTD  # DTD() needs a file or external ID, so reuse the DOCTYPE's internal subset

331

doc = etree.fromstring(xml_content)

332

if dtd.validate(doc):

333

print("Document is valid")

334

else:

335

print("Validation errors:")

336

for error in dtd.error_log:

337

print(f" Line {error.line}: {error.message}")

338

```

339

340

### RelaxNG Validation

341

342

```python

343

from lxml import etree

344

345

# RelaxNG schema

346

relaxng_schema = '''

347

<element name="catalog" xmlns="http://relaxng.org/ns/structure/1.0">

348

<oneOrMore>

349

<element name="book">

350

<attribute name="id"/>

351

<attribute name="category">

352

<choice>

353

<value>fiction</value>

354

<value>science</value>

355

<value>mystery</value>

356

</choice>

357

</attribute>

358

<element name="title"><text/></element>

359

<element name="author"><text/></element>

360

<element name="year"><text/></element>

361

<element name="price">

362

<optional>

363

<attribute name="currency"/>

364

</optional>

365

<text/>

366

</element>

367

</element>

368

</oneOrMore>

369

</element>

370

'''

371

372

# Create validator

373

relaxng_doc = etree.fromstring(relaxng_schema)

374

relaxng = etree.RelaxNG(relaxng_doc)

375

376

# XML to validate

377

xml_content = '''

378

<catalog>

379

<book id="1" category="fiction">

380

<title>The Great Gatsby</title>

381

<author>F. Scott Fitzgerald</author>

382

<year>1925</year>

383

<price currency="USD">12.99</price>

384

</book>

385

</catalog>

386

'''

387

388

# Validate

389

doc = etree.fromstring(xml_content)

390

if relaxng.validate(doc):

391

print("Document is valid according to RelaxNG")

392

else:

393

print("RelaxNG validation errors:")

394

for error in relaxng.error_log:

395

print(f" Line {error.line}: {error.message}")

396

397

# Raise on failure instead of checking the return value

398

# (XMLParser(schema=...) only accepts XMLSchema validators, so use assertValid for RelaxNG)

399

try:

400

relaxng.assertValid(doc)

401

print("Document validated successfully")

402

except etree.DocumentInvalid as e:

403

print(f"Validation failed: {e}")

404

```

405

406

### XML Schema Validation

407

408

```python

409

from lxml import etree

410

411

# XML Schema (XSD)

412

xsd_schema = '''<?xml version="1.0"?>

413

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

414

<xs:element name="catalog">

415

<xs:complexType>

416

<xs:sequence>

417

<xs:element name="book" maxOccurs="unbounded">

418

<xs:complexType>

419

<xs:sequence>

420

<xs:element name="title" type="xs:string"/>

421

<xs:element name="author" type="xs:string"/>

422

<xs:element name="year" type="xs:gYear"/>

423

<xs:element name="price">

424

<xs:complexType>

425

<xs:simpleContent>

426

<xs:extension base="xs:decimal">

427

<xs:attribute name="currency" type="xs:string"/>

428

</xs:extension>

429

</xs:simpleContent>

430

</xs:complexType>

431

</xs:element>

432

</xs:sequence>

433

<xs:attribute name="id" type="xs:string" use="required"/>

434

<xs:attribute name="category" use="required">

435

<xs:simpleType>

436

<xs:restriction base="xs:string">

437

<xs:enumeration value="fiction"/>

438

<xs:enumeration value="science"/>

439

<xs:enumeration value="mystery"/>

440

</xs:restriction>

441

</xs:simpleType>

442

</xs:attribute>

443

</xs:complexType>

444

</xs:element>

445

</xs:sequence>

446

</xs:complexType>

447

</xs:element>

448

</xs:schema>

449

'''

450

451

# Create XML Schema validator

452

xsd_doc = etree.fromstring(xsd_schema)

453

xmlschema = etree.XMLSchema(xsd_doc)

454

455

# Validate document

456

xml_content = '''

457

<catalog>

458

<book id="1" category="fiction">

459

<title>The Great Gatsby</title>

460

<author>F. Scott Fitzgerald</author>

461

<year>1925</year>

462

<price currency="USD">12.99</price>

463

</book>

464

</catalog>

465

'''

466

467

doc = etree.fromstring(xml_content)

468

if xmlschema.validate(doc):

469

print("Document is valid according to XML Schema")

470

else:

471

print("XML Schema validation errors:")

472

for error in xmlschema.error_log:

473

print(f" Line {error.line}: {error.message}")

474

```

475

476

### Schematron Validation

477

478

```python

479

from lxml import etree

480

from lxml.isoschematron import Schematron

481

482

# Schematron schema with business rules

483

schematron_schema = '''<?xml version="1.0"?>

484

<schema xmlns="http://purl.oclc.org/dsdl/schematron">

485

<title>Book Catalog Validation</title>

486

487

<pattern id="price-rules">

488

<title>Price validation rules</title>

489

490

<rule context="book">

491

<assert test="price[@currency]">

492

Books should have currency specified for price

493

</assert>

494

<assert test="number(price) > 0">

495

Book price must be positive: <value-of select="title"/>

496

</assert>

497

<assert test="number(price) &lt; 100">

498

Book price seems too high: <value-of select="title"/> costs <value-of select="price"/>

499

</assert>

500

</rule>

501

502

<rule context="book[@category='fiction']">

503

<assert test="number(year) >= 1800">

504

Fiction books should be from 1800 or later

505

</assert>

506

</rule>

507

508

<rule context="book[@category='science']">

509

<assert test="number(year) >= 1900">

510

Science books should be relatively recent (1900+)

511

</assert>

512

</rule>

513

</pattern>

514

</schema>

515

'''

516

517

# Create Schematron validator

518

schematron_doc = etree.fromstring(schematron_schema)

519

schematron = Schematron(schematron_doc)

520

521

# Test valid document

522

valid_xml = '''

523

<catalog>

524

<book id="1" category="fiction">

525

<title>The Great Gatsby</title>

526

<author>F. Scott Fitzgerald</author>

527

<year>1925</year>

528

<price currency="USD">12.99</price>

529

</book>

530

</catalog>

531

'''

532

533

doc = etree.fromstring(valid_xml)

534

if schematron.validate(doc):

535

print("Document passes Schematron validation")

536

else:

537

print("Schematron validation errors:")

538

for error in schematron.error_log:

539

print(f" {error.message}")

540

541

# Test invalid document

542

invalid_xml = '''

543

<catalog>

544

<book id="1" category="science">

545

<title>Ancient Science</title>

546

<author>Old Author</author>

547

<year>1850</year>

548

<price>-5.99</price>

549

</book>

550

</catalog>

551

'''

552

553

doc = etree.fromstring(invalid_xml)

554

if not schematron.validate(doc):

555

print("\nSchematron validation failed as expected:")

556

for error in schematron.error_log:

557

print(f" {error.message}")

558

```

559

560

### Combined Validation

561

562

```python

563

from lxml import etree

564

from lxml.isoschematron import Schematron

565

566

# Multi-step validation: structure + business rules

567

def validate_document(xml_content, relaxng_schema, schematron_schema):

568

"""Validate document against both structural and business rules."""

569

570

doc = etree.fromstring(xml_content)

571

572

# Step 1: Structural validation with RelaxNG

573

relaxng = etree.RelaxNG(etree.fromstring(relaxng_schema))

574

if not relaxng.validate(doc):

575

return False, "Structural validation failed", relaxng.error_log

576

577

# Step 2: Business rules validation with Schematron

578

schematron = Schematron(etree.fromstring(schematron_schema))

579

if not schematron.validate(doc):

580

return False, "Business rules validation failed", schematron.error_log

581

582

return True, "Document is fully valid", None

583

584

# Use combined validation

585

xml_to_test = '''

586

<catalog>

587

<book id="1" category="fiction">

588

<title>Test Book</title>

589

<author>Test Author</author>

590

<year>2023</year>

591

<price currency="USD">25.99</price>

592

</book>

593

</catalog>

594

'''

595

596

is_valid, message, errors = validate_document(

597

xml_to_test, relaxng_schema, schematron_schema

598

)

599

600

print(f"Validation result: {message}")

601

if errors:

602

for error in errors:

603

print(f" {error.message}")

604

```