0
# DOM4J Document Creation and Parsing
1
2
This section covers creating XML documents, parsing XML from various sources, and using the factory classes that manage DOM4J object creation. DOM4J provides flexible approaches for both programmatic document creation and parsing XML from files, streams, and other sources.
3
4
## DocumentHelper - Static Utility Class
5
6
DocumentHelper provides convenient static methods for creating DOM4J objects and performing common operations.
7
8
### Package and Import
9
```java { .api }
10
import org.dom4j.DocumentHelper;
11
import org.dom4j.Document;
12
import org.dom4j.Element;
13
import org.dom4j.Attribute;
14
import org.dom4j.DocumentException;
15
```
16
17
### Document Creation
18
```java { .api }
19
public final class DocumentHelper {
20
// Document creation
21
public static Document createDocument();
22
public static Document createDocument(Element rootElement);
23
24
// Parse text as XML
25
public static Document parseText(String text) throws DocumentException;
26
}
27
```
28
29
### Element and Attribute Creation
30
```java { .api }
31
public final class DocumentHelper {
32
// Element creation
33
public static Element createElement(QName qname);
34
public static Element createElement(String name);
35
36
// Attribute creation
37
public static Attribute createAttribute(Element owner, QName qname, String value);
38
public static Attribute createAttribute(Element owner, String name, String value);
39
}
40
```
41
42
### Node Creation Methods
43
```java { .api }
44
public final class DocumentHelper {
45
// Text content nodes
46
public static CDATA createCDATA(String text);
47
public static Comment createComment(String text);
48
public static Text createText(String text);
49
public static Entity createEntity(String name, String text);
50
51
// Processing instructions
52
public static ProcessingInstruction createProcessingInstruction(String target, String data);
53
public static ProcessingInstruction createProcessingInstruction(String target, Map<String, String> data);
54
55
// Namespace and QName
56
public static Namespace createNamespace(String prefix, String uri);
57
public static QName createQName(String localName, Namespace namespace);
58
public static QName createQName(String localName);
59
}
60
```
61
62
### XPath and Pattern Methods
63
```java { .api }
64
public final class DocumentHelper {
65
// XPath creation
66
public static XPath createXPath(String xpathExpression) throws InvalidXPathException;
67
public static XPath createXPath(String xpathExpression, VariableContext context) throws InvalidXPathException;
68
69
// Pattern and filter creation
70
public static NodeFilter createXPathFilter(String xpathFilterExpression);
71
public static Pattern createPattern(String xpathPattern);
72
73
// XPath operations on node collections
74
public static List<Node> selectNodes(String xpathFilterExpression, List<Node> nodes);
75
public static List<Node> selectNodes(String xpathFilterExpression, Node node);
76
public static void sort(List<Node> list, String xpathExpression);
77
public static void sort(List<Node> list, String expression, boolean distinct);
78
}
79
```
80
81
### Utility Methods
82
```java { .api }
83
public final class DocumentHelper {
84
// Path-based element creation
85
public static Element makeElement(Branch source, String path);
86
}
87
```
88
89
### Using DocumentHelper
90
```java { .api }
91
// Create empty document
92
Document document = DocumentHelper.createDocument();
93
94
// Create document with root element
95
Element root = DocumentHelper.createElement("catalog");
96
Document catalog = DocumentHelper.createDocument(root);
97
98
// Parse XML text
99
String xmlText = "<book><title>XML Guide</title></book>";
100
Document parsed = DocumentHelper.parseText(xmlText);
101
102
// Create various node types
103
Element product = DocumentHelper.createElement("product");
104
Attribute id = DocumentHelper.createAttribute(product, "id", "P123");
105
Comment comment = DocumentHelper.createComment("Product catalog");
106
CDATA cdata = DocumentHelper.createCDATA("<script>alert('test');</script>");
107
Text text = DocumentHelper.createText("Product description");
108
109
// Create namespaced elements
110
Namespace ns = DocumentHelper.createNamespace("catalog", "http://example.com/catalog");
111
QName productQName = DocumentHelper.createQName("product", ns);
112
Element nsProduct = DocumentHelper.createElement(productQName);
113
114
// XPath creation and usage
115
XPath xpath = DocumentHelper.createXPath("//product[@id]");
116
List<Node> products = xpath.selectNodes(document);
117
118
NodeFilter filter = DocumentHelper.createXPathFilter("self::product");
119
List<Node> filtered = document.selectNodes("//node()").stream()
120
.filter(filter::matches)
121
.collect(Collectors.toList());
122
123
// Path-based element creation
124
Element catalog = DocumentHelper.createElement("catalog");
125
Element book = DocumentHelper.makeElement(catalog, "books/book");
126
// Creates: catalog/books/book if they don't exist
127
```
128
129
## DocumentFactory - Configurable Object Creation
130
131
DocumentFactory provides a configurable factory for creating all DOM4J objects with support for customization and singleton access.
132
133
### Package and Import
134
```java { .api }
135
import org.dom4j.DocumentFactory;
136
import org.dom4j.Document;
137
import org.dom4j.Element;
138
import org.dom4j.QName;
139
```
140
141
### Singleton Access
142
```java { .api }
143
public class DocumentFactory {
144
// Singleton instance
145
public static synchronized DocumentFactory getInstance();
146
}
147
```
148
149
### Document Creation
150
```java { .api }
151
public class DocumentFactory {
152
// Document creation methods
153
public Document createDocument();
154
public Document createDocument(String encoding);
155
public Document createDocument(Element rootElement);
156
157
// Document type creation
158
public DocumentType createDocType(String name, String publicId, String systemId);
159
}
160
```
161
162
### Element Creation
163
```java { .api }
164
public class DocumentFactory {
165
// Element creation
166
public Element createElement(QName qname);
167
public Element createElement(String name);
168
public Element createElement(String qualifiedName, String namespaceURI);
169
}
170
```
171
172
### Attribute Creation
173
```java { .api }
174
public class DocumentFactory {
175
// Attribute creation
176
public Attribute createAttribute(Element owner, QName qname, String value);
177
public Attribute createAttribute(Element owner, String name, String value);
178
}
179
```
180
181
### Content Node Creation
182
```java { .api }
183
public class DocumentFactory {
184
// Text-based nodes
185
public CDATA createCDATA(String text);
186
public Comment createComment(String text);
187
public Text createText(String text);
188
public Entity createEntity(String name, String text);
189
190
// Processing instructions
191
public ProcessingInstruction createProcessingInstruction(String target, String data);
192
public ProcessingInstruction createProcessingInstruction(String target, Map<String, String> data);
193
}
194
```
195
196
### QName and Namespace Creation
197
```java { .api }
198
public class DocumentFactory {
199
// Namespace creation
200
public Namespace createNamespace(String prefix, String uri);
201
202
// QName creation
203
public QName createQName(String localName, Namespace namespace);
204
public QName createQName(String localName);
205
public QName createQName(String name, String prefix, String uri);
206
public QName createQName(String qualifiedName, String uri);
207
}
208
```
209
210
### XPath Creation
211
```java { .api }
212
public class DocumentFactory {
213
// XPath and pattern creation
214
public XPath createXPath(String xpathExpression) throws InvalidXPathException;
215
public XPath createXPath(String xpathExpression, VariableContext variableContext);
216
public NodeFilter createXPathFilter(String xpathFilterExpression, VariableContext variableContext);
217
public NodeFilter createXPathFilter(String xpathFilterExpression);
218
public Pattern createPattern(String xpathPattern);
219
}
220
```
221
222
### Configuration Methods
223
```java { .api }
224
public class DocumentFactory {
225
// QName management
226
public List<QName> getQNames();
227
228
// XPath namespace configuration
229
public Map<String, String> getXPathNamespaceURIs();
230
public void setXPathNamespaceURIs(Map<String, String> namespaceURIs);
231
}
232
```
233
234
### Using DocumentFactory
235
```java { .api }
236
// Get singleton instance
237
DocumentFactory factory = DocumentFactory.getInstance();
238
239
// Create document with encoding
240
Document document = factory.createDocument("UTF-8");
241
242
// Create elements
243
Element root = factory.createElement("catalog");
244
document.setRootElement(root);
245
246
Element product = factory.createElement("product");
247
root.add(product);
248
249
// Create namespaced elements
250
Element nsElement = factory.createElement("item", "http://example.com/catalog");
251
252
// Create attributes
253
Attribute id = factory.createAttribute(product, "id", "P123");
254
product.add(id);
255
256
// Create QNames and namespaces
257
Namespace catalogNS = factory.createNamespace("cat", "http://example.com/catalog");
258
QName productQName = factory.createQName("product", catalogNS);
259
Element nsProduct = factory.createElement(productQName);
260
261
// Create content nodes
262
Comment comment = factory.createComment("Generated catalog");
263
Text description = factory.createText("Product description");
264
CDATA script = factory.createCDATA("<script>processProduct();</script>");
265
266
document.add(comment);
267
product.add(description);
268
269
// Configure XPath namespaces
270
Map<String, String> namespaces = Map.of(
271
"cat", "http://example.com/catalog",
272
"prod", "http://example.com/product"
273
);
274
factory.setXPathNamespaceURIs(namespaces);
275
276
// Create XPath with configured namespaces
277
XPath xpath = factory.createXPath("//cat:product[@prod:id]");
278
```
279
280
### Custom DocumentFactory
281
```java { .api }
282
// Create custom factory for specialized behavior
283
public class CustomDocumentFactory extends DocumentFactory {
284
@Override
285
public Element createElement(QName qname) {
286
// Return custom element implementation
287
return new MyCustomElement(qname);
288
}
289
290
@Override
291
public Attribute createAttribute(Element owner, QName qname, String value) {
292
// Return custom attribute implementation
293
return new MyCustomAttribute(qname, value);
294
}
295
}
296
297
// Use custom factory
298
DocumentFactory customFactory = new CustomDocumentFactory();
299
Document document = customFactory.createDocument();
300
Element element = customFactory.createElement("custom");
301
```
302
303
## XML Parsing with SAXReader
304
305
SAXReader creates DOM4J documents from XML sources using SAX parsing. It provides configurable parsing with support for validation, entity resolution, and custom error handling.
306
307
### Package and Import
308
```java { .api }
309
import org.dom4j.io.SAXReader;
310
import org.dom4j.Document;
311
import org.dom4j.DocumentException;
312
import org.xml.sax.XMLReader;
313
import org.xml.sax.EntityResolver;
314
import org.xml.sax.ErrorHandler;
315
```
316
317
### SAXReader Construction
318
```java { .api }
319
public class SAXReader {
320
// Constructors
321
public SAXReader();
322
public SAXReader(boolean validating);
323
public SAXReader(DocumentFactory factory);
324
public SAXReader(XMLReader xmlReader);
325
public SAXReader(String xmlReaderClassName) throws SAXException;
326
}
327
```
328
329
### Reading Methods
330
```java { .api }
331
public class SAXReader {
332
// Read from various sources
333
public Document read(File file) throws DocumentException;
334
public Document read(InputSource in) throws DocumentException;
335
public Document read(InputStream in) throws DocumentException;
336
public Document read(Reader reader) throws DocumentException;
337
public Document read(String systemId) throws DocumentException;
338
public Document read(URL url) throws DocumentException;
339
}
340
```
341
342
### Configuration Methods
343
```java { .api }
344
public class SAXReader {
345
// XMLReader configuration
346
public void setXMLReader(XMLReader reader);
347
public XMLReader getXMLReader() throws SAXException;
348
public void setXMLReaderClassName(String className) throws SAXException;
349
350
// Validation
351
public boolean isValidating();
352
public void setValidating(boolean validation);
353
354
// Factory configuration
355
public DocumentFactory getDocumentFactory();
356
public void setDocumentFactory(DocumentFactory documentFactory);
357
358
// Error handling
359
public ErrorHandler getErrorHandler();
360
public void setErrorHandler(ErrorHandler errorHandler);
361
362
// Entity resolution
363
public EntityResolver getEntityResolver();
364
public void setEntityResolver(EntityResolver entityResolver);
365
}
366
```
367
368
### Using SAXReader
369
```java { .api }
370
// Basic parsing
371
SAXReader reader = new SAXReader();
372
373
// Parse from file
374
Document document = reader.read(new File("catalog.xml"));
375
376
// Parse from URL
377
Document webDoc = reader.read(new URL("http://example.com/data.xml"));
378
379
// Parse from string
380
String xmlContent = "<book><title>Guide</title></book>";
381
Document stringDoc = reader.read(new StringReader(xmlContent));
382
383
// Parse from input stream
384
try (InputStream is = new FileInputStream("data.xml")) {
385
Document streamDoc = reader.read(is);
386
}
387
388
// Validating parser
389
SAXReader validatingReader = new SAXReader(true);
390
try {
391
Document validDoc = validatingReader.read("valid.xml");
392
} catch (DocumentException e) {
393
System.err.println("Validation error: " + e.getMessage());
394
}
395
396
// Custom error handling
397
reader.setErrorHandler(new ErrorHandler() {
398
@Override
399
public void warning(SAXParseException e) {
400
System.out.println("Warning: " + e.getMessage());
401
}
402
403
@Override
404
public void error(SAXParseException e) throws SAXException {
405
System.err.println("Error: " + e.getMessage());
406
throw e;
407
}
408
409
@Override
410
public void fatalError(SAXParseException e) throws SAXException {
411
System.err.println("Fatal: " + e.getMessage());
412
throw e;
413
}
414
});
415
416
// Custom entity resolver
417
reader.setEntityResolver(new EntityResolver() {
418
@Override
419
public InputSource resolveEntity(String publicId, String systemId) {
420
if (systemId.endsWith("custom.dtd")) {
421
return new InputSource(new StringReader("<!-- Custom DTD -->"));
422
}
423
return null; // Use default resolution
424
}
425
});
426
427
// Custom document factory
428
DocumentFactory customFactory = new MyDocumentFactory();
429
reader.setDocumentFactory(customFactory);
430
431
// Parse with custom factory
432
Document customDoc = reader.read("data.xml");
433
```
434
435
### Advanced Parsing Configuration
436
```java { .api }
437
// Configure specific XMLReader
438
XMLReader xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
439
xmlReader.setFeature("http://xml.org/sax/features/validation", true);
440
xmlReader.setFeature("http://apache.org/xml/features/validation/schema", true);
441
442
SAXReader reader = new SAXReader(xmlReader);
443
444
// Set schema validation
445
xmlReader.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
446
"http://example.com/schema http://example.com/schema.xsd");
447
448
// Parse with schema validation
449
try {
450
Document document = reader.read("instance.xml");
451
System.out.println("Document is valid");
452
} catch (DocumentException e) {
453
System.err.println("Validation failed: " + e.getMessage());
454
}
455
```
456
457
## Document Building Patterns
458
459
### Programmatic Document Construction
460
```java { .api }
461
// Create complete document structure
462
Document catalog = DocumentHelper.createDocument();
463
464
// Add XML declaration and encoding
465
catalog.setXMLEncoding("UTF-8");
466
467
// Add processing instruction
468
catalog.addProcessingInstruction("xml-stylesheet",
469
"type=\"text/xsl\" href=\"catalog.xsl\"");
470
471
// Create root element with namespace
472
Namespace catalogNS = Namespace.get("cat", "http://example.com/catalog");
473
Element root = catalog.addElement(QName.get("catalog", catalogNS));
474
475
// Add schema location
476
root.addNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
477
root.addAttribute(QName.get("schemaLocation",
478
Namespace.get("xsi", "http://www.w3.org/2001/XMLSchema-instance")),
479
"http://example.com/catalog catalog.xsd");
480
481
// Build content using fluent interface
482
Element books = root.addElement("books");
483
Element book1 = books.addElement("book")
484
.addAttribute("isbn", "123-456-789")
485
.addAttribute("available", "true");
486
487
book1.addElement("title").addText("DOM4J Guide");
488
book1.addElement("author").addText("John Smith");
489
book1.addElement("price").addText("29.99");
490
491
// Add CDATA section for description
492
book1.addElement("description").add(
493
DocumentHelper.createCDATA("<p>Complete guide to <b>DOM4J</b> XML processing.</p>"));
494
495
// Add comment
496
books.addComment("More books will be added");
497
```
498
499
### Template-based Document Creation
500
```java { .api }
501
// Create document template
502
public class CatalogBuilder {
503
private final DocumentFactory factory;
504
private final Namespace catalogNS;
505
506
public CatalogBuilder() {
507
this.factory = DocumentFactory.getInstance();
508
this.catalogNS = factory.createNamespace("cat", "http://example.com/catalog");
509
}
510
511
public Document createCatalog(String title, String version) {
512
Document document = factory.createDocument("UTF-8");
513
514
Element root = document.addElement(factory.createQName("catalog", catalogNS));
515
root.addAttribute("version", version);
516
root.addAttribute("title", title);
517
root.addAttribute("generated", Instant.now().toString());
518
519
return document;
520
}
521
522
public Element addBook(Element catalog, String isbn, String title, String author) {
523
Element books = catalog.element("books");
524
if (books == null) {
525
books = catalog.addElement("books");
526
}
527
528
return books.addElement("book")
529
.addAttribute("isbn", isbn)
530
.addElement("title").addText(title).getParent()
531
.addElement("author").addText(author).getParent();
532
}
533
}
534
535
// Use template
536
CatalogBuilder builder = new CatalogBuilder();
537
Document catalog = builder.createCatalog("Technical Books", "1.0");
538
Element book = builder.addBook(catalog.getRootElement(),
539
"123-456-789", "XML Processing", "Jane Doe");
540
```
541
542
### Streaming Document Creation
543
```java { .api }
544
// For large documents, build incrementally
545
public class StreamingCatalogBuilder {
546
private final Document document;
547
private final Element booksElement;
548
549
public StreamingCatalogBuilder() {
550
this.document = DocumentHelper.createDocument();
551
Element root = document.addElement("catalog");
552
this.booksElement = root.addElement("books");
553
}
554
555
public void addBook(Map<String, String> bookData) {
556
Element book = booksElement.addElement("book");
557
558
bookData.forEach((key, value) -> {
559
if ("isbn".equals(key)) {
560
book.addAttribute("isbn", value);
561
} else {
562
book.addElement(key).addText(value);
563
}
564
});
565
}
566
567
public Document getDocument() {
568
return document;
569
}
570
571
public void writeToFile(String filename) throws IOException {
572
try (FileWriter writer = new FileWriter(filename)) {
573
XMLWriter xmlWriter = new XMLWriter(writer, OutputFormat.createPrettyPrint());
574
xmlWriter.write(document);
575
}
576
}
577
}
578
579
// Stream processing
580
StreamingCatalogBuilder builder = new StreamingCatalogBuilder();
581
582
// Process data source (e.g., database results)
583
while (resultSet.next()) {
584
Map<String, String> bookData = Map.of(
585
"isbn", resultSet.getString("isbn"),
586
"title", resultSet.getString("title"),
587
"author", resultSet.getString("author"),
588
"price", resultSet.getString("price")
589
);
590
builder.addBook(bookData);
591
}
592
593
builder.writeToFile("catalog.xml");
594
```
595
596
## Exception Handling in Document Creation
597
598
### DocumentException Handling
599
```java { .api }
600
import org.dom4j.DocumentException;
601
602
try {
603
// Parsing operations that may fail
604
SAXReader reader = new SAXReader();
605
Document document = reader.read("malformed.xml");
606
607
} catch (DocumentException e) {
608
// Get detailed error information
609
String message = e.getMessage();
610
Throwable cause = e.getCause();
611
612
// Handle specific error types
613
if (cause instanceof SAXParseException) {
614
SAXParseException saxError = (SAXParseException) cause;
615
int line = saxError.getLineNumber();
616
int column = saxError.getColumnNumber();
617
System.err.printf("Parse error at line %d, column %d: %s%n",
618
line, column, saxError.getMessage());
619
} else {
620
System.err.println("Document processing error: " + message);
621
}
622
}
623
```
624
625
### Validation Error Handling
626
```java { .api }
627
/**
 * Custom error handler for validation: collects warnings and recoverable
 * errors so parsing can continue, and aborts only on fatal errors.
 */
class ValidationErrorHandler implements ErrorHandler {
    private final List<String> errors = new ArrayList<>();
    private final List<String> warnings = new ArrayList<>();

    /** Records the warning; does not throw. */
    @Override
    public void warning(SAXParseException e) {
        warnings.add(formatError("WARNING", e));
    }

    /** Records the error; does not throw. */
    @Override
    public void error(SAXParseException e) {
        errors.add(formatError("ERROR", e));
    }

    /**
     * Records the fatal error and rethrows it.
     *
     * @throws SAXException always; carries the formatted message and the
     *         original {@code SAXParseException} as its cause
     */
    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        String message = formatError("FATAL", e);
        errors.add(message);
        // Chain the original exception so callers keep its location data and stack trace.
        throw new SAXException(message, e);
    }

    /** Formats a problem as {@code LEVEL [line:column]: message}. */
    private String formatError(String level, SAXParseException e) {
        return String.format("%s [%d:%d]: %s",
                level, e.getLineNumber(), e.getColumnNumber(), e.getMessage());
    }

    /** Returns a copy of the recorded errors (recoverable and fatal). */
    public List<String> getErrors() { return new ArrayList<>(errors); }

    /** Returns a copy of the recorded warnings. */
    public List<String> getWarnings() { return new ArrayList<>(warnings); }

    /** Returns true if at least one error or fatal error was recorded. */
    public boolean hasErrors() { return !errors.isEmpty(); }
}
658
659
// Use validation handler
660
SAXReader reader = new SAXReader(true);
661
ValidationErrorHandler errorHandler = new ValidationErrorHandler();
662
reader.setErrorHandler(errorHandler);
663
664
try {
665
Document document = reader.read("document.xml");
666
667
if (errorHandler.hasErrors()) {
668
System.err.println("Validation errors:");
669
errorHandler.getErrors().forEach(System.err::println);
670
}
671
672
if (!errorHandler.getWarnings().isEmpty()) {
673
System.out.println("Validation warnings:");
674
errorHandler.getWarnings().forEach(System.out::println);
675
}
676
677
} catch (DocumentException e) {
678
System.err.println("Parse failed: " + e.getMessage());
679
}
680
```
681
682
DOM4J's document creation and parsing APIs are simple for basic operations yet flexible enough for advanced requirements. Together, the static helper methods (`DocumentHelper`), configurable factories (`DocumentFactory`), and SAX-based parsing (`SAXReader`) cover scenarios ranging from simple document creation to complex enterprise XML processing pipelines.