Flexible XML framework for Java providing comprehensive XML processing capabilities
—
DOM4J provides comprehensive input/output capabilities for reading and writing XML documents. This section covers the I/O classes in the org.dom4j.io package, including XML reading, writing, formatting, and integration with various XML processing APIs such as SAX, DOM, and StAX.
STAXEventReader reads DOM4J documents from StAX XMLEventReader streams, providing integration with Java's StAX API.
import org.dom4j.io.STAXEventReader;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import java.io.*;public class STAXEventReader {
// Default constructor with default DocumentFactory
public STAXEventReader();
// Constructor with custom DocumentFactory
public STAXEventReader(DocumentFactory factory);
}public class STAXEventReader {
// Read from streams
public Document readDocument(InputStream is) throws XMLStreamException;
public Document readDocument(InputStream is, String systemId) throws XMLStreamException;
public Document readDocument(Reader reader) throws XMLStreamException;
public Document readDocument(Reader reader, String systemId) throws XMLStreamException;
// Read from StAX event reader
public Document readDocument(XMLEventReader eventReader) throws XMLStreamException;
// Read individual elements
public Element readElement(XMLEventReader eventReader) throws XMLStreamException;
// Configuration
public void setDocumentFactory(DocumentFactory documentFactory);
public DocumentFactory getDocumentFactory();
}Usage Examples:
import org.dom4j.io.STAXEventReader;
import javax.xml.stream.XMLInputFactory;
// Read from InputStream
STAXEventReader staxReader = new STAXEventReader();
Document document = staxReader.readDocument(inputStream);
// Read from XMLEventReader
XMLInputFactory factory = XMLInputFactory.newInstance();
XMLEventReader eventReader = factory.createXMLEventReader(inputStream);
Document doc = staxReader.readDocument(eventReader);STAXEventWriter writes DOM4J documents and nodes to StAX XMLEventWriter streams.
import org.dom4j.io.STAXEventWriter;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLStreamException;public class STAXEventWriter {
// Constructor with XMLEventWriter
public STAXEventWriter(XMLEventWriter xmlEventWriter);
}public class STAXEventWriter {
// Write complete documents
public void writeDocument(Document document) throws XMLStreamException;
// Write individual nodes
public void writeNode(Node node) throws XMLStreamException;
public void writeElement(Element element) throws XMLStreamException;
// Write open/close element tags
public void writeStartElement(Element element) throws XMLStreamException;
public void writeEndElement(Element element) throws XMLStreamException;
// Configuration
public XMLEventWriter getXMLEventWriter();
}HTMLWriter extends XMLWriter to produce HTML-compliant output with HTML-specific formatting rules.
import org.dom4j.io.HTMLWriter;
import org.dom4j.io.OutputFormat;public class HTMLWriter extends XMLWriter {
// Default constructor
public HTMLWriter();
// Writer-based constructors
public HTMLWriter(Writer writer);
public HTMLWriter(Writer writer, OutputFormat format);
// OutputStream-based constructors
public HTMLWriter(OutputStream out) throws UnsupportedEncodingException;
public HTMLWriter(OutputStream out, OutputFormat format) throws UnsupportedEncodingException;
}public class HTMLWriter extends XMLWriter {
// HTML element handling
protected boolean isElementSpaceSensitive(Element element);
protected void writeElementContent(Element element) throws IOException;
// HTML formatting options
public void setHtmlMode(boolean htmlMode);
public boolean isHtmlMode();
}XMLWriter is the primary class for writing DOM4J documents to XML format with configurable formatting options.
import org.dom4j.io.XMLWriter;
import org.dom4j.io.OutputFormat;
import java.io.*;public class XMLWriter {
// Default constructor - writes to System.out
public XMLWriter();
// Writer-based constructors
public XMLWriter(Writer writer);
public XMLWriter(Writer writer, OutputFormat format);
// OutputStream-based constructors
public XMLWriter(OutputStream out) throws UnsupportedEncodingException;
public XMLWriter(OutputStream out, OutputFormat format) throws UnsupportedEncodingException;
}public class XMLWriter {
// Write complete documents
public void write(Document doc) throws IOException;
// Write individual nodes
public void write(Element element) throws IOException;
public void write(Node node) throws IOException;
public void write(Attribute attribute) throws IOException;
public void write(CDATA cdata) throws IOException;
public void write(Comment comment) throws IOException;
public void write(Entity entity) throws IOException;
public void write(ProcessingInstruction pi) throws IOException;
public void write(Text text) throws IOException;
// Element streaming
public void writeOpen(Element element) throws IOException;
public void writeClose(Element element) throws IOException;
// Stream management
public void flush() throws IOException;
public void close() throws IOException;
}public class XMLWriter {
// Format configuration
public OutputFormat getOutputFormat();
public void setOutputFormat(OutputFormat outputFormat);
// Entity reference handling
public boolean getResolveEntityRefs();
public void setResolveEntityRefs(boolean resolve);
}// Basic document writing
Document document = createSampleDocument();
// Write to file with default formatting
try (FileWriter fileWriter = new FileWriter("output.xml")) {
XMLWriter writer = new XMLWriter(fileWriter);
writer.write(document);
}
// Write to string
StringWriter stringWriter = new StringWriter();
XMLWriter xmlWriter = new XMLWriter(stringWriter);
xmlWriter.write(document);
String xmlString = stringWriter.toString();
// Write to output stream with encoding
try (FileOutputStream fos = new FileOutputStream("output.xml")) {
OutputFormat format = OutputFormat.createPrettyPrint();
format.setEncoding("UTF-8");
XMLWriter writer = new XMLWriter(fos, format);
writer.write(document);
}
// Write individual elements
Element rootElement = document.getRootElement();
try (PrintWriter pw = new PrintWriter("element.xml")) {
XMLWriter writer = new XMLWriter(pw, OutputFormat.createCompactFormat());
writer.write(rootElement);
}
// Streaming element writing for large documents
try (FileWriter fw = new FileWriter("large.xml")) {
XMLWriter writer = new XMLWriter(fw, OutputFormat.createPrettyPrint());
// Manual document structure
writer.writeOpen(DocumentHelper.createElement("catalog"));
for (Product product : products) {
Element productElement = createElement(product);
writer.write(productElement);
}
writer.writeClose(DocumentHelper.createElement("catalog"));
}OutputFormat controls how XML is formatted during output, including indentation, encoding, and various formatting options.
import org.dom4j.io.OutputFormat;public class OutputFormat {
// Predefined formats
public static OutputFormat createPrettyPrint();
public static OutputFormat createCompactFormat();
// Custom format
public OutputFormat();
public OutputFormat(String indent);
public OutputFormat(String indent, boolean newlines);
public OutputFormat(String indent, boolean newlines, String encoding);
}public class OutputFormat {
// XML declaration
public boolean isSuppressDeclaration();
public void setSuppressDeclaration(boolean suppressDeclaration);
public boolean isNewLineAfterDeclaration();
public void setNewLineAfterDeclaration(boolean newLineAfterDeclaration);
// Encoding configuration
public String getEncoding();
public void setEncoding(String encoding);
public boolean isOmitEncoding();
public void setOmitEncoding(boolean omitEncoding);
}public class OutputFormat {
// Indentation
public String getIndent();
public void setIndent(String indent);
public void setIndent(boolean doIndent);
// Line formatting
public boolean isNewlines();
public void setNewlines(boolean newlines);
public String getLineSeparator();
public void setLineSeparator(String separator);
// Element formatting
public boolean isExpandEmptyElements();
public void setExpandEmptyElements(boolean expandEmptyElements);
}public class OutputFormat {
// Text processing
public boolean isTrimText();
public void setTrimText(boolean trimText);
public boolean isPadText();
public void setPadText(boolean padText);
// Attribute formatting
public char getAttributeQuoteCharacter();
public void setAttributeQuoteCharacter(char quoteChar);
}public class OutputFormat {
// XHTML mode
public boolean isXHTML();
public void setXHTML(boolean doXHTML);
}// Pretty printed XML with indentation
// Start from the predefined pretty-print format and adjust it.
OutputFormat prettyFormat = OutputFormat.createPrettyPrint();
prettyFormat.setEncoding("UTF-8");
// The indent string is emitted once per nesting level, so it must contain two spaces.
prettyFormat.setIndent("  "); // 2-space indentation
prettyFormat.setNewLineAfterDeclaration(false);
// Compact format for minimal size
OutputFormat compactFormat = OutputFormat.createCompactFormat();
compactFormat.setSuppressDeclaration(true);
compactFormat.setTrimText(true);
// Custom formatting
OutputFormat customFormat = new OutputFormat();
customFormat.setIndent(true);
customFormat.setNewlines(true);
customFormat.setLineSeparator("\n");
customFormat.setEncoding("ISO-8859-1");
customFormat.setExpandEmptyElements(false); // Use <element/> instead of <element></element>
customFormat.setAttributeQuoteCharacter('\''); // Use single quotes for attributes
// XHTML formatting
OutputFormat xhtmlFormat = OutputFormat.createPrettyPrint();
xhtmlFormat.setXHTML(true);
xhtmlFormat.setExpandEmptyElements(false);
// Text processing configuration
OutputFormat textFormat = OutputFormat.createPrettyPrint();
textFormat.setTrimText(true); // Remove leading/trailing whitespace
textFormat.setPadText(false); // Don't add extra whitespace
// Use formats with XMLWriter
Document document = createDocument();
try (FileWriter writer = new FileWriter("pretty.xml")) {
XMLWriter xmlWriter = new XMLWriter(writer, prettyFormat);
xmlWriter.write(document);
}
try (FileWriter writer = new FileWriter("compact.xml")) {
XMLWriter xmlWriter = new XMLWriter(writer, compactFormat);
xmlWriter.write(document);
}Extended coverage of SAXReader for advanced parsing scenarios beyond basic document creation.
import org.dom4j.io.SAXReader;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
// Custom XMLReader configuration
SAXReader reader = new SAXReader();
XMLReader xmlReader = reader.getXMLReader();
// Configure SAX features
xmlReader.setFeature("http://xml.org/sax/features/namespaces", true);
xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
xmlReader.setFeature("http://xml.org/sax/features/validation", true);
// Configure SAX properties
xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", declHandler);
// Schema validation
xmlReader.setFeature("http://apache.org/xml/features/validation/schema", true);
xmlReader.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
"http://example.com/schema http://example.com/schema.xsd");// Memory-efficient parsing for large documents
SAXReader reader = new SAXReader();
// Configure for memory efficiency
reader.setValidating(false); // Disable validation for speed
// Passing a null EntityResolver does not switch external entities off: the reader
// simply falls back to default resolution. Disable them explicitly through parser
// features instead (the last feature is Xerces-specific; the JDK's built-in parser
// understands it).
reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
// Custom document factory for memory optimization
reader.setDocumentFactory(new DocumentFactory() {
@Override
public Element createElement(QName qname) {
// Return memory-optimized element implementation
return new LightweightElement(qname);
}
});
// Parse with progress monitoring
/**
 * Entity resolver that reports every external entity the parser asks for.
 *
 * <p>It only observes: no replacement input is ever supplied.
 */
class ProgressEntityResolver implements EntityResolver {
    /**
     * Logs the requested entity to standard output.
     *
     * @param publicId public identifier of the entity, may be {@code null}
     * @param systemId system identifier of the entity
     * @return always {@code null}, which asks the parser to resolve the entity
     *         itself by opening the system identifier
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) throws IOException {
        // Monitor parsing progress
        System.out.println("Processing entity: " + systemId);
        return null;
    }
}
reader.setEntityResolver(new ProgressEntityResolver());HTMLWriter extends XMLWriter for HTML-specific formatting requirements.
import org.dom4j.io.HTMLWriter;// HTML-specific formatting
HTMLWriter htmlWriter = new HTMLWriter();
// HTML formatting automatically handles:
// - Empty elements (br, hr, img, etc.)
// - Case sensitivity
// - Entity encoding for HTML
Document htmlDocument = createHtmlDocument();
try (FileWriter writer = new FileWriter("output.html")) {
HTMLWriter htmlOut = new HTMLWriter(writer);
htmlOut.write(htmlDocument);
}
// Custom HTML formatting
OutputFormat htmlFormat = OutputFormat.createPrettyPrint();
htmlFormat.setXHTML(true); // XHTML compatibility
htmlFormat.setExpandEmptyElements(false); // <br/> instead of <br></br>
try (FileWriter writer = new FileWriter("xhtml.html")) {
HTMLWriter htmlOut = new HTMLWriter(writer, htmlFormat);
htmlOut.write(htmlDocument);
}SAXWriter writes DOM4J documents as SAX events, enabling integration with SAX-based processing pipelines.
import org.dom4j.io.SAXWriter;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;// Write DOM4J document as SAX events
Document document = createDocument();
// Custom SAX content handler
ContentHandler handler = new DefaultHandler() {
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) {
System.out.println("Start element: " + qName);
}
@Override
public void endElement(String uri, String localName, String qName) {
System.out.println("End element: " + qName);
}
@Override
public void characters(char[] ch, int start, int length) {
String text = new String(ch, start, length).trim();
if (!text.isEmpty()) {
System.out.println("Text: " + text);
}
}
};
// Generate SAX events
SAXWriter saxWriter = new SAXWriter(handler);
saxWriter.write(document);
// Chain with SAX transformers
TransformerFactory factory = TransformerFactory.newInstance();
Templates template = factory.newTemplates(new StreamSource("transform.xsl"));
Transformer transformer = template.newTransformer();
SAXResult result = new SAXResult(handler);
SAXWriter chainedWriter = new SAXWriter(transformer);
chainedWriter.write(document);DOM4J provides dispatch handler classes for event-driven processing of large XML documents with memory efficiency.
import org.dom4j.io.DispatchHandler;
import org.dom4j.ElementHandler;
import org.dom4j.ElementPath;
public class DispatchHandler extends SAXContentHandler {
// Constructor
public DispatchHandler();
// Handler registration
public void addHandler(String path, ElementHandler handler);
public void removeHandler(String path);
public boolean containsHandler(String path);
// Handler access
public ElementHandler getHandler(String path);
public int getActiveHandlerCount();
// Path-based handlers
public void setDefaultHandler(ElementHandler handler);
public ElementHandler getDefaultHandler();
}import org.dom4j.io.PruningDispatchHandler;
public class PruningDispatchHandler extends DispatchHandler {
// Automatically prunes elements when no handlers are active
// Useful for processing large documents with minimal memory usage
// Constructor
public PruningDispatchHandler();
// Inherited methods from DispatchHandler
// Automatically detaches processed elements to save memory
}Usage Examples:
import org.dom4j.io.PruningDispatchHandler;
import org.dom4j.ElementHandler;
import org.dom4j.ElementPath;
// Memory-efficient processing of large documents
SAXReader reader = new SAXReader();
// Register the handler on the reader itself: SAXReader offers
// addHandler(path, handler) for path-based callbacks and has no setContentHandler method.
// NOTE(review): PruningDispatchHandler looks like an internal helper of org.dom4j.io;
// confirm its visibility before using it directly from application code.
reader.addHandler("/catalog/product", new ElementHandler() {
    @Override
    public void onStart(ElementPath path) {
        Element product = path.getCurrent();
        System.out.println("Processing product: " + product.attributeValue("id"));
    }

    @Override
    public void onEnd(ElementPath path) {
        Element product = path.getCurrent();
        // Process product data
        processProduct(product);
        // Prune the finished element so the in-memory tree does not keep growing
        product.detach();
    }
});
// Use with SAXReader for large document processing
reader.read(largeXmlFile); // Processes with minimal memory usageimport org.dom4j.io.ElementStack;
public class ElementStack {
// Constructor
public ElementStack();
// Stack operations
public void pushElement(Element element);
public Element popElement();
public Element peekElement();
// Path access
public ElementPath getPath();
public String getPathString();
public int getDepth();
// Path queries
public boolean matches(String pathPattern);
public Element getElementAtDepth(int depth);
}DOM4J provides classes for converting between DOM4J and W3C DOM representations.
import org.dom4j.io.DOMReader;
import org.w3c.dom.Document as W3CDocument;
// Convert W3C DOM to DOM4J
DOMReader domReader = new DOMReader();
// From W3C Document
W3CDocument w3cDocument = getW3CDocument();
Document dom4jDocument = domReader.read(w3cDocument);
// From W3C Element
org.w3c.dom.Element w3cElement = getW3CElement();
Document fragmentDoc = domReader.read(w3cElement);import org.dom4j.io.DOMWriter;
import javax.xml.parsers.DocumentBuilderFactory;
// Convert DOM4J to W3C DOM
DOMWriter domWriter = new DOMWriter();
Document dom4jDocument = createDocument();
// Write to W3C Document
// (fully qualified name: Java cannot alias org.w3c.dom.Document on import)
org.w3c.dom.Document w3cDocument = domWriter.write(dom4jDocument);
// Write using the DOM implementation of an existing W3C Document
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
org.w3c.dom.Document targetDocument = factory.newDocumentBuilder().newDocument();
// The two-argument overload takes a DOMImplementation and returns a new W3C
// document created by it; it does not append into targetDocument.
org.w3c.dom.Document result = domWriter.write(dom4jDocument, targetDocument.getImplementation());
// Write element to W3C Document
Element dom4jElement = dom4jDocument.getRootElement();
org.w3c.dom.Element w3cElement = domWriter.write(dom4jElement, targetDocument);DOM4J provides JAXP (Java API for XML Processing) integration classes for seamless interoperability with other XML processing APIs.
import org.dom4j.io.DocumentInputSource;
import org.xml.sax.InputSource;
public class DocumentInputSource extends InputSource {
// Constructors
public DocumentInputSource();
public DocumentInputSource(Document document);
// Document access
public Document getDocument();
public void setDocument(Document document);
// InputSource methods (inherited)
public Reader getCharacterStream();
public void setCharacterStream(Reader characterStream);
}import org.dom4j.io.DocumentResult;
import javax.xml.transform.Result;
public class DocumentResult implements Result {
// Constructors
public DocumentResult();
public DocumentResult(DocumentFactory factory);
// Result interface implementation
public void setSystemId(String systemId);
public String getSystemId();
// DOM4J specific methods
public Document getDocument();
public void setDocumentFactory(DocumentFactory factory);
public DocumentFactory getDocumentFactory();
}import org.dom4j.io.DocumentSource;
import javax.xml.transform.Source;
public class DocumentSource implements Source {
// Constructors
public DocumentSource(Document document);
public DocumentSource(Node node);
// Source interface implementation
public void setSystemId(String systemId);
public String getSystemId();
// DOM4J specific methods
public Document getDocument();
public Node getNode();
}DOM4J supports XML Pull Parser for efficient streaming XML processing.
import org.dom4j.io.XPP3Reader;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;
// Configure the pull parser factory
XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
factory.setNamespaceAware(true);
// Read with XPP3Reader
// The reader creates and drives its own XmlPullParser from the factory, so it is
// given the input to read rather than a pre-built parser.
XPP3Reader xppReader = new XPP3Reader();
xppReader.setXPPFactory(factory);
Document document;
try (FileReader input = new FileReader("input.xml")) {
    document = xppReader.read(input);
}
// Custom document factory
xppReader.setDocumentFactory(customFactory);
Document customDoc;
// A reader can only be consumed once, so the second parse opens the file again.
try (FileReader input = new FileReader("input.xml")) {
    customDoc = xppReader.read(input);
}
// Process large XML documents incrementally
/**
 * Processes a large XML document incrementally.
 *
 * <p>Every element whose local name is {@code target} is rebuilt as a small
 * stand-alone DOM4J element, transformed by {@link #processElement(Element)} and
 * written to the output as soon as its end tag is seen.
 */
public class StreamingXMLProcessor implements java.io.Closeable {
    private final XMLWriter writer;
    private final OutputFormat format;

    /**
     * Creates a processor that writes compact XML to the given stream.
     *
     * @param output destination of the processed elements
     * @throws IOException if the writer cannot be created for the format's encoding
     */
    public StreamingXMLProcessor(OutputStream output) throws IOException {
        this.format = OutputFormat.createCompactFormat();
        this.writer = new XMLWriter(output, format);
    }

    /**
     * Parses {@code input} with SAX callbacks and writes each {@code target} element.
     *
     * @param input XML to read
     * @throws Exception if the parser cannot be configured, the input is not
     *         well-formed, or writing an element fails
     */
    public void processLargeDocument(InputStream input) throws Exception {
        // SAXReader has no setContentHandler hook and builds a complete document
        // tree, so the callbacks are driven from a plain JAXP SAX parser instead.
        javax.xml.parsers.SAXParserFactory parserFactory =
                javax.xml.parsers.SAXParserFactory.newInstance();
        // localName may be reported empty unless the parser is namespace-aware.
        parserFactory.setNamespaceAware(true);
        parserFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);

        // Custom content handler for streaming processing
        parserFactory.newSAXParser().parse(input, new DefaultHandler() {
            private Element currentElement;
            private boolean inTargetElement = false;

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) {
                if ("target".equals(localName)) {
                    inTargetElement = true;
                    currentElement = DocumentHelper.createElement(qName);
                    // Copy attributes
                    for (int i = 0; i < attributes.getLength(); i++) {
                        currentElement.addAttribute(attributes.getQName(i), attributes.getValue(i));
                    }
                }
            }

            @Override
            public void characters(char[] ch, int start, int length) {
                if (inTargetElement && currentElement != null) {
                    currentElement.addText(new String(ch, start, length));
                }
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                if ("target".equals(localName) && currentElement != null) {
                    try {
                        // Process and write element immediately
                        processElement(currentElement);
                        writer.write(currentElement);
                        writer.flush();
                    } catch (IOException e) {
                        throw new SAXException("Write error", e);
                    } finally {
                        // Drop the reference so the element can be collected
                        currentElement = null;
                        inTargetElement = false;
                    }
                }
            }
        });
    }

    /**
     * Upper-cases the text of {@code element} in place.
     *
     * @param element element to transform
     */
    private void processElement(Element element) {
        // Custom processing logic
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String processed = element.getText().toUpperCase(java.util.Locale.ROOT);
        element.setText(processed);
    }

    /**
     * Closes the underlying writer.
     *
     * @throws IOException if closing the writer fails
     */
    @Override
    public void close() throws IOException {
        writer.close();
    }
}
// Process multiple documents concurrently
public class ParallelXMLProcessor {
private final ExecutorService executor;
private final OutputFormat format;
public ParallelXMLProcessor(int threads) {
this.executor = Executors.newFixedThreadPool(threads);
this.format = OutputFormat.createPrettyPrint();
}
public CompletableFuture<Void> processDocumentAsync(File inputFile, File outputFile) {
return CompletableFuture.runAsync(() -> {
try {
SAXReader reader = new SAXReader();
Document document = reader.read(inputFile);
// Process document
transformDocument(document);
// Write result
try (FileWriter writer = new FileWriter(outputFile)) {
XMLWriter xmlWriter = new XMLWriter(writer, format);
xmlWriter.write(document);
}
} catch (Exception e) {
throw new RuntimeException("Processing failed for " + inputFile, e);
}
}, executor);
}
public void processMultipleDocuments(List<File> inputFiles, File outputDirectory) {
List<CompletableFuture<Void>> futures = inputFiles.stream()
.map(file -> {
File outputFile = new File(outputDirectory, file.getName());
return processDocumentAsync(file, outputFile);
})
.collect(Collectors.toList());
// Wait for all to complete
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
}
private void transformDocument(Document document) {
// Custom transformation logic
List<Element> elements = document.selectNodes("//element");
elements.forEach(element -> element.addAttribute("processed", "true"));
}
public void shutdown() {
executor.shutdown();
}
}// Custom document factory for memory optimization
/**
 * Document factory that hands out the lightweight element and attribute
 * implementations instead of the defaults.
 */
public class MemoryEfficientDocumentFactory extends DocumentFactory {

    /**
     * @param qname qualified name of the element to create
     * @return a new {@code MemoryEfficientElement} carrying that name
     */
    @Override
    public Element createElement(QName qname) {
        final Element lightweightElement = new MemoryEfficientElement(qname);
        return lightweightElement;
    }

    /**
     * @param owner element the attribute is created for; not passed on to the attribute
     * @param qname qualified name of the attribute
     * @param value attribute value
     * @return a new {@code MemoryEfficientAttribute} with that name and value
     */
    @Override
    public Attribute createAttribute(Element owner, QName qname, String value) {
        // NOTE(review): MemoryEfficientAttribute is not shown in this section and
        // must be supplied alongside this factory.
        final Attribute lightweightAttribute = new MemoryEfficientAttribute(qname, value);
        return lightweightAttribute;
    }
}
// Lightweight element implementation
/**
 * Element whose backing lists start out small to keep the per-element footprint low.
 */
class MemoryEfficientElement extends DefaultElement {
    /** Initial capacity of the child-content list. */
    private static final int INITIAL_CONTENT_CAPACITY = 4;
    /** Initial capacity of the attribute list; most elements have few attributes. */
    private static final int INITIAL_ATTRIBUTE_CAPACITY = 2;

    /**
     * @param qname qualified name of the element
     */
    public MemoryEfficientElement(QName qname) {
        super(qname);
    }

    // Override methods to reduce memory usage

    /** @return a new, empty content list with a small initial capacity */
    @Override
    protected List<Node> createContentList() {
        return new ArrayList<>(INITIAL_CONTENT_CAPACITY);
    }

    /** @return a new, empty attribute list with a small initial capacity */
    @Override
    protected List<Attribute> createAttributeList() {
        return new ArrayList<>(INITIAL_ATTRIBUTE_CAPACITY);
    }
}
// Use memory-efficient factory
SAXReader reader = new SAXReader();
reader.setDocumentFactory(new MemoryEfficientDocumentFactory());
Document document = reader.read(largeXmlFile);
// Document uses less memory due to optimized implementationsDOM4J's I/O capabilities provide comprehensive support for reading and writing XML documents with fine-grained control over formatting, performance, and integration with other XML processing APIs. The flexible architecture allows for both simple operations and complex enterprise XML processing scenarios.
Install with Tessl CLI
npx tessl i tessl/maven-org-dom4j--dom4j