XML processing utilities for Apache Groovy including markup builders, parsers, and navigation tools
—
Streaming XML processing in the Groovy XML module enables efficient handling of large XML documents without loading entire structures into memory, using StreamingMarkupBuilder and its supporting classes.
Primary class for creating XML content that can be written to streams without materializing the entire document in memory.
// API sketch (signatures only — not runnable as written).
// bind() captures a closure and returns a Writable whose evaluation is
// deferred until writeTo()/toString() is called, so nothing is buffered here.
class StreamingMarkupBuilder extends AbstractStreamingBuilder {
// Configuration properties
// useDoubleQuotes: attribute quoting style; expandEmptyElements: <a></a> vs <a/>
boolean useDoubleQuotes
boolean expandEmptyElements
// encoding: intended output encoding — presumably surfaced in the emitted
// XML declaration; verify against the implementation.
String encoding
// Core methods
// bind: capture a markup closure as a lazily-evaluated Writable
Writable bind(Closure closure)
// bindNode: wrap an already-parsed node object as a Writable
Writable bindNode(Object node)
// NOTE(review): the '}' below is fused with the start of the next, independent
// example snippet — an extraction artifact, not real code flow.
}def smb = new StreamingMarkupBuilder()
// Configure output format
smb.encoding = 'UTF-8'
smb.useDoubleQuotes = true
smb.expandEmptyElements = false
// Create streamable XML content. bind() only captures the closure — the
// 10,000 records below are generated while writing, not here.
def xmlContent = smb.bind {
    root {
        metadata(version: '1.0') {
            created(new Date().toString())
            generator('Groovy StreamingMarkupBuilder')
        }
        data {
            // Generate large amounts of data efficiently
            (1..10000).each { i ->
                record(id: i) {
                    name("Record ${i}")
                    value(Math.random() * 100)
                    timestamp(System.currentTimeMillis())
                }
            }
        }
    }
}
// Write to a file. withWriter flushes and closes even on error, and pins the
// charset — the original `new FileWriter('large-output.xml')` was never closed
// and used the platform-default charset.
new File('large-output.xml').withWriter('UTF-8') { w ->
    xmlContent.writeTo(w)
}
// Writable.writeTo expects a java.io.Writer; System.out is a PrintStream, so
// wrap it and flush (do not close stdout).
def stdout = new PrintWriter(System.out)
xmlContent.writeTo(stdout)
stdout.flush()
// Or get as string (only for smaller content)
def xmlString = xmlContent.toString()
// --- next, independent example (boundary restored from fused extraction) ---
def smb = new StreamingMarkupBuilder()
// Bind a 1000-book catalog; element calls (catalog/book/title/...) are routed
// to the builder via dynamic dispatch and emitted only when written.
def content = smb.bind {
catalog {
(1..1000).each { i ->
book(id: i) {
title("Book ${i}")
author("Author ${i % 100}")
// NOTE(review): String.format("%.2f", ...) is locale-sensitive — some locales
// render a ',' decimal separator; consider Locale.ROOT for stable output.
price(String.format("%.2f", Math.random() * 50))
}
}
}
}
// Stream to file
new FileOutputStream('catalog.xml').withStream { fos ->
content.writeTo(new OutputStreamWriter(fos, 'UTF-8'))
}
// Stream to HTTP response
// (assumes a servlet-style `response` is in scope — not defined in this snippet)
response.contentType = 'application/xml'
response.characterEncoding = 'UTF-8'
content.writeTo(response.writer)
// Stream to compressed output
new GZIPOutputStream(new FileOutputStream('catalog.xml.gz')).withStream { gzos ->
content.writeTo(new OutputStreamWriter(gzos, 'UTF-8'))
}
// Chain with other writers
// NOTE(review): FileWriter uses the platform-default charset and close() is not
// in a finally block — prefer new File('buffered.xml').withWriter('UTF-8') { }.
def bufferedContent = new BufferedWriter(new FileWriter('buffered.xml'))
content.writeTo(bufferedContent)
bufferedContent.close()Base class providing common streaming builder functionality.
// API sketch: shared contract for streaming builders. Subclasses implement the
// bind() template method and inherit createNode overloads covering the
// name / name+value / name+attributes / name+attributes+value forms.
abstract class AbstractStreamingBuilder {
// Template method for creating bound content
abstract Writable bind(Closure closure)
// Support methods for subclasses
protected Object createNode(Object name)
protected Object createNode(Object name, Object value)
protected Object createNode(Object name, Map attributes)
protected Object createNode(Object name, Map attributes, Object value)
// NOTE(review): '}' below is fused with the following prose heading — an
// extraction artifact.
}Core functionality for markup generation in streaming context.
// API sketch: low-level markup emission shared by the streaming builders.
public class BaseMarkupBuilder {
// Configuration for output formatting
protected boolean doubleQuotes;
// omitNullAttributes / omitEmptyAttributes presumably drop attributes whose
// value is null / empty instead of emitting attr="" — verify against impl.
protected boolean omitNullAttributes;
protected boolean omitEmptyAttributes;
protected boolean expandEmptyElements;
// Core building methods
protected void startTag(String tagName, Map<String, Object> attributes);
protected void endTag(String tagName);
protected void text(String content);
protected void comment(String content);
}Specialized Writer for streaming markup output with proper XML formatting.
// API sketch: a java.io.Writer decorator adding XML-aware emit methods
// (tags, text, comments, processing instructions) on top of write/flush/close.
public class StreamingMarkupWriter extends Writer {
// Constructors
public StreamingMarkupWriter(Writer writer);
public StreamingMarkupWriter(Writer writer, String encoding);
// Writer methods
@Override
public void write(char[] cbuf, int off, int len) throws IOException;
@Override
public void flush() throws IOException;
@Override
public void close() throws IOException;
// Specialized XML writing methods
public void startTag(String name, Map<String, Object> attributes) throws IOException;
public void endTag(String name) throws IOException;
public void emptyTag(String name, Map<String, Object> attributes) throws IOException;
public void text(String content) throws IOException;
public void comment(String content) throws IOException;
public void processingInstruction(String target, String data) throws IOException;
// API sketch: dynamic-dispatch support for the builder DSL — unrecognized
// method calls are turned into markup via invokeMethod; the namespace maps
// presumably track prefix-to-URI bindings (verify against implementation).
}public class Builder {
// Internal builder state management
protected Map<String, String> namespaceMethodMap;
protected Map<String, String> namespaceDeclarations;
// Node creation support
public Object invokeMethod(String name, Object args);
public void setProperty(String name, Object value);
public Object getProperty(String name);
// NOTE(review): '}' below is fused with the start of the next Groovy example —
// an extraction artifact.
}def generateLargeReport = { outputFile, recordCount ->
def smb = new StreamingMarkupBuilder()
// NOTE(review): encoding is set to UTF-8 here but the FileWriter below uses the
// platform-default charset — a mismatch; prefer new File(...).withWriter('UTF-8').
smb.encoding = 'UTF-8'
def report = smb.bind {
report {
header {
title('Large Data Report')
generated(new Date().toString())
// NOTE(review): the element name shadows the closure parameter `recordCount`;
// confirm dispatch still reaches the builder — renaming the parameter is safer.
recordCount(recordCount)
}
// Generate data in chunks to manage memory
data {
(1..recordCount).each { i ->
if (i % 1000 == 0) {
// NOTE(review): explicit GC hints usually hurt more than help — the JVM may
// ignore them or trigger a full stop-the-world pause; consider removing.
System.gc() // Hint for garbage collection on large datasets
}
record(id: i) {
timestamp(System.currentTimeMillis())
// generateRandomData() is not defined in this snippet — assumed elsewhere.
data1(generateRandomData())
data2(generateRandomData())
data3(generateRandomData())
// Nested structures
details {
category(i % 10)
subcategory(i % 100)
tags {
// 1 to 5 tags, varying with the record number
(1..(i % 5 + 1)).each { j ->
tag("tag${j}")
}
}
}
}
}
}
}
}
// withWriter flushes and closes; charset is the platform default (see note above)
new FileWriter(outputFile).withWriter { writer ->
report.writeTo(writer)
}
}
// Generate 100,000 record report
generateLargeReport('massive-report.xml', 100000)def createNamespacedDocument = {
def smb = new StreamingMarkupBuilder()
// bind returns a Writable; evaluation is deferred until writeTo/toString
smb.bind {
    // The declaration must precede the root element
    mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')
    // Plain map keeping the namespace URIs in one place; the actual
    // declarations are the explicit xmlns attributes below.
    // NOTE(review): assigning `namespaces` inside the bind closure may route
    // through the builder's setProperty — confirm against the builder impl.
    namespaces = [
        '': 'http://example.com/default',
        'meta': 'http://example.com/metadata',
        'data': 'http://example.com/data'
    ]
    document(xmlns: namespaces[''],
             'xmlns:meta': namespaces['meta'],
             'xmlns:data': namespaces['data']) {
        'meta:header' {
            'meta:title'('Namespaced Document')
            'meta:version'('1.0')
        }
        'data:content' {
            (1..1000).each { i ->
                'data:item' {
                    'data:id'(i)
                    'data:value'("Value ${i}")
                    'meta:created'(new Date().toString())
                }
            }
        }
    }
}
}
def nsDocument = createNamespacedDocument()
// withWriter guarantees the writer is flushed and closed, and pins the charset
// (the original leaked an unclosed FileWriter using the platform default).
new File('namespaced.xml').withWriter('UTF-8') { w ->
    nsDocument.writeTo(w)
}
// Build XML progressively without storing entire structure
def progressiveBuilder = { outputWriter ->
def smb = new StreamingMarkupBuilder()
// bind() only captures the closure; the document is produced at writeTo below
def xml = smb.bind {
progressiveReport {
mkp.comment('Report generated progressively')
// Header section
header {
title('Progressive Report')
startTime(new Date().toString())
}
// Process data in batches
sections {
// processDataInBatches is not defined in this snippet — presumably a helper
// that invokes the closure once per batch; verify against the caller.
processDataInBatches { batchData ->
section(id: batchData.id) {
batchData.items.each { item ->
// NOTE(review): the element name `item` shadows the loop variable `item`;
// confirm dispatch still reaches the builder — renaming the variable is clearer.
item(id: item.id) {
name(item.name)
value(item.value)
}
}
}
}
}
footer {
endTime(new Date().toString())
// getTotalItemCount() is not defined in this snippet — assumed elsewhere.
totalItems(getTotalItemCount())
}
}
}
xml.writeTo(outputWriter)
}
// Use with buffered writer for large outputs
// (withWriter flushes and closes; FileWriter still uses the platform charset)
new BufferedWriter(new FileWriter('progressive.xml')).withWriter { writer ->
progressiveBuilder(writer)
}// Combine streaming generation with streaming consumption
def processLargeXmlPipeline = { inputFile, outputFile ->
// Parse input efficiently
// NOTE(review): XmlSlurper builds an in-memory GPathResult for the whole input
// document — only the output side of this pipeline actually streams.
def slurper = new XmlSlurper()
def input = slurper.parse(inputFile)
// Generate output with streaming
def smb = new StreamingMarkupBuilder()
def output = smb.bind {
processedData {
mkp.comment("Processed from ${inputFile.name}")
// Process input lazily and stream output
input.records.record.each { record ->
// record.'@id' reads the id attribute of each <record> element
processedRecord(id: record.'@id') {
// Transform and stream without accumulating
originalValue(record.value.text())
// transformValue is not defined in this snippet — assumed elsewhere.
processedValue(transformValue(record.value.text()))
processedAt(new Date().toString())
}
}
}
}
// NOTE(review): FileWriter uses the platform-default charset; prefer
// new File(outputFile).withWriter('UTF-8') { ... }.
new FileWriter(outputFile).withWriter { writer ->
output.writeTo(writer)
}
}// Efficient streaming for large documents
def efficientLargeDocumentGeneration = {
def smb = new StreamingMarkupBuilder()
// Configure for minimal memory usage
// NOTE(review): the memory claim for single quotes is dubious — quoting style
// affects the emitted bytes, not builder memory; treat as a formatting choice.
smb.useDoubleQuotes = false // Slightly less memory per attribute
smb.expandEmptyElements = false // More compact output
def content = smb.bind {
largeDocument {
// Process in chunks to avoid memory buildup
// (this loop runs when the Writable is written, not at bind time)
def chunkSize = 1000
def totalRecords = 1000000
(0..<totalRecords).step(chunkSize) { start ->
def end = Math.min(start + chunkSize, totalRecords)
(start..<end).each { i ->
record(id: i) {
data("Record ${i}")
timestamp(System.currentTimeMillis())
}
}
// Yield control periodically
// NOTE(review): Thread.yield() is only a scheduler hint and rarely helps here.
if (start % 10000 == 0) {
Thread.yield()
}
}
}
}
return content
}// Best practices for streaming XML
class StreamingXmlBestPractices {
    /**
     * Binds xmlClosure and streams it to outputFile.
     * withWriter guarantees the buffered UTF-8 writer is flushed and closed
     * even if writing fails.
     */
    static void streamToFile(Closure xmlClosure, File outputFile) {
        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'
        def content = smb.bind(xmlClosure)
        // Use buffered writer for better performance; explicit charset
        // instead of the platform default
        new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(outputFile),
                'UTF-8'
            )
        ).withWriter { writer ->
            content.writeTo(writer)
        }
    }

    /** Streams the bound closure straight to a servlet response as UTF-8 XML. */
    static void streamToResponse(Closure xmlClosure, HttpServletResponse response) {
        response.contentType = 'application/xml; charset=UTF-8'
        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'
        smb.useDoubleQuotes = true
        def content = smb.bind(xmlClosure)
        // The servlet container owns response.writer — do not close it here.
        content.writeTo(response.writer)
    }

    /**
     * Streams every element of data as a <record> inside <dataset>.
     * Fixes vs. the original:
     *  - the XML declaration is emitted BEFORE the root element; emitting it
     *    inside <dataset> yields an ill-formed document (an XML declaration
     *    may only appear at the very start);
     *  - the per-1000-records System.gc() call is gone — explicit GC hints
     *    can stall the JVM and the streaming writer keeps no per-record state.
     */
    static void streamLargeDataset(Collection data, Writer output) {
        def smb = new StreamingMarkupBuilder()
        def xml = smb.bind {
            mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')
            dataset {
                data.eachWithIndex { item, index ->
                    record(index: index) {
                        // processItem is not defined in this snippet —
                        // assumed to emit markup for one item.
                        processItem(item)
                    }
                }
            }
        }
        xml.writeTo(output)
    }
}
// Streaming to databases
// Write the generated XML into a database CLOB column.
// FIX: JDBC PreparedStatement.setCharacterStream(...) returns void and
// CONSUMES a Reader — it does not hand back a Writer to stream into, so the
// original `def writer = stmt.setCharacterStream(1, -1)` could never work.
def streamToDatabase = { data, connection ->
    def smb = new StreamingMarkupBuilder()
    def xml = smb.bind {
        dataExport {
            data.each { record ->
                item {
                    id(record.id)
                    name(record.name)
                    value(record.value)
                }
            }
        }
    }
    // Materialize once, then hand JDBC a Reader over the result
    def buffer = new StringWriter()
    xml.writeTo(buffer)
    def payload = buffer.toString()
    def stmt = connection.prepareStatement(
        "INSERT INTO xml_exports (data) VALUES (?)"
    )
    try {
        stmt.setCharacterStream(1, new StringReader(payload), payload.length())
        stmt.executeUpdate()
    } finally {
        stmt.close() // always release the statement, even if the insert fails
    }
}
// Streaming to message queues
// NOTE(review): this path is not actually streaming — the whole document is
// materialized in a StringWriter before being sent as one message.
def streamToQueue = { messageQueue, xmlClosure ->
def smb = new StreamingMarkupBuilder()
def content = smb.bind(xmlClosure)
// Stream to string for messaging
def writer = new StringWriter()
content.writeTo(writer)
// messageQueue is supplied by the caller; send() is assumed to take a String
messageQueue.send(writer.toString())
}Install with Tessl CLI
npx tessl i tessl/maven-org-codehaus-groovy--groovy-xml