tessl/maven-org-codehaus-groovy--groovy-xml

XML processing utilities for Apache Groovy including markup builders, parsers, and navigation tools

—

Pending

Overview

Eval results

Files

Streaming XML Processing

Name: tessl/maven-org-codehaus-groovy--groovy-xml
Author: tessl

The groovy-xml module supports streaming XML generation: StreamingMarkupBuilder and its supporting classes write markup to a Writer as it is produced, so large documents can be generated without holding the entire structure in memory.

StreamingMarkupBuilder

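Primary class for creating XML content that can be written to streams without materializing the entire document in memory. bind() returns a Writable; the markup is generated when that Writable is written to a Writer (or converted to a string).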

// API summary (signatures only, no bodies) of the builder used throughout this page.
// NOTE(review): property semantics below are summarized from the Groovy API docs —
// confirm against the groovy-xml version in use.
class StreamingMarkupBuilder extends AbstractStreamingBuilder {
    // Configuration properties
    // useDoubleQuotes: quote attribute values with " rather than '
    boolean useDoubleQuotes
    // expandEmptyElements: write empty elements as <tag></tag> rather than <tag/>
    boolean expandEmptyElements  
    // encoding: character encoding name handed to the markup writer (e.g. 'UTF-8')
    String encoding
    
    // Core methods
    // bind: wraps a markup closure in a Writable that can be written to any Writer
    Writable bind(Closure closure)
    // bindNode: same idea for an existing node object
    // (presumably a parsed GPath/XmlSlurper result — TODO confirm)
    Writable bindNode(Object node)
}

Basic Streaming Usage

def smb = new StreamingMarkupBuilder()

// Configure output format
smb.encoding = 'UTF-8'
smb.useDoubleQuotes = true
smb.expandEmptyElements = false

// Describe the document. bind() only wraps the closure in a Writable;
// the markup is generated each time the Writable is written, so values such as
// new Date() and Math.random() differ between the outputs below.
def xmlContent = smb.bind {
    root {
        metadata(version: '1.0') {
            created(new Date().toString())
            generator('Groovy StreamingMarkupBuilder')
        }

        data {
            // Generate large amounts of data efficiently
            (1..10000).each { i ->
                record(id: i) {
                    name("Record ${i}")
                    value(Math.random() * 100)
                    timestamp(System.currentTimeMillis())
                }
            }
        }
    }
}

// Write to a file. withWriter('UTF-8') keeps the file's charset in line with
// smb.encoding and flushes/closes the writer even if generation throws.
new File('large-output.xml').withWriter('UTF-8') { writer ->
    xmlContent.writeTo(writer)
}

// writeTo() expects a Writer, and System.out is a PrintStream, so adapt it.
// The adapter is flushed but deliberately not closed: closing it would close System.out.
def stdoutWriter = new OutputStreamWriter(System.out, 'UTF-8')
xmlContent.writeTo(stdoutWriter)
stdoutWriter.flush()

// Or get as string (only for smaller content — this does hold the whole document in memory)
def xmlString = xmlContent.toString()

Streaming to Different Outputs

def smb = new StreamingMarkupBuilder()
def content = smb.bind {
    catalog {
        (1..1000).each { i ->
            book(id: i) {
                title("Book ${i}")
                author("Author ${i % 100}")
                // Locale.ROOT pins the decimal separator to '.'; the default locale
                // may format the price as "12,34", which is not a portable number.
                price(String.format(Locale.ROOT, "%.2f", Math.random() * 50))
            }
        }
    }
}

// Stream to file (withStream closes the stream when the closure returns or throws)
new FileOutputStream('catalog.xml').withStream { fos ->
    content.writeTo(new OutputStreamWriter(fos, 'UTF-8'))
}

// Stream to HTTP response
// (`response` is the servlet response supplied by the surrounding web context)
response.contentType = 'application/xml'
response.characterEncoding = 'UTF-8'
content.writeTo(response.writer)

// Stream to compressed output; closing the GZIP stream writes the trailer
new GZIPOutputStream(new FileOutputStream('catalog.xml.gz')).withStream { gzos ->
    content.writeTo(new OutputStreamWriter(gzos, 'UTF-8'))
}

// Chain with other writers. withWriter flushes and closes the chain even when
// writeTo() throws, which a trailing manual close() would not.
new BufferedWriter(new FileWriter('buffered.xml')).withWriter { bufferedContent ->
    content.writeTo(bufferedContent)
}

AbstractStreamingBuilder

Base class providing common streaming builder functionality.

// API summary (signatures only) of the base class that StreamingMarkupBuilder extends.
// NOTE(review): this listing is a summary, not the library source — verify the
// createNode overloads against the groovy-xml sources before relying on them.
abstract class AbstractStreamingBuilder {
    // Template method for creating bound content
    // Subclasses return a Writable wrapping the given markup closure.
    abstract Writable bind(Closure closure)
    
    // Support methods for subclasses
    // Overloads differ only in which of attributes/value accompany the node name.
    protected Object createNode(Object name)
    protected Object createNode(Object name, Object value)
    protected Object createNode(Object name, Map attributes)
    protected Object createNode(Object name, Map attributes, Object value)
}

Streaming Support Classes

BaseMarkupBuilder

Core functionality for markup generation in streaming context.

// API summary (signatures only) of the markup-generation support class.
// NOTE(review): this listing is a summary, not the library source — verify field
// and method names against the groovy-xml sources before relying on them.
public class BaseMarkupBuilder {
    // Configuration for output formatting
    // Flag names suggest: attribute quote style, skipping null/empty attribute
    // values, and <tag></tag> vs <tag/> for empty elements — TODO confirm.
    protected boolean doubleQuotes;
    protected boolean omitNullAttributes;
    protected boolean omitEmptyAttributes;
    protected boolean expandEmptyElements;
    
    // Core building methods
    // One hook per markup construct: opening tag with attributes, closing tag,
    // character data, and comment.
    protected void startTag(String tagName, Map<String, Object> attributes);
    protected void endTag(String tagName);
    protected void text(String content);
    protected void comment(String content);
}

StreamingMarkupWriter

Specialized Writer for streaming markup output with proper XML formatting.

// API summary (signatures only) of the Writer that the streaming builder wraps
// around the caller's output Writer.
// NOTE(review): this listing is a summary, not the library source — verify the
// "specialized XML writing methods" against the groovy-xml sources before relying on them.
public class StreamingMarkupWriter extends Writer {
    // Constructors
    // Both wrap an existing Writer; the second also takes the encoding name.
    public StreamingMarkupWriter(Writer writer);
    public StreamingMarkupWriter(Writer writer, String encoding);
    
    // Writer methods
    // Standard java.io.Writer contract; all may throw IOException.
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException;
    @Override
    public void flush() throws IOException;
    @Override
    public void close() throws IOException;
    
    // Specialized XML writing methods
    // One method per markup construct; attributes are passed as a name -> value map.
    public void startTag(String name, Map<String, Object> attributes) throws IOException;
    public void endTag(String name) throws IOException;
    public void emptyTag(String name, Map<String, Object> attributes) throws IOException;
    public void text(String content) throws IOException;
    public void comment(String content) throws IOException;
    public void processingInstruction(String target, String data) throws IOException;
}

Builder Support Classes

// API summary (signatures only) of the generic builder support class.
// NOTE(review): this listing is a summary, not the library source — verify member
// names and modifiers against the groovy-xml sources before relying on them.
public class Builder {
    // Internal builder state management
    // Two String -> String maps holding namespace bookkeeping
    // (presumably prefix -> URI for the declarations — TODO confirm).
    protected Map<String, String> namespaceMethodMap;
    protected Map<String, String> namespaceDeclarations;
    
    // Node creation support
    // Dynamic-dispatch hooks: method calls and property access by name.
    public Object invokeMethod(String name, Object args);
    public void setProperty(String name, Object value);
    public Object getProperty(String name);
}

Advanced Streaming Patterns

Large Dataset Generation

// Writes a report with `totalRecords` <record> elements to `outputFile`
// (a path String or a File) as UTF-8.
//
// The second parameter is deliberately NOT named recordCount: inside the markup
// closure a local variable shadows an element of the same name, so
// recordCount(recordCount) would try to call the Integer instead of emitting
// a <recordCount> element.
//
// generateRandomData() is not defined in this snippet; it must be in scope
// (e.g. as a script method) when this closure runs.
def generateLargeReport = { outputFile, totalRecords ->
    def smb = new StreamingMarkupBuilder()
    smb.encoding = 'UTF-8'

    def reportXml = smb.bind {
        report {
            header {
                title('Large Data Report')
                generated(new Date().toString())
                recordCount(totalRecords)
            }

            // Each record is written to the output as soon as it is produced,
            // so no explicit chunking or System.gc() calls are needed.
            data {
                (1..totalRecords).each { i ->
                    record(id: i) {
                        timestamp(System.currentTimeMillis())
                        data1(generateRandomData())
                        data2(generateRandomData())
                        data3(generateRandomData())

                        // Nested structures
                        details {
                            category(i % 10)
                            subcategory(i % 100)
                            tags {
                                (1..(i % 5 + 1)).each { j ->
                                    tag("tag${j}")
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // Encode the file as UTF-8 to match smb.encoding; a plain FileWriter would use
    // the platform default charset. withWriter flushes and closes the chain.
    new BufferedWriter(
        new OutputStreamWriter(new FileOutputStream(outputFile), 'UTF-8')
    ).withWriter { writer ->
        reportXml.writeTo(writer)
    }
}

// Generate 100,000 record report
generateLargeReport('massive-report.xml', 100000)

Streaming with Namespaces

// Returns a Writable for a document that uses a default namespace plus the
// `meta` and `data` prefixes. Nothing is generated until the Writable is written.
def createNamespacedDocument = {
    def smb = new StreamingMarkupBuilder()

    // Declared with `def` so it stays local; an undeclared assignment inside the
    // markup closure would leak into the script binding (and fail inside a class).
    def namespaces = [
        '': 'http://example.com/default',
        'meta': 'http://example.com/metadata',
        'data': 'http://example.com/data'
    ]

    smb.bind {
        mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

        document(xmlns: namespaces[''], 
                'xmlns:meta': namespaces['meta'],
                'xmlns:data': namespaces['data']) {

            'meta:header' {
                'meta:title'('Namespaced Document')
                'meta:version'('1.0')
            }

            'data:content' {
                (1..1000).each { i ->
                    'data:item' {
                        'data:id'(i)
                        'data:value'("Value ${i}")
                        'meta:created'(new Date().toString())
                    }
                }
            }
        }
    }
}

def nsDocument = createNamespacedDocument()
// The XML declaration promises UTF-8, so encode the file as UTF-8 instead of the
// platform default; withWriter also closes the file, which a bare FileWriter never was.
new File('namespaced.xml').withWriter('UTF-8') { writer ->
    nsDocument.writeTo(writer)
}

Progressive XML Generation

// Build XML progressively without storing entire structure
// Builds a report section by section and writes it to `outputWriter`.
//
// processDataInBatches and getTotalItemCount are not defined in this snippet.
// They must be resolvable from the enclosing script/class; otherwise the builder
// treats the calls as element names and emits them as markup.
def progressiveBuilder = { outputWriter ->
    def smb = new StreamingMarkupBuilder()
    def xml = smb.bind {
        progressiveReport {
            mkp.comment('Report generated progressively')

            // Header section
            header {
                title('Progressive Report')
                startTime(new Date().toString())
            }

            // Process data in batches
            sections {
                processDataInBatches { batchData ->
                    section(id: batchData.id) {
                        // The loop variable must not be called `item`: a local named
                        // item would shadow the <item> element, and item(...) would
                        // try to invoke the data object instead of emitting markup.
                        batchData.items.each { entry ->
                            item(id: entry.id) {
                                name(entry.name)
                                value(entry.value)
                            }
                        }
                    }
                }
            }

            footer {
                endTime(new Date().toString())
                totalItems(getTotalItemCount())
            }
        }
    }

    xml.writeTo(outputWriter)
}

// Use with buffered writer for large outputs
new BufferedWriter(new FileWriter('progressive.xml')).withWriter { writer ->
    progressiveBuilder(writer)
}

Memory-Efficient Processing

// Combine streaming generation with streaming consumption
// Reads <records><record id="..."><value>…</value></record>…</records> children of
// the input root and writes a transformed copy to `outputFile` (path String or File).
//
// Only the OUTPUT side is streamed: XmlSlurper parses the whole input before
// slurper.parse() returns, so very large inputs still need enough memory to be parsed.
// transformValue() is not defined in this snippet and must be in scope.
def processLargeXmlPipeline = { inputFile, outputFile ->
    // Parse input (inputFile is used as a File below via inputFile.name)
    def slurper = new XmlSlurper()
    def input = slurper.parse(inputFile)

    // Generate output with streaming
    def smb = new StreamingMarkupBuilder()
    def output = smb.bind {
        processedData {
            mkp.comment("Processed from ${inputFile.name}")

            // Each processed record is written as soon as it is produced
            input.records.record.each { record ->
                processedRecord(id: record.'@id') {
                    def rawValue = record.value.text()
                    originalValue(rawValue)
                    processedValue(transformValue(rawValue))
                    processedAt(new Date().toString())
                }
            }
        }
    }

    // The document has no XML declaration, so parsers will assume UTF-8;
    // write it as UTF-8 rather than the platform default a FileWriter would use.
    new OutputStreamWriter(new FileOutputStream(outputFile), 'UTF-8').withWriter { writer ->
        output.writeTo(writer)
    }
}

Performance Considerations

Memory Management

// Efficient streaming for large documents
// Returns a Writable describing a one-million-record document.
//
// Memory stays flat because every <record> is written to the target Writer as soon
// as it is produced; nothing is accumulated by the builder. Manual chunking and
// Thread.yield() calls add nothing to that, so the loop is a single lazy range.
def efficientLargeDocumentGeneration = {
    def smb = new StreamingMarkupBuilder()

    // Output-format choices. These change the markup text only, not memory usage.
    smb.useDoubleQuotes = false      // attribute values in single quotes
    smb.expandEmptyElements = false  // more compact output for empty elements

    def totalRecords = 1000000

    def content = smb.bind {
        largeDocument {
            // (0..<totalRecords) is a range object, not a materialized list
            (0..<totalRecords).each { i ->
                record(id: i) {
                    data("Record ${i}")
                    timestamp(System.currentTimeMillis())
                }
            }
        }
    }

    return content
}

Streaming Best Practices

// Best practices for streaming XML
class StreamingXmlBestPractices {

    /**
     * Streams the markup described by {@code xmlClosure} to {@code outputFile} as UTF-8.
     * The writer chain is flushed and closed by withWriter, even on failure.
     */
    static void streamToFile(Closure xmlClosure, File outputFile) {
        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'

        def content = smb.bind(xmlClosure)

        // Use buffered writer for better performance
        new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(outputFile), 
                'UTF-8'
            )
        ).withWriter { writer ->
            content.writeTo(writer)
        }
    }

    /**
     * Streams the markup described by {@code xmlClosure} to a servlet response.
     * The content type (including charset) is set before the writer is obtained.
     * The response writer is left open for the container to manage.
     */
    static void streamToResponse(Closure xmlClosure, HttpServletResponse response) {
        response.contentType = 'application/xml; charset=UTF-8'

        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'
        smb.useDoubleQuotes = true

        def content = smb.bind(xmlClosure)
        content.writeTo(response.writer)
    }

    /**
     * Streams one {@code <record index="n">} element per item of {@code data}
     * to {@code output}. The caller owns {@code output} and is responsible for closing it.
     */
    static void streamLargeDataset(Collection data, Writer output) {
        def smb = new StreamingMarkupBuilder()

        def xml = smb.bind {
            // The XML declaration must be the very first thing in the document,
            // so it is emitted before the root element, not inside it.
            mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

            dataset {
                data.eachWithIndex { item, index ->
                    record(index: index) {
                        // Process each item without accumulating.
                        // NOTE(review): processItem is not defined in this class; as
                        // written the builder will emit it as a <processItem> element —
                        // confirm that is the intent or supply a real helper.
                        processItem(item)
                    }
                    // No System.gc() here: each record is written as it is produced,
                    // and forcing collections only slows generation down.
                }
            }
        }

        xml.writeTo(output)
    }
}

Integration with Other Systems

// Streaming to databases
// Streams `data` (items exposing id, name and value) as XML into a CLOB column.
// Returns the row count reported by executeUpdate().
//
// PreparedStatement.setCharacterStream() accepts a Reader and returns nothing, so
// it cannot hand back a Writer. Instead the XML is streamed into a Clob created
// by the connection, and the Clob is then bound to the statement.
// NOTE(review): requires a JDBC 4 driver that supports Connection.createClob().
def streamToDatabase = { data, connection ->
    def smb = new StreamingMarkupBuilder()
    def xml = smb.bind {
        dataExport {
            data.each { record ->
                item {
                    id(record.id)
                    name(record.name)
                    value(record.value)
                }
            }
        }
    }

    def clob = connection.createClob()
    try {
        // Clob positions are 1-based; withWriter closes the stream so the
        // content is complete before the Clob is bound.
        clob.setCharacterStream(1L).withWriter { writer ->
            xml.writeTo(writer)
        }

        def stmt = connection.prepareStatement(
            "INSERT INTO xml_exports (data) VALUES (?)"
        )
        try {
            stmt.setClob(1, clob)
            return stmt.executeUpdate()
        } finally {
            stmt.close()
        }
    } finally {
        // Release driver-side resources held by the Clob
        clob.free()
    }
}

// Streaming to message queues
// Renders the markup described by xmlClosure into a String and hands it to
// messageQueue.send(). The whole document is buffered in memory here because
// the message payload is a single String.
def streamToQueue = { messageQueue, xmlClosure ->
    def markup = new StreamingMarkupBuilder().bind(xmlClosure)

    // Collect the generated XML in an in-memory buffer
    def buffer = new StringWriter()
    markup.writeTo(buffer)
    def payload = buffer.toString()

    messageQueue.send(payload)
}

Install with Tessl CLI

npx tessl i tessl/maven-org-codehaus-groovy--groovy-xml

docs