tessl/maven-org-codehaus-groovy--groovy-xml

XML processing utilities for Apache Groovy including markup builders, parsers, and navigation tools

—

Pending

Overview

Eval results

Files

XML Parsing

Name: tessl/maven-org-codehaus-groovy--groovy-xml
Author: tessl

Groovy XML provides two main approaches to parsing XML documents: XmlParser for Node-based manipulation and XmlSlurper for XPath-like navigation using GPathResult.

XmlParser

Parses XML into a tree of Node objects that can be directly manipulated, modified, and traversed.

public class XmlParser implements ContentHandler {
    // Constructors
    public XmlParser();
    public XmlParser(boolean validating, boolean namespaceAware);
    public XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
    public XmlParser(XMLReader reader);
    public XmlParser(SAXParser parser);
    
    // Parsing methods
    public Node parse(File file) throws IOException, SAXException;
    public Node parse(InputSource input) throws IOException, SAXException;
    public Node parse(InputStream input) throws IOException, SAXException;
    public Node parse(Reader in) throws IOException, SAXException;
    public Node parse(String uri) throws IOException, SAXException;
    public Node parseText(String text) throws SAXException;
    
    // Configuration methods
    public boolean isTrimWhitespace();
    public void setTrimWhitespace(boolean trimWhitespace);
    public boolean isKeepIgnorableWhitespace();
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
    public boolean isNamespaceAware();
    public void setNamespaceAware(boolean namespaceAware);
    
    // SAX ContentHandler configuration
    public void setContentHandler(ContentHandler contentHandler);
    public void setErrorHandler(ErrorHandler errorHandler);
    public void setEntityResolver(EntityResolver entityResolver);
    public void setDTDHandler(DTDHandler dtdHandler);
}

XmlParser Usage

// Basic parsing
def parser = new XmlParser()
def root = parser.parseText('''
    <catalog>
        <book id="1" category="fiction">
            <title>The Great Gatsby</title>
            <author>F. Scott Fitzgerald</author>
            <price currency="USD">12.99</price>
        </book>
        <book id="2" category="science">
            <title>A Brief History of Time</title>
            <author>Stephen Hawking</author>
            <price currency="USD">15.99</price>
        </book>
    </catalog>
''')

// Access elements and attributes
println root.name()                    // "catalog"
println root.book.size()               // 2
println root.book[0].title.text()      // "The Great Gatsby"
println root.book[0].'@id'             // "1"
println root.book[0].'@category'       // "fiction"

// Modify the structure
root.book[0].title[0].value = 'New Title'
root.book[0].author[0].value = 'New Author'

// Add new elements
root.appendNode('publisher', 'Penguin Books')
root.book[0].appendNode('isbn', '978-0-7432-7356-5')

// Parse from file
def fileNode = parser.parse(new File('catalog.xml'))

// Parse with validation and namespace awareness
def validatingParser = new XmlParser(true, true)
validatingParser.setTrimWhitespace(true)
validatingParser.setKeepIgnorableWhitespace(false)
def validatedRoot = validatingParser.parseText(xmlString)

XmlSlurper

Parses XML into GPathResult objects providing XPath-like navigation and lazy evaluation.

public class XmlSlurper extends DefaultHandler {
    // Constructors
    public XmlSlurper();
    public XmlSlurper(boolean validating, boolean namespaceAware);
    public XmlSlurper(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
    public XmlSlurper(XMLReader reader);
    public XmlSlurper(SAXParser parser);
    
    // Parsing methods
    public GPathResult parse(InputSource input) throws IOException, SAXException;
    public GPathResult parse(File file) throws IOException, SAXException;
    public GPathResult parse(InputStream input) throws IOException, SAXException;
    public GPathResult parse(Reader in) throws IOException, SAXException;
    public GPathResult parse(String uri) throws IOException, SAXException;
    public GPathResult parseText(String text) throws SAXException;
    
    // Configuration methods
    public GPathResult getDocument();
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
    public boolean isKeepIgnorableWhitespace();
    public void setEntityBaseUrl(URL base);
    
    // SAX Handler configuration
    public void setContentHandler(ContentHandler contentHandler);
    public void setErrorHandler(ErrorHandler errorHandler);
    public void setEntityResolver(EntityResolver entityResolver);
    public void setDTDHandler(DTDHandler dtdHandler);
}

XmlSlurper Usage

// Basic slurping
def slurper = new XmlSlurper()
def catalog = slurper.parseText('''
    <catalog>
        <book id="1" category="fiction">
            <title>The Great Gatsby</title>
            <author>F. Scott Fitzgerald</author>
            <price currency="USD">12.99</price>
        </book>
        <book id="2" category="science">
            <title>A Brief History of Time</title>
            <author>Stephen Hawking</author>
            <price currency="USD">15.99</price>
        </book>
    </catalog>
''')

// XPath-like navigation
println catalog.book.title.text()           // All titles as text
println catalog.book[0].title               // "The Great Gatsby"
println catalog.book.'@category'            // All category attributes
println catalog.book.find { it.'@id' == '1' }.title  // Find by attribute

// Advanced navigation
// '**' yields a depth-first iterator and findAll on it returns a plain List,
// which has no text() method. Use the spread operator (*.) to call text()
// on every matching node.
println catalog.'**'.findAll { it.name() == 'price' }*.text()  // [12.99, 15.99]
println catalog.book.findAll { it.price.toDouble() > 13.0 }   // Books over $13

// Attribute access
catalog.book.each { book ->
    println "Book ${book.'@id'}: ${book.title} by ${book.author}"
    println "Price: ${book.price.'@currency'} ${book.price.text()}"
}

// Parse from file with configuration
def namespaceSlurper = new XmlSlurper(false, true)  // not validating, namespace aware
namespaceSlurper.setKeepIgnorableWhitespace(false)
def result = namespaceSlurper.parse(new File('document.xml'))

Node Type (XmlParser Result)

The Node class represents parsed XML elements from XmlParser.

public class Node implements Serializable {
    // Basic properties
    public String name();
    public String text();
    public List<Node> children();
    public Map<String, String> attributes();
    
    // Content access
    public Object get(String key);
    public Object getAt(String key);
    public void putAt(String key, Object value);
    
    // Modification methods
    public void setValue(String value);
    public Node appendNode(String name);
    public Node appendNode(String name, String value);
    public Node appendNode(String name, Map<String, Object> attributes);
    public Node appendNode(String name, Map<String, Object> attributes, String value);
    public boolean remove(Node child);
    
    // Navigation
    public Node parent();
    public List<Node> breadthFirst();
    public List<Node> depthFirst();
    
    // Utility methods
    public Node plus(Node node);
    public Iterator<Node> iterator();
}

Node Usage Examples

def parser = new XmlParser()
def root = parser.parseText('<root><item id="1">value</item></root>')

// Access node properties
println root.name()                    // "root"
println root.item[0].text()           // "value"
println root.item[0].'@id'            // "1"

// Traverse and modify
root.children().each { child ->
    println "Child: ${child.name()} = ${child.text()}"
}

// Add new nodes
def newItem = root.appendNode('item', [id: '2'], 'new value')

// appendNode has no closure-based builder form: a trailing closure would be
// bound to the (name, value) overload and stored as the node's value without
// ever being executed. Append the children to the returned node instead.
def metadata = root.appendNode('metadata')
metadata.appendNode('created', new Date().toString())
metadata.appendNode('version', '1.0')

// Remove nodes
root.item.findAll { it.'@id' == '1' }.each { root.remove(it) }

Parser Configuration

Both parsers support extensive configuration for different parsing scenarios:

// Validation and namespace configuration
def validatingParser = new XmlParser(
    true,    // validating
    true,    // namespace aware
    false    // allowDocTypeDeclaration = false: DOCTYPE declarations are not allowed
)

// Whitespace handling
parser.setTrimWhitespace(true)              // Trim whitespace around text
parser.setKeepIgnorableWhitespace(false)    // Don't keep insignificant whitespace

// Custom SAX configuration
parser.setErrorHandler(new MyErrorHandler())
parser.setEntityResolver(new MyEntityResolver())

// For XmlSlurper
def slurper = new XmlSlurper(false, true)   // not validating, namespace aware
slurper.setKeepIgnorableWhitespace(false)
slurper.setEntityBaseUrl(new URL('http://example.com/'))

Error Handling

Both parsers can throw SAXException and IOException during parsing:

try {
    def parser = new XmlParser()
    def result = parser.parseText(invalidXml)
} catch (SAXException e) {
    println "XML parsing error: ${e.message}"
} catch (IOException e) {
    println "IO error: ${e.message}"
}

// Custom error handling
parser.setErrorHandler(new ErrorHandler() {
    void error(SAXParseException e) throws SAXException {
        println "Parsing error at line ${e.lineNumber}: ${e.message}"
    }
    
    void fatalError(SAXParseException e) throws SAXException {
        throw e  // Re-throw fatal errors
    }
    
    void warning(SAXParseException e) throws SAXException {
        println "Warning: ${e.message}"
    }
})

Comparison: XmlParser vs XmlSlurper

Feature	XmlParser	XmlSlurper
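Result Type	Node (mutable)	GPathResult (lazily evaluated)
Memory Usage	Higher (full DOM)	Lower (lazy evaluation)
Navigation	Object traversal	XPath-like
Modification	Yes (changes applied immediately)	Limited (replaceNode/appendNode/replaceBody; changes are deferred and only visible after serializing or re-parsing)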
Performance	Better for modification	Better for navigation
Suitable For	Manipulating XML	Querying XML

Choose XmlParser when you need to modify XML structures, and XmlSlurper when you primarily need to read and navigate XML content.

Parser Factory Classes

Factory classes provide convenient methods for creating parser instances with standard configurations.

XmlParserFactory

class XmlParserFactory {
    static Object newParser(Object... args)
}

XmlSlurperFactory

class XmlSlurperFactory {
    static Object newSlurper(Object... args)
}

Factory Usage

// Using parser factory with default settings
def parser = XmlParserFactory.newParser()

// Using parser factory with custom settings
def validatingParser = XmlParserFactory.newParser(
    true,   // validating
    true    // namespace aware
)

// Using slurper factory
def slurper = XmlSlurperFactory.newSlurper(false, true)  // not validating, namespace aware

// Factories handle ParserConfigurationException and SAXException internally
try {
    def customParser = XmlParserFactory.newParser(true, true, false)  // validating, namespace aware, no DOCTYPE
    def result = customParser.parseText(xmlString)
} catch (Exception e) {
    println "Parser creation or parsing failed: ${e.message}"
}

Advanced Error Handling

Comprehensive Exception Handling

import javax.xml.parsers.ParserConfigurationException
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException

// Parses the given XML text with XmlParser and returns the root Node.
// Every failure is reported on stdout and turned into a null result, with
// the most specific exception type handled first.
def robustParse = { xmlString ->
    def parsedRoot = null

    try {
        parsedRoot = new XmlParser().parseText(xmlString)
    } catch (ParserConfigurationException configFailure) {
        // The underlying SAX parser could not be created
        println "Parser configuration error: ${configFailure.message}"
        println "Check your XML parser installation and configuration"
    } catch (SAXParseException locatedFailure) {
        // Malformed document: the exception carries position information
        println "XML structure error at line ${locatedFailure.lineNumber}, column ${locatedFailure.columnNumber}:"
        println "  ${locatedFailure.message}"
        println "  System ID: ${locatedFailure.systemId}"
        println "  Public ID: ${locatedFailure.publicId}"
    } catch (SAXException saxFailure) {
        // Any other SAX problem, possibly wrapping another exception
        println "XML parsing error: ${saxFailure.message}"
        if (saxFailure.exception) {
            println "Root cause: ${saxFailure.exception.message}"
        }
    } catch (IOException ioFailure) {
        println "IO error while parsing: ${ioFailure.message}"
    } catch (Exception unexpected) {
        println "Unexpected error during parsing: ${unexpected.message}"
        unexpected.printStackTrace()
    }

    // Still null when any of the handlers above ran
    parsedRoot
}

// Usage
def xml = '<root><item>valid</item></root>'
def result = robustParse(xml)

def invalidXml = '<root><item>unclosed'
def failedResult = robustParse(invalidXml)  // Will handle the error gracefully

Custom Error Handlers

import org.xml.sax.ErrorHandler
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException

/**
 * SAX ErrorHandler that records every reported problem by severity and
 * echoes it to stdout.
 *
 * Warnings and recoverable errors are collected without interrupting the
 * parse; fatal errors are collected and then re-thrown.
 */
class DetailedErrorHandler implements ErrorHandler {
    List<String> warnings = []
    List<String> errors = []
    List<String> fatalErrors = []
    
    /** Records and prints a warning; parsing continues. */
    @Override
    void warning(SAXParseException e) throws SAXException {
        def msg = "Warning at line ${e.lineNumber}: ${e.message}"
        warnings << msg
        println msg
    }
    
    /** Records and prints a recoverable error; parsing continues. */
    @Override
    void error(SAXParseException e) throws SAXException {
        def msg = "Error at line ${e.lineNumber}: ${e.message}"
        errors << msg
        println msg
        // Don't throw - allow parsing to continue
    }
    
    /** Records and prints a fatal error, then re-throws it to abort the parse. */
    @Override
    void fatalError(SAXParseException e) throws SAXException {
        def msg = "Fatal error at line ${e.lineNumber}: ${e.message}"
        fatalErrors << msg
        println msg
        throw e  // Must throw for fatal errors
    }
    
    /** @return true when at least one error or fatal error was recorded (warnings do not count) */
    boolean hasErrors() {
        return !errors.isEmpty() || !fatalErrors.isEmpty()
    }
    
    /** Prints the number of recorded warnings, errors and fatal errors. */
    void printSummary() {
        println "Parsing summary:"
        println "  Warnings: ${warnings.size()}"
        println "  Errors: ${errors.size()}"
        println "  Fatal errors: ${fatalErrors.size()}"
    }
}

// Usage with custom error handler
// Parses the XML text with a DetailedErrorHandler attached, always prints the
// handler's summary, and returns the parsed root or null when parsing aborts.
def parseWithDetailedErrors = { xmlString ->
    def collector = new DetailedErrorHandler()
    def parsedRoot = null

    try {
        def xmlParser = new XmlParser()
        xmlParser.setErrorHandler(collector)
        parsedRoot = xmlParser.parseText(xmlString)

        collector.printSummary()
        if (collector.hasErrors()) {
            println "Parsing completed with errors - results may be incomplete"
        }
    } catch (Exception failure) {
        collector.printSummary()
        println "Parsing failed: ${failure.message}"
        parsedRoot = null
    }

    parsedRoot
}

Validation Error Handling

import groovy.xml.XmlUtil
import javax.xml.validation.SchemaFactory
import javax.xml.validation.Schema
import javax.xml.XMLConstants
import javax.xml.transform.stream.StreamSource
import org.xml.sax.ErrorHandler
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException

// Parses xmlString while validating it against the XML Schema in xsdFile.
//   xmlString - the XML document text
//   xsdFile   - java.io.File pointing at the XSD
// Returns the parsed root Node, or null when the schema cannot be loaded or
// the document is not well-formed. Schema violations are collected and
// printed; they do not abort the parse.
def parseWithSchemaValidation = { xmlString, xsdFile ->
    try {
        // XmlUtil.newSAXParser accepts the schema as a File, URL or Source
        // (not as a pre-built Schema object) and compiles it internally.
        def parser = XmlUtil.newSAXParser(XMLConstants.W3C_XML_SCHEMA_NS_URI, xsdFile)
        def xmlParser = new XmlParser(parser)
        
        // Custom error handler for validation errors
        def validationErrors = []
        xmlParser.setErrorHandler(new ErrorHandler() {
            void warning(SAXParseException e) {
                validationErrors << "Validation warning: ${e.message}"
            }
            
            void error(SAXParseException e) {
                validationErrors << "Validation error: ${e.message}"
            }
            
            void fatalError(SAXParseException e) throws SAXException {
                validationErrors << "Fatal validation error: ${e.message}"
                throw e
            }
        })
        
        def result = xmlParser.parseText(xmlString)
        
        if (validationErrors) {
            println "Validation issues found:"
            validationErrors.each { println "  ${it}" }
        } else {
            println "Document is valid according to schema"
        }
        
        return result
        
    } catch (Exception e) {
        println "Schema validation failed: ${e.message}"
        return null
    }
}

// Usage
def xsdFile = new File('catalog.xsd')
def validXml = '''
    <catalog xmlns="http://example.com/catalog">
        <book id="1">
            <title>Valid Book</title>
            <author>Valid Author</author>
        </book>
    </catalog>
'''

def result = parseWithSchemaValidation(validXml, xsdFile)

Recovery Strategies

import groovy.util.slurpersupport.GPathResult

/**
 * Best-effort parsing helpers that retry with progressively more lenient
 * settings: strict (validating, namespace aware), then lenient, then lenient
 * after repairing common escaping mistakes in the input text.
 */
class XmlParsingRecovery {
    
    /**
     * Parses xmlString with XmlParser, falling back to more lenient strategies.
     *
     * @return the root Node of the first attempt that succeeds, or null when
     *         every attempt fails
     */
    static Node parseWithFallback(String xmlString) {
        // Try strict parsing first
        try {
            def parser = new XmlParser(true, true)  // validating, namespace aware
            return parser.parseText(xmlString)
        } catch (Exception e) {
            println "Strict parsing failed: ${e.message}"
        }
        
        // Try lenient parsing
        try {
            def parser = new XmlParser(false, false)  // non-validating, not namespace aware
            parser.setTrimWhitespace(true)
            return parser.parseText(xmlString.trim())
        } catch (Exception e) {
            println "Lenient parsing failed: ${e.message}"
        }
        
        // Try to fix common issues and parse again
        try {
            def fixedXml = fixCommonXmlIssues(xmlString)
            def parser = new XmlParser(false, false)
            return parser.parseText(fixedXml)
        } catch (Exception e) {
            println "Recovery parsing failed: ${e.message}"
        }
        
        return null
    }
    
    /**
     * Heuristically repairs unescaped '&' and '<' characters in XML text and
     * normalizes line endings. This is a textual heuristic, not a parser:
     * a '<' immediately followed by a letter is always treated as markup.
     *
     * @return the repaired, trimmed text
     */
    static String fixCommonXmlIssues(String xml) {
        return xml
            // Escape '&' unless it already starts an entity or character reference
            .replaceAll(/&(?![a-zA-Z0-9#]+;)/, '&amp;')
            // Escape '<' unless it starts a tag (<name, </name) or a
            // declaration, comment, CDATA section or processing instruction
            .replaceAll(/<(?!\/?[a-zA-Z_:]|[!?])/, '&lt;')
            // A bare '>' is legal in XML content, so it is left untouched:
            // rewriting it by pattern corrupts real tag endings such as
            // id="1">, />, ?> and -->.
            .replaceAll(/\r\n|\r/, '\n')                 // Normalize line endings
            .trim()
    }
    
    /**
     * Parses xmlString with XmlSlurper using the same fallback strategy as
     * parseWithFallback.
     *
     * @return the GPathResult of the first attempt that succeeds, or null
     *         when every attempt fails
     */
    static GPathResult slurpWithFallback(String xmlString) {
        // Try strict slurping first
        try {
            def slurper = new XmlSlurper(true, true)
            return slurper.parseText(xmlString)
        } catch (Exception e) {
            println "Strict slurping failed: ${e.message}"
        }
        
        // Try lenient slurping
        try {
            def slurper = new XmlSlurper(false, false)
            slurper.setKeepIgnorableWhitespace(false)
            return slurper.parseText(xmlString.trim())
        } catch (Exception e) {
            println "Lenient slurping failed: ${e.message}"
        }
        
        // Try to fix common issues and slurp again
        try {
            def fixedXml = fixCommonXmlIssues(xmlString)
            def slurper = new XmlSlurper(false, false)
            return slurper.parseText(fixedXml)
        } catch (Exception e) {
            println "Recovery slurping failed: ${e.message}"
        }
        
        return null
    }
}

// Usage
def problematicXml = '<root><item>Text with & unescaped chars < ></item></root>'
def recovered = XmlParsingRecovery.parseWithFallback(problematicXml)

if (recovered) {
    println "Successfully recovered and parsed XML"
    println recovered.item.text()
} else {
    println "Could not recover the XML"
}

Performance Considerations

Memory Management for Large Documents

// For large XML files, prefer XmlSlurper over XmlParser
// Picks the parser by file size: files of at most 10MB are parsed with
// XmlParser, anything larger is slurped with XmlSlurper.
def processLargeXml = { file ->
    def largeFileThreshold = 10 * 1024 * 1024  // 10MB

    if (file.size() <= largeFileThreshold) {
        println "Small file, using XmlParser for full DOM access"
        return new XmlParser().parse(file)
    }

    println "Large file detected, using XmlSlurper for better memory efficiency"
    def largeFileSlurper = new XmlSlurper()
    largeFileSlurper.setKeepIgnorableWhitespace(false)
    largeFileSlurper.parse(file)
}

// Process the top-level elements of a document one at a time.
// NOTE: slurper.parse(file) reads the whole document before the loop starts,
// so this splits the *processing* into per-element steps; it does not reduce
// the memory needed to hold the parsed document.
//   file           - the XML file to parse
//   chunkProcessor - closure invoked once per top-level child element
def processXmlInChunks = { file, chunkProcessor ->
    def slurper = new XmlSlurper()
    def doc = slurper.parse(file)
    
    // No explicit System.gc() here: 'doc' keeps every element reachable, so a
    // forced collection per element frees nothing and only slows the loop.
    doc.children().each { element ->
        chunkProcessor(element)
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-codehaus-groovy--groovy-xml

docs