XML processing utilities for Apache Groovy including markup builders, parsers, and navigation tools
—
Groovy XML provides two main approaches to parsing XML documents: XmlParser for Node-based manipulation and XmlSlurper for XPath-like navigation using GPathResult.
XmlParser parses XML into a tree of Node objects that can be directly manipulated, modified, and traversed.
/**
 * API summary of XmlParser, the SAX-based parser that builds a tree of Node objects.
 * This is a signature listing only; method bodies are intentionally omitted.
 */
public class XmlParser implements ContentHandler {
    // Constructors
    public XmlParser();
    public XmlParser(boolean validating, boolean namespaceAware);
    public XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
    public XmlParser(XMLReader reader);
    public XmlParser(SAXParser parser);

    // Parsing methods: each returns the root Node of the parsed document
    public Node parse(File file) throws IOException, SAXException;
    public Node parse(InputSource input) throws IOException, SAXException;
    public Node parse(InputStream input) throws IOException, SAXException;
    public Node parse(Reader in) throws IOException, SAXException;
    public Node parse(String uri) throws IOException, SAXException;
    // parseText declares IOException as well as SAXException, which is why the
    // error-handling examples in this document catch both around parseText(...)
    public Node parseText(String text) throws IOException, SAXException;

    // Configuration methods
    public boolean isTrimWhitespace();
    public void setTrimWhitespace(boolean trimWhitespace);
    public boolean isKeepIgnorableWhitespace();
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
    public boolean isNamespaceAware();
    public void setNamespaceAware(boolean namespaceAware);

    // SAX ContentHandler configuration
    // NOTE(review): confirm setContentHandler against the XmlParser API docs; the parser
    // is itself the ContentHandler, so this entry may not exist on the real class
    public void setContentHandler(ContentHandler contentHandler);
    public void setErrorHandler(ErrorHandler errorHandler);
    public void setEntityResolver(EntityResolver entityResolver);
    public void setDTDHandler(DTDHandler dtdHandler);
}

// Basic parsing
// Sample document used throughout this example
def catalogXml = '''
<catalog>
<book id="1" category="fiction">
<title>The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<price currency="USD">12.99</price>
</book>
<book id="2" category="science">
<title>A Brief History of Time</title>
<author>Stephen Hawking</author>
<price currency="USD">15.99</price>
</book>
</catalog>
'''
def parser = new XmlParser()
def root = parser.parseText(catalogXml)

// Read element names, child counts, text and attributes
def firstBook = root.book[0]
println root.name() // "catalog"
println root.book.size() // 2
println firstBook.title.text() // "The Great Gatsby"
println firstBook.'@id' // "1"
println firstBook.'@category' // "fiction"

// Change existing content in place
firstBook.title[0].setValue('New Title')
firstBook.author[0].setValue('New Author')

// Attach new child elements
root.appendNode('publisher', 'Penguin Books')
firstBook.appendNode('isbn', '978-0-7432-7356-5')

// Load a document from disk with the same parser
def fileNode = parser.parse(new File('catalog.xml'))

// Validating, namespace-aware parser with whitespace handling configured
def validatingParser = new XmlParser(true, true)
validatingParser.trimWhitespace = true
validatingParser.keepIgnorableWhitespace = false
def validatedRoot = validatingParser.parseText(xmlString)

XmlSlurper parses XML into GPathResult objects, providing XPath-like navigation and lazy evaluation.
/**
 * API summary of XmlSlurper, the SAX-based parser that produces GPathResult objects.
 * This is a signature listing only; method bodies are intentionally omitted.
 */
public class XmlSlurper extends DefaultHandler {
    // Constructors
    public XmlSlurper();
    public XmlSlurper(boolean validating, boolean namespaceAware);
    public XmlSlurper(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
    public XmlSlurper(XMLReader reader);
    public XmlSlurper(SAXParser parser);

    // Parsing methods: each returns a GPathResult for the parsed document
    public GPathResult parse(InputSource input) throws IOException, SAXException;
    public GPathResult parse(File file) throws IOException, SAXException;
    public GPathResult parse(InputStream input) throws IOException, SAXException;
    public GPathResult parse(Reader in) throws IOException, SAXException;
    public GPathResult parse(String uri) throws IOException, SAXException;
    // parseText declares IOException as well as SAXException, matching the statement
    // in this document that both parsers can throw either exception while parsing
    public GPathResult parseText(String text) throws IOException, SAXException;

    // Configuration methods
    public GPathResult getDocument();
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
    public boolean isKeepIgnorableWhitespace();
    public void setEntityBaseUrl(URL base);

    // SAX Handler configuration
    // NOTE(review): confirm setContentHandler against the XmlSlurper API docs
    public void setContentHandler(ContentHandler contentHandler);
    public void setErrorHandler(ErrorHandler errorHandler);
    public void setEntityResolver(EntityResolver entityResolver);
    public void setDTDHandler(DTDHandler dtdHandler);
}

// Basic slurping
def slurper = new XmlSlurper()
def catalog = slurper.parseText('''
<catalog>
<book id="1" category="fiction">
<title>The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<price currency="USD">12.99</price>
</book>
<book id="2" category="science">
<title>A Brief History of Time</title>
<author>Stephen Hawking</author>
<price currency="USD">15.99</price>
</book>
</catalog>
''')

// XPath-like navigation
println catalog.book.title.text() // Text of all titles
println catalog.book[0].title // "The Great Gatsby"
println catalog.book.'@category' // All category attributes
println catalog.book.find { it.'@id' == '1' }.title // Find by attribute

// Advanced navigation
// '**' walks the document depth-first and findAll then yields a plain list, which has
// no text() method; the spread operator applies text() to each matching element instead
println catalog.'**'.findAll { it.name() == 'price' }*.text() // All prices
println catalog.book.findAll { it.price.toDouble() > 13.0 } // Books over $13

// Attribute access
catalog.book.each { book ->
    println "Book ${book.'@id'}: ${book.title} by ${book.author}"
    println "Price: ${book.price.'@currency'} ${book.price.text()}"
}

// Parse from file with configuration
def namespaceSlurper = new XmlSlurper(false, true) // not validating, namespace aware
namespaceSlurper.setKeepIgnorableWhitespace(false)
def result = namespaceSlurper.parse(new File('document.xml'))

The Node class represents parsed XML elements produced by XmlParser.
/**
 * API summary of Node, the tree element type returned by XmlParser.
 * This is a simplified signature listing; method bodies are intentionally omitted.
 */
public class Node implements Serializable {
    // Basic properties
    public String name();
    public String text();
    public List<Node> children();
    public Map<String, String> attributes();

    // Content access
    public Object get(String key);
    public Object getAt(String key);
    public void putAt(String key, Object value);

    // Modification methods
    public void setValue(String value);
    public Node appendNode(String name);
    public Node appendNode(String name, String value);
    public Node appendNode(String name, Map<String, Object> attributes);
    public Node appendNode(String name, Map<String, Object> attributes, String value);
    public boolean remove(Node child);

    // Navigation
    public Node parent();
    public List<Node> breadthFirst();
    public List<Node> depthFirst();

    // Utility methods
    // NOTE(review): verify this signature against the groovy.util.Node API docs;
    // the real plus(...) is believed to take a Closure that adds sibling nodes
    public Node plus(Node node);
    public Iterator<Node> iterator();
}

def parser = new XmlParser()
def root = parser.parseText('<root><item id="1">value</item></root>')

// Access node properties
println root.name() // "root"
println root.item[0].text() // "value"
println root.item[0].'@id' // "1"

// Traverse and modify
root.children().each { child ->
    println "Child: ${child.name()} = ${child.text()}"
}

// Add new nodes
def newItem = root.appendNode('item', [id: '2'], 'new value')
// appendNode has no builder-closure form: a trailing closure would be stored as the
// node's value and never executed. Keep the returned node and append children to it.
def metadata = root.appendNode('metadata')
metadata.appendNode('created', new Date().toString())
metadata.appendNode('version', '1.0')

// Remove nodes
root.item.findAll { it.'@id' == '1' }.each { root.remove(it) }

Both parsers support extensive configuration for different parsing scenarios:
// Validation and namespace configuration
// Arguments in order: validating = true, namespaceAware = true,
// allowDocTypeDeclaration = false (DOCTYPE declarations are not allowed)
def validatingParser = new XmlParser(true, true, false)

// Whitespace handling
parser.trimWhitespace = true // Trim whitespace around text
parser.keepIgnorableWhitespace = false // Don't keep insignificant whitespace

// Custom SAX configuration
parser.errorHandler = new MyErrorHandler()
parser.entityResolver = new MyEntityResolver()

// For XmlSlurper
def slurper = new XmlSlurper(false, true) // not validating, namespace aware
slurper.keepIgnorableWhitespace = false
slurper.setEntityBaseUrl(new URL('http://example.com/'))

Both parsers can throw SAXException and IOException during parsing:
// Declared outside the try block so the error-handler example below can still use it
def parser = new XmlParser()
try {
    def result = parser.parseText(invalidXml)
} catch (SAXException e) {
    println "XML parsing error: ${e.message}"
} catch (IOException e) {
    println "IO error: ${e.message}"
}

// Custom error handling
parser.setErrorHandler(new ErrorHandler() {
    // Reports the problem and lets parsing continue
    void error(SAXParseException e) throws SAXException {
        println "Parsing error at line ${e.lineNumber}: ${e.message}"
    }
    void fatalError(SAXParseException e) throws SAXException {
        throw e // Re-throw fatal errors
    }
    void warning(SAXParseException e) throws SAXException {
        println "Warning: ${e.message}"
    }
})

| Feature | XmlParser | XmlSlurper |
|---|---|---|
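| Result Type | Node (mutable tree) | GPathResult (lazily evaluated) |
| Memory Usage | Higher (builds the full Node tree in memory) | Lower (lazy evaluation) |
| Navigation | Object traversal | XPath-like |
| Modification | Yes (changes are visible immediately) | Limited (edits are deferred and only visible once the result is serialized or re-parsed) |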
| Performance | Better for modification | Better for navigation |
| Suitable For | Manipulating XML | Querying XML |
Choose XmlParser when you need to modify XML structures, and XmlSlurper when you primarily need to read and navigate XML content.
Factory classes provide convenient methods for creating parser instances with standard configurations.
// Signature listing: factory entry point for parser instances.
// The examples below pass the same arguments as the XmlParser constructors.
class XmlParserFactory {
    static Object newParser(Object... args)
}

// Signature listing: factory entry point for slurper instances.
class XmlSlurperFactory {
    static Object newSlurper(Object... args)
}

// Using parser factory with default settings
def parser = XmlParserFactory.newParser()

// Using parser factory with custom settings
def validatingParser = XmlParserFactory.newParser(
    true, // validating
    true // namespace aware
)

// Using slurper factory
def slurper = XmlSlurperFactory.newSlurper(false, true) // not validating, namespace aware

// Factories handle ParserConfigurationException and SAXException internally
try {
    def customParser = XmlParserFactory.newParser(true, true, false) // validating, namespace aware, no DOCTYPE
    def result = customParser.parseText(xmlString)
} catch (Exception e) {
    // Covers both failure to create the parser and failure to parse the text
    println "Parser creation or parsing failed: ${e.message}"
}

import javax.xml.parsers.ParserConfigurationException
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException
// Parses xmlString with a default XmlParser and returns the root node.
// Every failure is reported with println and turned into a null result, so callers
// only need a null check. Catch clauses run from most to least specific because
// SAXParseException is a subclass of SAXException.
def robustParse = { xmlString ->
    try {
        def parser = new XmlParser()
        return parser.parseText(xmlString)
    } catch (ParserConfigurationException e) {
        println "Parser configuration error: ${e.message}"
        println "Check your XML parser installation and configuration"
        return null
    } catch (SAXParseException e) {
        // Carries the position of the problem in the document
        println "XML structure error at line ${e.lineNumber}, column ${e.columnNumber}:"
        println " ${e.message}"
        println " System ID: ${e.systemId}"
        println " Public ID: ${e.publicId}"
        return null
    } catch (SAXException e) {
        println "XML parsing error: ${e.message}"
        // A SAXException may wrap the exception that actually caused the failure
        if (e.exception) {
            println "Root cause: ${e.exception.message}"
        }
        return null
    } catch (IOException e) {
        println "IO error while parsing: ${e.message}"
        return null
    } catch (Exception e) {
        println "Unexpected error during parsing: ${e.message}"
        e.printStackTrace()
        return null
    }
}

// Usage
def xml = '<root><item>valid</item></root>'
def result = robustParse(xml)
def invalidXml = '<root><item>unclosed'
def failedResult = robustParse(invalidXml) // Will handle the error gracefully

import org.xml.sax.ErrorHandler
import org.xml.sax.SAXParseException
/**
 * SAX ErrorHandler that prints every reported problem and also keeps the
 * messages, grouped by severity, so they can be inspected after parsing.
 */
class DetailedErrorHandler implements ErrorHandler {
    List<String> warnings = []
    List<String> errors = []
    List<String> fatalErrors = []

    /** Prints and records a warning; parsing continues. */
    @Override
    void warning(SAXParseException e) throws SAXException {
        def msg = "Warning at line ${e.lineNumber}: ${e.message}"
        warnings.add(msg)
        println msg
    }

    /** Prints and records a recoverable error without rethrowing, so parsing continues. */
    @Override
    void error(SAXParseException e) throws SAXException {
        def msg = "Error at line ${e.lineNumber}: ${e.message}"
        errors.add(msg)
        println msg
    }

    /** Prints and records a fatal error, then rethrows it to the parser. */
    @Override
    void fatalError(SAXParseException e) throws SAXException {
        def msg = "Fatal error at line ${e.lineNumber}: ${e.message}"
        fatalErrors.add(msg)
        println msg
        throw e // Must throw for fatal errors
    }

    /** @return true when at least one error or fatal error was recorded; warnings do not count */
    boolean hasErrors() {
        return errors || fatalErrors
    }

    /** Prints how many messages were recorded for each severity. */
    void printSummary() {
        println "Parsing summary:"
        [
            " Warnings: ${warnings.size()}",
            " Errors: ${errors.size()}",
            " Fatal errors: ${fatalErrors.size()}"
        ].each { println it }
    }
}
// Usage with custom error handler
// Parses xmlString with a DetailedErrorHandler attached, prints the handler's summary
// whether or not parsing succeeded, and returns the parsed root (null on failure).
def parseWithDetailedErrors = { xmlString ->
    def errorHandler = new DetailedErrorHandler()
    try {
        def parser = new XmlParser()
        parser.setErrorHandler(errorHandler)
        def result = parser.parseText(xmlString)
        errorHandler.printSummary()
        // Non-fatal errors do not abort parsing, so a result can exist alongside errors
        if (errorHandler.hasErrors()) {
            println "Parsing completed with errors - results may be incomplete"
        }
        return result
    } catch (Exception e) {
        errorHandler.printSummary()
        println "Parsing failed: ${e.message}"
        return null
    }
}

import javax.xml.validation.SchemaFactory
import javax.xml.validation.Schema
import javax.xml.XMLConstants
import javax.xml.transform.stream.StreamSource
// Parses xmlString with a parser that validates against the XML Schema in xsdFile.
// Validation warnings and errors are collected and printed after parsing; a fatal
// error, or any other exception, is reported and yields null.
def parseWithSchemaValidation = { xmlString, xsdFile ->
    try {
        // XmlUtil.newSAXParser takes the schema as a File (or URL / Source), not as a
        // pre-built javax.xml.validation.Schema, so the XSD file is passed directly.
        // The class is fully qualified because groovy.xml is not imported by default.
        def parser = groovy.xml.XmlUtil.newSAXParser(XMLConstants.W3C_XML_SCHEMA_NS_URI, xsdFile)
        def xmlParser = new XmlParser(parser)

        // Custom error handler for validation errors
        def validationErrors = []
        xmlParser.setErrorHandler(new ErrorHandler() {
            void warning(SAXParseException e) {
                validationErrors << "Validation warning: ${e.message}"
            }
            void error(SAXParseException e) {
                validationErrors << "Validation error: ${e.message}"
            }
            void fatalError(SAXParseException e) throws SAXException {
                validationErrors << "Fatal validation error: ${e.message}"
                throw e
            }
        })

        def result = xmlParser.parseText(xmlString)
        if (validationErrors) {
            println "Validation issues found:"
            validationErrors.each { println " ${it}" }
        } else {
            println "Document is valid according to schema"
        }
        return result
    } catch (Exception e) {
        println "Schema validation failed: ${e.message}"
        return null
    }
}
// Usage
def xsdFile = new File('catalog.xsd')
def validXml = '''
<catalog xmlns="http://example.com/catalog">
<book id="1">
<title>Valid Book</title>
<author>Valid Author</author>
</book>
</catalog>
'''
def result = parseWithSchemaValidation(validXml, xsdFile)

class XmlParsingRecovery {
/**
 * Parses xmlString with XmlParser, moving to a more forgiving strategy each time
 * the previous one throws: strict, then lenient, then lenient on repaired text.
 * Each failure is reported with println.
 *
 * @param xmlString the XML text to parse
 * @return the root node from the first strategy that succeeds, or null if all fail
 */
static Node parseWithFallback(String xmlString) {
    // Attempt 1: validating, namespace-aware parser
    try {
        return new XmlParser(true, true).parseText(xmlString)
    } catch (Exception e) {
        println "Strict parsing failed: ${e.message}"
    }

    // Attempt 2: non-validating, not namespace-aware, on trimmed input
    try {
        def lenientParser = new XmlParser(false, false)
        lenientParser.trimWhitespace = true
        return lenientParser.parseText(xmlString.trim())
    } catch (Exception e) {
        println "Lenient parsing failed: ${e.message}"
    }

    // Attempt 3: repair common markup mistakes first, then parse leniently
    try {
        def repairedXml = fixCommonXmlIssues(xmlString)
        return new XmlParser(false, false).parseText(repairedXml)
    } catch (Exception e) {
        println "Recovery parsing failed: ${e.message}"
    }

    return null
}
/**
 * Best-effort repair of markup characters that were left unescaped in text content.
 * This is a heuristic, not a full XML repair tool.
 *
 * @param xml the possibly malformed XML text
 * @return the text with stray '&', '<' and '>' replaced by entity references,
 *         line endings normalized to '\n', and surrounding whitespace trimmed
 */
static String fixCommonXmlIssues(String xml) {
    return xml
        // '&' that does not already start an entity or character reference
        .replaceAll(/&(?![a-zA-Z0-9#]+;)/, '&amp;')
        // '<' that does not open a tag, closing tag, comment, CDATA, DOCTYPE or processing instruction
        .replaceAll(/<(?!\/?[a-zA-Z_:]|[!?])/, '&lt;')
        // '>' that cannot be the end of markup: not after a name character, a quoted
        // attribute value, '/', '?', '-' or ']'
        .replaceAll(/(?<![a-zA-Z0-9"'\/?\]-])>/, '&gt;')
        // Normalize line endings
        .replaceAll(/\r\n|\r/, '\n')
        .trim()
}
/**
 * XmlSlurper counterpart of the parser fallback: strict first, then lenient,
 * then lenient on repaired text. Each failure is reported with println.
 *
 * @param xmlString the XML text to slurp
 * @return the result from the first strategy that succeeds, or null if all fail
 */
static GPathResult slurpWithFallback(String xmlString) {
    // Attempt 1: validating, namespace-aware slurper
    try {
        return new XmlSlurper(true, true).parseText(xmlString)
    } catch (Exception e) {
        println "Strict slurping failed: ${e.message}"
    }

    // Attempt 2: non-validating, not namespace-aware, on trimmed input
    try {
        def lenientSlurper = new XmlSlurper(false, false)
        lenientSlurper.keepIgnorableWhitespace = false
        return lenientSlurper.parseText(xmlString.trim())
    } catch (Exception e) {
        println "Lenient slurping failed: ${e.message}"
    }

    // Attempt 3: repair common markup mistakes first, then slurp leniently
    try {
        def repairedXml = fixCommonXmlIssues(xmlString)
        return new XmlSlurper(false, false).parseText(repairedXml)
    } catch (Exception e) {
        println "Recovery slurping failed: ${e.message}"
    }

    return null
}
}
// Usage: feed deliberately malformed text through the fallback chain
def problematicXml = '<root><item>Text with & unescaped chars < ></item></root>'
def recovered = XmlParsingRecovery.parseWithFallback(problematicXml)
if (!recovered) {
    println "Could not recover the XML"
} else {
    println "Successfully recovered and parsed XML"
    println recovered.item.text()
}

// For large XML files, prefer XmlSlurper over XmlParser
// Chooses the parser by file size: files above 10MB go through XmlSlurper,
// everything else through XmlParser. Returns whatever the chosen parser produces.
def processLargeXml = { file ->
    def largeFileThreshold = 10 * 1024 * 1024 // 10MB
    if (file.size() <= largeFileThreshold) {
        println "Small file, using XmlParser for full DOM access"
        return new XmlParser().parse(file)
    }
    println "Large file detected, using XmlSlurper for better memory efficiency"
    def slurper = new XmlSlurper()
    slurper.keepIgnorableWhitespace = false
    return slurper.parse(file)
}
// Process top-level elements one at a time
// NOTE: slurper.parse(file) still reads the whole document before iteration starts;
// this helper only hands each direct child of the root to chunkProcessor in turn.
def processXmlInChunks = { file, chunkProcessor ->
    def slurper = new XmlSlurper()
    def doc = slurper.parse(file)
    // No explicit System.gc() per element: forcing a collection on every iteration
    // is expensive, and 'doc' keeps the parsed document reachable for the whole loop
    doc.children().each { element ->
        chunkProcessor(element)
    }
}

Install with Tessl CLI
npx tessl i tessl/maven-org-codehaus-groovy--groovy-xml