0
# XML Processing
1
2
Groovy provides comprehensive XML support through multiple approaches: XmlSlurper for GPath-based navigation, XmlParser for DOM-style parsing, and various builders for XML generation.
3
4
## XML Parsing
5
6
### XmlSlurper
7
8
High-level XML parser that provides GPath navigation capabilities for easy XML traversal.
9
10
```groovy { .api }
11
class XmlSlurper {
12
XmlSlurper()
13
XmlSlurper(boolean validating, boolean namespaceAware)
14
XmlSlurper(XMLReader reader)
15
XmlSlurper(SAXParser parser)
16
XmlSlurper(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration)
17
18
GPathResult parseText(String text)
19
GPathResult parse(File file)
20
GPathResult parse(InputStream input)
21
GPathResult parse(Reader reader)
22
GPathResult parse(InputSource input)
23
GPathResult parse(String uri)
24
GPathResult parse(Path path)
25
26
void setFeature(String feature, boolean value)
27
boolean getFeature(String feature)
28
void setProperty(String property, Object value)
29
Object getProperty(String property)
30
31
EntityResolver getEntityResolver()
32
void setEntityResolver(EntityResolver resolver)
33
ErrorHandler getErrorHandler()
34
void setErrorHandler(ErrorHandler handler)
35
}
36
```
37
38
### GPathResult
39
40
Result object from XmlSlurper that supports GPath navigation syntax.
41
42
```groovy { .api }
43
abstract class GPathResult implements Writable {
44
abstract String name()
45
abstract Object text()
46
abstract GPathResult parent()
47
abstract Iterator childNodes()
48
abstract Iterator iterator()
49
abstract GPathResult find(Closure closure)
50
abstract GPathResult findAll(Closure closure)
51
abstract Iterator depthFirst()
52
abstract Iterator breadthFirst()
53
54
int size()
55
boolean isEmpty()
56
String toString()
57
Writer writeTo(Writer out)
58
GPathResult getProperty(String property)
59
void setProperty(String property, Object value)
60
61
// Attribute access
62
String attribute(String name)
63
GPathResult attributes()
64
65
// Navigation
66
GPathResult getAt(int index)
67
GPathResult getAt(String name)
68
GPathResult children()
69
}
70
```
71
72
Usage examples:
73
```groovy
74
import groovy.util.XmlSlurper
75
76
def xml = '''
77
<library>
78
<book id="1" category="fiction">
79
<title>The Great Gatsby</title>
80
<author>F. Scott Fitzgerald</author>
81
<year>1925</year>
82
</book>
83
<book id="2" category="science">
84
<title>Relativity</title>
85
<author>Albert Einstein</author>
86
<year>1916</year>
87
</book>
88
</library>
89
'''
90
91
def slurper = new XmlSlurper()
92
def library = slurper.parseText(xml)
93
94
// Navigate using GPath
95
assert library.book.size() == 2
96
assert library.book[0].title.text() == 'The Great Gatsby'
97
assert library.book.find { it.@category == 'science' }.title.text() == 'Relativity'
98
99
// Access attributes
100
assert library.book[0].@id == '1'
101
assert library.book[0].@category == 'fiction'
102
103
// Find all books by criteria
104
def fictionBooks = library.book.findAll { it.@category == 'fiction' }
105
def modernBooks = library.book.findAll { it.year.text() as Integer > 1920 }
106
107
// Iterate through elements
108
library.book.each { book ->
109
println "${book.title.text()} by ${book.author.text()}"
110
}
111
```
112
113
### XmlParser
114
115
DOM-style XML parser that creates a Node tree structure.
116
117
```groovy { .api }
118
class XmlParser {
119
XmlParser()
120
XmlParser(boolean validating, boolean namespaceAware)
121
XmlParser(XMLReader reader)
122
123
Node parseText(String text)
124
Node parse(File file)
125
Node parse(InputStream input)
126
Node parse(Reader reader)
127
Node parse(InputSource input)
128
Node parse(URL url)
129
130
void setFeature(String feature, boolean value)
131
boolean getFeature(String feature)
132
void setProperty(String property, Object value)
133
Object getProperty(String property)
134
}
135
```
136
137
### Node
138
139
Represents an XML element in the DOM-style tree.
140
141
```groovy { .api }
142
class Node {
143
Node(Node parent, Object name)
144
Node(Node parent, Object name, Map attributes)
145
Node(Node parent, Object name, String value)
146
Node(Node parent, Object name, Map attributes, String value)
147
148
String name()
149
Object value()
150
String text()
151
List children()
152
Map attributes()
153
Node parent()
154
155
void setName(Object name)
156
void setValue(Object value)
157
void attributes(Map attributes)
158
void setParent(Node parent)
159
160
Object getAt(String key) // Attribute access
161
NodeList getAt(QName name) // Child access
162
void putAt(String key, Object value) // Set attribute
163
164
Node appendNode(Object name)
165
Node appendNode(Object name, Object value)
166
Node appendNode(Object name, Map attributes)
167
Node appendNode(Object name, Map attributes, Object value)
168
169
boolean remove(Node child)
170
NodeList depthFirst()
171
NodeList breadthFirst()
172
}
173
```
174
175
Usage example:
176
```groovy
177
import groovy.util.XmlParser
178
179
def parser = new XmlParser()
180
def library = parser.parseText(xml)
181
182
// Navigate the DOM tree
183
assert library.name() == 'library'
184
assert library.book.size() == 2
185
assert library.book[0].title.text() == 'The Great Gatsby'
186
187
// Modify the tree
188
library.book[0].appendNode('publisher', 'Scribner')
189
library.book[1].@category = 'physics' // Modify attribute
190
191
// Add new elements
192
def newBook = library.appendNode('book', [id: '3', category: 'mystery'])
193
newBook.appendNode('title', 'The Maltese Falcon')
194
newBook.appendNode('author', 'Dashiell Hammett')
195
newBook.appendNode('year', '1930')
196
```
197
198
## XML Generation
199
200
### MarkupBuilder
201
202
Builder for generating XML and HTML markup with a fluent API.
203
204
```groovy { .api }
205
class MarkupBuilder extends BuilderSupport {
206
MarkupBuilder()
207
MarkupBuilder(Writer writer)
208
MarkupBuilder(IndentPrinter printer)
209
210
Object invokeMethod(String methodName, Object args)
211
void yield(Object value)
212
void yield(Object value, boolean escapeMarkup)
213
void yieldUnescaped(Object value)
214
215
Writer getWriter()
216
IndentPrinter getPrinter()
217
boolean isDoubleQuotes()
218
void setDoubleQuotes(boolean doubleQuotes)
219
boolean isExpandEmptyElements()
220
void setExpandEmptyElements(boolean expandEmptyElements)
221
boolean isOmitEmptyAttributes()
222
void setOmitEmptyAttributes(boolean omitEmptyAttributes)
223
boolean isOmitNullAttributes()
224
void setOmitNullAttributes(boolean omitNullAttributes)
225
}
226
```
227
228
Usage examples:
229
```groovy
230
import groovy.xml.MarkupBuilder
231
232
def writer = new StringWriter()
233
def xml = new MarkupBuilder(writer)
234
235
xml.library {
236
book(id: '1', category: 'fiction') {
237
title('The Great Gatsby')
238
author('F. Scott Fitzgerald')
239
year(1925)
240
description {
241
yield('A classic American novel about the ', false)
242
em('Jazz Age')
243
yield('.', false)
244
}
245
}
246
book(id: '2', category: 'science') {
247
title('Relativity')
248
author('Albert Einstein')
249
year(1916)
250
}
251
}
252
253
println writer.toString()
254
```
255
256
### StreamingMarkupBuilder
257
258
Memory-efficient builder for generating large XML documents using streaming output.
259
260
```groovy { .api }
261
class StreamingMarkupBuilder {
262
StreamingMarkupBuilder()
263
264
Object bind(Closure closure)
265
void setEncoding(String encoding)
266
String getEncoding()
267
void setUseDoubleQuotes(boolean useDoubleQuotes)
268
boolean getUseDoubleQuotes()
269
}
270
```
271
272
Usage example:
273
```groovy
274
import groovy.xml.StreamingMarkupBuilder
275
276
// Sample input for the example; each entry supplies the properties read below
def books = [
    [id: '1', category: 'fiction', title: 'The Great Gatsby', author: 'F. Scott Fitzgerald', year: 1925],
    [id: '2', category: 'science', title: 'Relativity', author: 'Albert Einstein', year: 1916]
]

def builder = new StreamingMarkupBuilder()
builder.encoding = 'UTF-8'

// The closure describes the document: one <book> element per entry in `books`
def xml = builder.bind {
    library {
        books.each { bookData ->
            book(id: bookData.id, category: bookData.category) {
                title(bookData.title)
                author(bookData.author)
                year(bookData.year)
            }
        }
    }
}

println xml.toString()
292
```
293
294
### DOMBuilder
295
296
Builder that creates a DOM Document tree.
297
298
```groovy { .api }
299
class DOMBuilder extends BuilderSupport {
300
DOMBuilder()
301
DOMBuilder(DocumentBuilderFactory factory)
302
303
Document getDocument()
304
DocumentBuilderFactory getDocumentBuilderFactory()
305
void setDocumentBuilderFactory(DocumentBuilderFactory factory)
306
307
static Document parse(Reader reader)
308
static Document parse(Reader reader, boolean validating)
309
static Document parse(Reader reader, boolean validating, boolean namespaceAware)
310
static Document parse(File file)
311
static Document parse(String uri)
312
static Document parse(InputSource input)
313
static Document parse(InputStream input)
314
}
315
```
316
317
### SAXBuilder
318
319
SAX-based builder for event-driven XML processing.
320
321
```groovy { .api }
322
class SAXBuilder extends BuilderSupport {
323
SAXBuilder(ContentHandler handler)
324
325
ContentHandler getContentHandler()
326
void setContentHandler(ContentHandler handler)
327
}
328
```
329
330
## XML Utilities
331
332
### XmlUtil
333
334
Utility class for common XML operations.
335
336
```groovy { .api }
337
class XmlUtil {
338
static String serialize(String xmlString)
339
static String serialize(GPathResult node)
340
static String serialize(Node node)
341
static String serialize(Element element)
342
static String serialize(Document document)
343
344
static void serialize(Node node, OutputStream os)
345
static void serialize(Node node, Writer writer)
346
static void serialize(GPathResult node, Writer writer)
347
348
static String escapeXml(String value)
349
static String unescapeXml(String value)
350
351
static Node asNode(GPathResult gpathResult)
352
static String asString(Node node)
353
}
354
```
355
356
Usage examples:
357
```groovy
358
import groovy.xml.XmlUtil
359
import groovy.util.XmlSlurper
360
361
// Serialize XML structures
362
def xml = '''<book><title>Test</title></book>'''
363
def slurper = new XmlSlurper()
364
def book = slurper.parseText(xml)
365
366
def serialized = XmlUtil.serialize(book)
367
println serialized
368
369
// Escape/unescape XML
370
def unsafe = 'Title with <special> & "quoted" text'
def escaped = XmlUtil.escapeXml(unsafe)
assert escaped == 'Title with &lt;special&gt; &amp; &quot;quoted&quot; text'

// Decode by parsing: wrapping the escaped text in an element lets the parser resolve the entities
def unescaped = new XmlSlurper().parseText("<t>${escaped}</t>").text()
assert unescaped == unsafe
374
```
375
376
## Namespace Support
377
378
### Namespace
379
380
Support for XML namespaces in parsing and generation.
381
382
```groovy { .api }
383
class Namespace {
384
Namespace(String uri)
385
Namespace(String uri, String prefix)
386
387
String getUri()
388
String getPrefix()
389
390
QName name(String localName)
391
boolean equals(Object other)
392
int hashCode()
393
String toString()
394
}
395
```
396
397
Usage examples:
398
```groovy
399
import groovy.xml.Namespace
400
import groovy.xml.MarkupBuilder
401
402
// Define namespaces
403
def xhtml = new Namespace('http://www.w3.org/1999/xhtml', 'h')
404
def custom = new Namespace('http://example.com/custom', 'c')
405
406
// Use in MarkupBuilder
407
def writer = new StringWriter()
408
def xml = new MarkupBuilder(writer)
409
410
xml.'h:html'('xmlns:h': xhtml.uri, 'xmlns:c': custom.uri) {
411
'h:head' {
412
'h:title'('Test Document')
413
}
414
'h:body' {
415
'h:p'('This is a paragraph.')
416
'c:custom-element'(attribute: 'value', 'Custom content')
417
}
418
}
419
420
// Parse namespaced XML
421
def namespacedXml = '''
<root xmlns:ns="http://example.com/ns">
    <ns:element>Content</ns:element>
</root>
'''

// Use XmlParser here: its Node results can be subscripted with a QName
def parser = new XmlParser(false, true) // non-validating, namespace aware
def root = parser.parseText(namespacedXml)
def ns = new groovy.xml.Namespace('http://example.com/ns', 'ns')
// ns.element yields the QName for <ns:element>; the subscript selects the matching children
assert root[ns.element].text() == 'Content'
431
```
432
433
## Error Handling and Validation
434
435
### XML Validation
436
437
```groovy
438
import groovy.util.XmlSlurper
import javax.xml.XMLConstants
import javax.xml.parsers.SAXParserFactory
import javax.xml.validation.SchemaFactory
import org.xml.sax.ErrorHandler
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException

// Compile the XSD into a Schema object
def schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
def schema = schemaFactory.newSchema(new File('schema.xsd'))

// Attach the compiled schema to the parser factory. XSD validation needs a
// namespace-aware parser; the factory's DTD `validating` flag stays off.
def parserFactory = SAXParserFactory.newInstance()
parserFactory.namespaceAware = true
parserFactory.schema = schema

def slurper = new XmlSlurper(parserFactory.newSAXParser())

// Rethrow validation problems so they reach the catch block below;
// recoverable errors are only reported to the handler, not thrown by the parser
slurper.errorHandler = [
    warning   : { SAXParseException e -> println "Validation warning: ${e.message}" },
    error     : { SAXParseException e -> throw e },
    fatalError: { SAXParseException e -> throw e }
] as ErrorHandler

try {
    def result = slurper.parse(new File('document.xml'))
    // Process validated XML
} catch (SAXException e) {
    println "Validation error: ${e.message}"
}
458
```
459
460
### Error Handling
461
462
```groovy
463
import groovy.util.XmlSlurper
464
import org.xml.sax.SAXParseException
465
466
try {
467
def slurper = new XmlSlurper()
468
def result = slurper.parseText('<invalid><xml>') // Malformed XML
469
} catch (SAXParseException e) {
470
println "XML parsing error at line ${e.lineNumber}, column ${e.columnNumber}: ${e.message}"
471
} catch (Exception e) {
472
println "General error: ${e.message}"
473
}
474
```
475
476
## Advanced XML Processing
477
478
### Custom Entity Resolution
479
480
```groovy
481
import org.xml.sax.EntityResolver
482
import org.xml.sax.InputSource
483
484
/**
 * Entity resolver that supplies an inline definition for any external entity
 * whose system identifier ends with 'custom.dtd'. For every other identifier
 * it returns null.
 */
class CustomEntityResolver implements EntityResolver {
    InputSource resolveEntity(String publicId, String systemId) {
        // Not the DTD this resolver knows about: offer no replacement
        if (!systemId.endsWith('custom.dtd')) {
            return null
        }
        def inlineDefinition = new StringReader('<!ENTITY custom "Custom Entity">')
        return new InputSource(inlineDefinition)
    }
}
492
493
def slurper = new XmlSlurper()
494
slurper.entityResolver = new CustomEntityResolver()
495
```
496
497
### Performance Considerations
498
499
```groovy
500
// Parse an XML file from an input stream and hand each top-level child to processElement
def processLargeXml(File xmlFile) {
    def slurper = new XmlSlurper()
    xmlFile.withInputStream { stream ->
        def topLevelElements = slurper.parse(stream).children()
        // NOTE(review): parse() returns the document root, so the input has presumably been
        // read in full before this loop starts; confirm memory use on very large files
        topLevelElements.each { element ->
            processElement(element)
        }
    }
}
512
513
// For memory-constrained environments
514
def slurper = new XmlSlurper(false, false) // Non-validating, non-namespace aware
515
slurper.setFeature('http://apache.org/xml/features/nonvalidating/load-external-dtd', false)
516
```