Tessl Tile for maven/org.codehaus.groovy/groovy-xml@2.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

builders.md entities.md index.md jaxb.md namespaces.md parsing.md streaming.md utilities.md

parsing.mddocs/

0
# XML Parsing
1

2
Groovy XML provides two main approaches to parsing XML documents: XmlParser for Node-based manipulation and XmlSlurper for XPath-like navigation using GPathResult.
3

4
## XmlParser
5

6
Parses XML into a tree of Node objects that can be directly manipulated, modified, and traversed.
7

8
```java { .api }
9
public class XmlParser implements ContentHandler {
10
    // Constructors
11
    public XmlParser();
12
    public XmlParser(boolean validating, boolean namespaceAware);
13
    public XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
14
    public XmlParser(XMLReader reader);
15
    public XmlParser(SAXParser parser);
16
    
17
    // Parsing methods
18
    public Node parse(File file) throws IOException, SAXException;
19
    public Node parse(InputSource input) throws IOException, SAXException;
20
    public Node parse(InputStream input) throws IOException, SAXException;
21
    public Node parse(Reader in) throws IOException, SAXException;
22
    public Node parse(String uri) throws IOException, SAXException;
23
    public Node parseText(String text) throws IOException, SAXException;
24
    
25
    // Configuration methods
26
    public boolean isTrimWhitespace();
27
    public void setTrimWhitespace(boolean trimWhitespace);
28
    public boolean isKeepIgnorableWhitespace();
29
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
30
    public boolean isNamespaceAware();
31
    public void setNamespaceAware(boolean namespaceAware);
32
    
33
    // SAX ContentHandler configuration
34
    public void setContentHandler(ContentHandler contentHandler);
35
    public void setErrorHandler(ErrorHandler errorHandler);
36
    public void setEntityResolver(EntityResolver entityResolver);
37
    public void setDTDHandler(DTDHandler dtdHandler);
38
}
39
```
40

41
### XmlParser Usage
42

43
```groovy
44
// Basic parsing
45
def parser = new XmlParser()
46
def root = parser.parseText('''
47
    <catalog>
48
        <book id="1" category="fiction">
49
            <title>The Great Gatsby</title>
50
            <author>F. Scott Fitzgerald</author>
51
            <price currency="USD">12.99</price>
52
        </book>
53
        <book id="2" category="science">
54
            <title>A Brief History of Time</title>
55
            <author>Stephen Hawking</author>
56
            <price currency="USD">15.99</price>
57
        </book>
58
    </catalog>
59
''')
60

61
// Access elements and attributes
62
println root.name()                    // "catalog"
63
println root.book.size()               // 2
64
println root.book[0].title.text()      // "The Great Gatsby"
65
println root.book[0].'@id'             // "1"
66
println root.book[0].'@category'       // "fiction"
67

68
// Modify the structure
69
root.book[0].title[0].value = 'New Title'
70
root.book[0].author[0].value = 'New Author'
71

72
// Add new elements
73
root.appendNode('publisher', 'Penguin Books')
74
root.book[0].appendNode('isbn', '978-0-7432-7356-5')
75

76
// Parse from file
77
def fileNode = parser.parse(new File('catalog.xml'))
78

79
// Parse with validation and namespace awareness
80
def validatingParser = new XmlParser(true, true)
81
validatingParser.setTrimWhitespace(true)
82
validatingParser.setKeepIgnorableWhitespace(false)
83
def validatedRoot = validatingParser.parseText(xmlString)
84
```
85

86
## XmlSlurper
87

88
Parses XML into GPathResult objects providing XPath-like navigation and lazy evaluation.
89

90
```java { .api }
91
public class XmlSlurper extends DefaultHandler {
92
    // Constructors
93
    public XmlSlurper();
94
    public XmlSlurper(boolean validating, boolean namespaceAware);
95
    public XmlSlurper(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration);
96
    public XmlSlurper(XMLReader reader);
97
    public XmlSlurper(SAXParser parser);
98
    
99
    // Parsing methods
100
    public GPathResult parse(InputSource input) throws IOException, SAXException;
101
    public GPathResult parse(File file) throws IOException, SAXException;
102
    public GPathResult parse(InputStream input) throws IOException, SAXException;
103
    public GPathResult parse(Reader in) throws IOException, SAXException;
104
    public GPathResult parse(String uri) throws IOException, SAXException;
105
    public GPathResult parseText(String text) throws IOException, SAXException;
106
    
107
    // Configuration methods
108
    public GPathResult getDocument();
109
    public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace);
110
    public boolean isKeepIgnorableWhitespace();
111
    public void setEntityBaseUrl(URL base);
112
    
113
    // SAX Handler configuration
114
    public void setContentHandler(ContentHandler contentHandler);
115
    public void setErrorHandler(ErrorHandler errorHandler);
116
    public void setEntityResolver(EntityResolver entityResolver);
117
    public void setDTDHandler(DTDHandler dtdHandler);
118
}
119
```
120

121
### XmlSlurper Usage
122

123
```groovy
124
// Basic slurping
125
def slurper = new XmlSlurper()
126
def catalog = slurper.parseText('''
127
    <catalog>
128
        <book id="1" category="fiction">
129
            <title>The Great Gatsby</title>
130
            <author>F. Scott Fitzgerald</author>
131
            <price currency="USD">12.99</price>
132
        </book>
133
        <book id="2" category="science">
134
            <title>A Brief History of Time</title>
135
            <author>Stephen Hawking</author>
136
            <price currency="USD">15.99</price>
137
        </book>
138
    </catalog>
139
''')
140

141
// XPath-like navigation
142
println catalog.book.title.text()           // All titles as text
143
println catalog.book[0].title               // "The Great Gatsby"
144
println catalog.book.'@category'            // All category attributes
145
println catalog.book.find { it.'@id' == '1' }.title  // Find by attribute
146

147
// Advanced navigation
148
println catalog.'**'.findAll { it.name() == 'price' }*.text()  // All prices (findAll returns a List, so spread the call)
149
println catalog.book.findAll { it.price.toDouble() > 13.0 }   // Books over $13
150

151
// Attribute access
152
catalog.book.each { book ->
153
    println "Book ${book.'@id'}: ${book.title} by ${book.author}"
154
    println "Price: ${book.price.'@currency'} ${book.price.text()}"
155
}
156

157
// Parse from file with configuration
158
def namespaceSlurper = new XmlSlurper(false, true)  // not validating, namespace aware
159
namespaceSlurper.setKeepIgnorableWhitespace(false)
160
def result = namespaceSlurper.parse(new File('document.xml'))
161
```
162

163
## Node Type (XmlParser Result)
164

165
The Node class represents parsed XML elements from XmlParser.
166

167
```java { .api }
168
public class Node implements Serializable {
169
    // Basic properties
170
    public String name();
171
    public String text();
172
    public List<Node> children();
173
    public Map<String, String> attributes();
174
    
175
    // Content access
176
    public Object get(String key);
177
    public Object getAt(String key);
178
    public void putAt(String key, Object value);
179
    
180
    // Modification methods
181
    public void setValue(String value);
182
    public Node appendNode(String name);
183
    public Node appendNode(String name, String value);
184
    public Node appendNode(String name, Map<String, Object> attributes);
185
    public Node appendNode(String name, Map<String, Object> attributes, String value);
186
    public boolean remove(Node child);
187
    
188
    // Navigation
189
    public Node parent();
190
    public List<Node> breadthFirst();
191
    public List<Node> depthFirst();
192
    
193
    // Utility methods
194
    public Node plus(Node node);
195
    public Iterator<Node> iterator();
196
}
197
```
198

199
### Node Usage Examples
200

201
```groovy
202
def parser = new XmlParser()
203
def root = parser.parseText('<root><item id="1">value</item></root>')
204

205
// Access node properties
206
println root.name()                    // "root"
207
println root.item[0].text()           // "value"
208
println root.item[0].'@id'            // "1"
209

210
// Traverse and modify
211
root.children().each { child ->
212
    println "Child: ${child.name()} = ${child.text()}"
213
}
214

215
// Add new nodes
216
def newItem = root.appendNode('item', [id: '2'], 'new value')
217
root.appendNode('metadata').with {
218
    appendNode('created', new Date().toString())
219
    appendNode('version', '1.0')
220
}
221

222
// Remove nodes
223
root.item.findAll { it.'@id' == '1' }.each { root.remove(it) }
224
```
225

226
## Parser Configuration
227

228
Both parsers support extensive configuration for different parsing scenarios:
229

230
```groovy
231
// Validation and namespace configuration
232
def validatingParser = new XmlParser(
233
    true,    // validating
234
    true,    // namespace aware
235
    false    // allowDocTypeDeclaration: false rejects DOCTYPE declarations
236
)
237

238
// Whitespace handling
239
parser.setTrimWhitespace(true)              // Trim whitespace around text
240
parser.setKeepIgnorableWhitespace(false)    // Don't keep insignificant whitespace
241

242
// Custom SAX configuration
243
parser.setErrorHandler(new MyErrorHandler())
244
parser.setEntityResolver(new MyEntityResolver())
245

246
// For XmlSlurper
247
def slurper = new XmlSlurper(false, true)   // not validating, namespace aware
248
slurper.setKeepIgnorableWhitespace(false)
249
slurper.setEntityBaseUrl(new URL('http://example.com/'))
250
```
251

252
## Error Handling
253

254
Both parsers can throw SAXException and IOException during parsing:
255

256
```groovy
257
try {
258
    def parser = new XmlParser()
259
    def result = parser.parseText(invalidXml)
260
} catch (SAXException e) {
261
    println "XML parsing error: ${e.message}"
262
} catch (IOException e) {
263
    println "IO error: ${e.message}"
264
}
265

266
// Custom error handling
267
parser.setErrorHandler(new ErrorHandler() {
268
    void error(SAXParseException e) throws SAXException {
269
        println "Parsing error at line ${e.lineNumber}: ${e.message}"
270
    }
271
    
272
    void fatalError(SAXParseException e) throws SAXException {
273
        throw e  // Re-throw fatal errors
274
    }
275
    
276
    void warning(SAXParseException e) throws SAXException {
277
        println "Warning: ${e.message}"
278
    }
279
})
280
```
281

282
## Comparison: XmlParser vs XmlSlurper
283

284
| Feature | XmlParser | XmlSlurper |
285
|---------|-----------|------------|
286
| Result Type | Node (mutable) | GPathResult (lazily evaluated) |
287
| Memory Usage | Higher (full DOM) | Lower (lazy evaluation) |
288
| Navigation | Object traversal | XPath-like |
289
| Modification | Yes (applied immediately) | Limited (edits are deferred until the result is re-serialized or re-parsed) |
290
| Performance | Better for modification | Better for navigation |
291
| Suitable For | Manipulating XML | Querying XML |
292
293
Choose XmlParser when you need to modify XML structures, and XmlSlurper when you primarily need to read and navigate XML content.
294

295
## Parser Factory Classes
296

297
Factory classes provide convenient methods for creating parser instances with standard configurations.
298

299
### XmlParserFactory
300

301
```groovy { .api }
302
class XmlParserFactory {
303
    static Object newParser(Object... args)
304
}
305
```
306

307
### XmlSlurperFactory
308

309
```groovy { .api }
310
class XmlSlurperFactory {
311
    static Object newSlurper(Object... args)
312
}
313
```
314

315
### Factory Usage
316

317
```groovy
318
// Using parser factory with default settings
319
def parser = XmlParserFactory.newParser()
320

321
// Using parser factory with custom settings
322
def validatingParser = XmlParserFactory.newParser(
323
    true,   // validating
324
    true    // namespace aware
325
)
326

327
// Using slurper factory
328
def slurper = XmlSlurperFactory.newSlurper(false, true)  // not validating, namespace aware
329

330
// Factories handle ParserConfigurationException and SAXException internally
331
try {
332
    def customParser = XmlParserFactory.newParser(true, true, false)  // validating, namespace aware, no DOCTYPE
333
    def result = customParser.parseText(xmlString)
334
} catch (Exception e) {
335
    println "Parser creation or parsing failed: ${e.message}"
336
}
337
```
338

339
## Advanced Error Handling
340

341
### Comprehensive Exception Handling
342

343
```groovy
344
import javax.xml.parsers.ParserConfigurationException
345
import org.xml.sax.SAXException
346
import org.xml.sax.SAXParseException
347

348
def robustParse = { xmlString ->
349
    try {
350
        def parser = new XmlParser()
351
        return parser.parseText(xmlString)
352
        
353
    } catch (ParserConfigurationException e) {
354
        println "Parser configuration error: ${e.message}"
355
        println "Check your XML parser installation and configuration"
356
        return null
357
        
358
    } catch (SAXParseException e) {
359
        println "XML structure error at line ${e.lineNumber}, column ${e.columnNumber}:"
360
        println "  ${e.message}"
361
        println "  System ID: ${e.systemId}"
362
        println "  Public ID: ${e.publicId}"
363
        return null
364
        
365
    } catch (SAXException e) {
366
        println "XML parsing error: ${e.message}"
367
        if (e.exception) {
368
            println "Root cause: ${e.exception.message}"
369
        }
370
        return null
371
        
372
    } catch (IOException e) {
373
        println "IO error while parsing: ${e.message}"
374
        return null
375
        
376
    } catch (Exception e) {
377
        println "Unexpected error during parsing: ${e.message}"
378
        e.printStackTrace()
379
        return null
380
    }
381
}
382

383
// Usage
384
def xml = '<root><item>valid</item></root>'
385
def result = robustParse(xml)
386

387
def invalidXml = '<root><item>unclosed'
388
def failedResult = robustParse(invalidXml)  // Will handle the error gracefully
389
```
390

391
### Custom Error Handlers
392

393
```groovy
394
import org.xml.sax.ErrorHandler
395
import org.xml.sax.SAXException
import org.xml.sax.SAXParseException
396

397
class DetailedErrorHandler implements ErrorHandler {
398
    List<String> warnings = []
399
    List<String> errors = []
400
    List<String> fatalErrors = []
401
    
402
    @Override
403
    void warning(SAXParseException e) throws SAXException {
404
        def msg = "Warning at line ${e.lineNumber}: ${e.message}"
405
        warnings << msg
406
        println msg
407
    }
408
    
409
    @Override
410
    void error(SAXParseException e) throws SAXException {
411
        def msg = "Error at line ${e.lineNumber}: ${e.message}"
412
        errors << msg
413
        println msg
414
        // Don't throw - allow parsing to continue
415
    }
416
    
417
    @Override
418
    void fatalError(SAXParseException e) throws SAXException {
419
        def msg = "Fatal error at line ${e.lineNumber}: ${e.message}"
420
        fatalErrors << msg
421
        println msg
422
        throw e  // Must throw for fatal errors
423
    }
424
    
425
    boolean hasErrors() {
426
        return !errors.isEmpty() || !fatalErrors.isEmpty()
427
    }
428
    
429
    void printSummary() {
430
        println "Parsing summary:"
431
        println "  Warnings: ${warnings.size()}"
432
        println "  Errors: ${errors.size()}"
433
        println "  Fatal errors: ${fatalErrors.size()}"
434
    }
435
}
436

437
// Usage with custom error handler
438
def parseWithDetailedErrors = { xmlString ->
439
    def errorHandler = new DetailedErrorHandler()
440
    
441
    try {
442
        def parser = new XmlParser()
443
        parser.setErrorHandler(errorHandler)
444
        
445
        def result = parser.parseText(xmlString)
446
        errorHandler.printSummary()
447
        
448
        if (errorHandler.hasErrors()) {
449
            println "Parsing completed with errors - results may be incomplete"
450
        }
451
        
452
        return result
453
        
454
    } catch (Exception e) {
455
        errorHandler.printSummary()
456
        println "Parsing failed: ${e.message}"
457
        return null
458
    }
459
}
460
```
461

462
### Validation Error Handling
463

464
```groovy
465
import javax.xml.validation.SchemaFactory
466
import javax.xml.validation.Schema
467
import javax.xml.XMLConstants
468
import javax.xml.transform.stream.StreamSource
469

470
def parseWithSchemaValidation = { xmlString, xsdFile ->
471
    try {
472
        // Create schema from XSD file
473
        def schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
474
        def schema = schemaFactory.newSchema(xsdFile)
475
        
476
        // Create validating parser
477
        def parser = XmlUtil.newSAXParser(XMLConstants.W3C_XML_SCHEMA_NS_URI, xsdFile)  // takes a File, URL, or Source rather than a Schema object
478
        def xmlParser = new XmlParser(parser)
479
        
480
        // Custom error handler for validation errors
481
        def validationErrors = []
482
        xmlParser.setErrorHandler(new ErrorHandler() {
483
            void warning(SAXParseException e) {
484
                validationErrors << "Validation warning: ${e.message}"
485
            }
486
            
487
            void error(SAXParseException e) {
488
                validationErrors << "Validation error: ${e.message}"
489
            }
490
            
491
            void fatalError(SAXParseException e) throws SAXException {
492
                validationErrors << "Fatal validation error: ${e.message}"
493
                throw e
494
            }
495
        })
496
        
497
        def result = xmlParser.parseText(xmlString)
498
        
499
        if (validationErrors) {
500
            println "Validation issues found:"
501
            validationErrors.each { println "  ${it}" }
502
        } else {
503
            println "Document is valid according to schema"
504
        }
505
        
506
        return result
507
        
508
    } catch (Exception e) {
509
        println "Schema validation failed: ${e.message}"
510
        return null
511
    }
512
}
513

514
// Usage
515
def xsdFile = new File('catalog.xsd')
516
def validXml = '''
517
    <catalog xmlns="http://example.com/catalog">
518
        <book id="1">
519
            <title>Valid Book</title>
520
            <author>Valid Author</author>
521
        </book>
522
    </catalog>
523
'''
524

525
def result = parseWithSchemaValidation(validXml, xsdFile)
526
```
527

528
### Recovery Strategies
529

530
```groovy
531
class XmlParsingRecovery {
532
    
533
    static Node parseWithFallback(String xmlString) {
534
        // Try strict parsing first
535
        try {
536
            def parser = new XmlParser(true, true)  // validating, namespace aware
537
            return parser.parseText(xmlString)
538
        } catch (Exception e) {
539
            println "Strict parsing failed: ${e.message}"
540
        }
541
        
542
        // Try lenient parsing
543
        try {
544
            def parser = new XmlParser(false, false)  // non-validating, not namespace aware
545
            parser.setTrimWhitespace(true)
546
            return parser.parseText(xmlString.trim())
547
        } catch (Exception e) {
548
            println "Lenient parsing failed: ${e.message}"
549
        }
550
        
551
        // Try to fix common issues and parse again
552
        try {
553
            def fixedXml = fixCommonXmlIssues(xmlString)
554
            def parser = new XmlParser(false, false)
555
            return parser.parseText(fixedXml)
556
        } catch (Exception e) {
557
            println "Recovery parsing failed: ${e.message}"
558
        }
559
        
560
        return null
561
    }
562
    
563
    static String fixCommonXmlIssues(String xml) {
564
        return xml
565
            .replaceAll(/&(?![a-zA-Z0-9#]+;)/, '&amp;')  // Fix unescaped ampersands
566
            .replaceAll(/<(?!\/?[a-zA-Z_]|[!?])/, '&lt;')  // Fix unescaped less-than (leaves tags, <?...?>, <!DOCTYPE, comments, and CDATA alone)
567
            .replaceAll(/(?<![a-zA-Z0-9"'\/?\]-])>/, '&gt;')  // Fix unescaped greater-than (leaves tag ends after quotes, '/', '?', '-', and ']' alone)
568
            .replaceAll(/\r\n|\r/, '\n')                 // Normalize line endings
569
            .trim()
570
    }
571
    
572
    static GPathResult slurpWithFallback(String xmlString) {
573
        // Similar fallback strategy for XmlSlurper
574
        try {
575
            def slurper = new XmlSlurper(true, true)
576
            return slurper.parseText(xmlString)
577
        } catch (Exception e) {
578
            println "Strict slurping failed: ${e.message}"
579
        }
580
        
581
        try {
582
            def slurper = new XmlSlurper(false, false)
583
            slurper.setKeepIgnorableWhitespace(false)
584
            return slurper.parseText(xmlString.trim())
585
        } catch (Exception e) {
586
            println "Lenient slurping failed: ${e.message}"
587
        }
588
        
589
        try {
590
            def fixedXml = fixCommonXmlIssues(xmlString)
591
            def slurper = new XmlSlurper(false, false)
592
            return slurper.parseText(fixedXml)
593
        } catch (Exception e) {
594
            println "Recovery slurping failed: ${e.message}"
595
        }
596
        
597
        return null
598
    }
599
}
600

601
// Usage
602
def problematicXml = '<root><item>Text with & unescaped chars < ></item></root>'
603
def recovered = XmlParsingRecovery.parseWithFallback(problematicXml)
604

605
if (recovered) {
606
    println "Successfully recovered and parsed XML"
607
    println recovered.item.text()
608
} else {
609
    println "Could not recover the XML"
610
}
611
```
612

613
## Performance Considerations
614

615
### Memory Management for Large Documents
616

617
```groovy
618
// For large XML files, prefer XmlSlurper over XmlParser
619
def processLargeXml = { file ->
620
    if (file.size() > 10 * 1024 * 1024) {  // > 10MB
621
        println "Large file detected, using XmlSlurper for better memory efficiency"
622
        def slurper = new XmlSlurper()
623
        slurper.setKeepIgnorableWhitespace(false)
624
        return slurper.parse(file)
625
    } else {
626
        println "Small file, using XmlParser for full DOM access"
627
        def parser = new XmlParser()
628
        return parser.parse(file)
629
    }
630
}
631

632
// Visit top-level elements one at a time (note: slurper.parse still loads the whole document first, so this does not reduce peak memory)
633
def processXmlInChunks = { file, chunkProcessor ->
634
    def slurper = new XmlSlurper()
635
    def doc = slurper.parse(file)
636
    
637
    // Process top-level elements one at a time
638
    doc.children().each { element ->
639
        chunkProcessor(element)
640
        // Allow garbage collection of processed elements
641
        System.gc()
642
    }
643
}
644
```

Version

Tile

Files

parsing.md docs/

parsing.mddocs/