0
# Streaming XML Processing
1
2
Groovy's XML support can generate XML in a streaming fashion, so that large documents are written out without first building the entire structure in memory. This is done with `StreamingMarkupBuilder` and its supporting classes.
3
4
## StreamingMarkupBuilder
5
6
Primary class for creating XML content that can be written to streams without materializing the entire document in memory.
7
8
```groovy { .api }
9
class StreamingMarkupBuilder extends AbstractStreamingBuilder {
10
// Configuration properties
11
boolean useDoubleQuotes
12
boolean expandEmptyElements
13
String encoding
14
15
// Core methods
16
Writable bind(Closure closure)
17
Writable bindNode(Object node)
18
}
19
```
20
21
### Basic Streaming Usage
22
23
```groovy
24
def smb = new StreamingMarkupBuilder()

// Configure output format
smb.encoding = 'UTF-8'
smb.useDoubleQuotes = true
smb.expandEmptyElements = false

// Create streamable XML content
def xmlContent = smb.bind {
    root {
        metadata(version: '1.0') {
            created(new Date().toString())
            generator('Groovy StreamingMarkupBuilder')
        }

        data {
            // Generate large amounts of data efficiently
            (1..10000).each { i ->
                record(id: i) {
                    name("Record ${i}")
                    value(Math.random() * 100)
                    timestamp(System.currentTimeMillis())
                }
            }
        }
    }
}

// Write to a file. withWriter closes the writer even if writing fails, and
// the explicit charset matches the encoding configured on the builder.
new File('large-output.xml').withWriter('UTF-8') { writer ->
    xmlContent.writeTo(writer)
}

// Write to standard output. writeTo expects a Writer, so System.out (a
// PrintStream) has to be wrapped first.
def stdout = new PrintWriter(System.out)
xmlContent.writeTo(stdout)
stdout.flush()

// Or get as string (only for smaller content)
def xmlString = xmlContent.toString()
58
```
59
60
### Streaming to Different Outputs
61
62
```groovy
63
def smb = new StreamingMarkupBuilder()
def content = smb.bind {
    catalog {
        (1..1000).each { i ->
            book(id: i) {
                title("Book ${i}")
                author("Author ${i % 100}")
                price(String.format("%.2f", Math.random() * 50))
            }
        }
    }
}

// Stream to file
new FileOutputStream('catalog.xml').withStream { fos ->
    content.writeTo(new OutputStreamWriter(fos, 'UTF-8'))
}

// Stream to HTTP response (`response` is the servlet response supplied by
// the surrounding web context)
response.contentType = 'application/xml'
response.characterEncoding = 'UTF-8'
content.writeTo(response.writer)

// Stream to compressed output
new GZIPOutputStream(new FileOutputStream('catalog.xml.gz')).withStream { gzos ->
    content.writeTo(new OutputStreamWriter(gzos, 'UTF-8'))
}

// Chain with other writers. withWriter closes the buffered writer even when
// writeTo throws, which a trailing close() call would not.
new BufferedWriter(new FileWriter('buffered.xml')).withWriter { writer ->
    content.writeTo(writer)
}
95
```
96
97
## AbstractStreamingBuilder
98
99
Base class providing common streaming builder functionality.
100
101
```groovy { .api }
102
abstract class AbstractStreamingBuilder {
103
// Template method for creating bound content
104
abstract Writable bind(Closure closure)
105
106
// Support methods for subclasses
107
protected Object createNode(Object name)
108
protected Object createNode(Object name, Object value)
109
protected Object createNode(Object name, Map attributes)
110
protected Object createNode(Object name, Map attributes, Object value)
111
}
112
```
113
114
## Streaming Support Classes
115
116
### BaseMarkupBuilder
117
118
Core functionality for markup generation in streaming context.
119
120
```java { .api }
121
public class BaseMarkupBuilder {
122
// Configuration for output formatting
123
protected boolean doubleQuotes;
124
protected boolean omitNullAttributes;
125
protected boolean omitEmptyAttributes;
126
protected boolean expandEmptyElements;
127
128
// Core building methods
129
protected void startTag(String tagName, Map<String, Object> attributes);
130
protected void endTag(String tagName);
131
protected void text(String content);
132
protected void comment(String content);
133
}
134
```
135
136
### StreamingMarkupWriter
137
138
Specialized Writer for streaming markup output with proper XML formatting.
139
140
```java { .api }
141
public class StreamingMarkupWriter extends Writer {
142
// Constructors
143
public StreamingMarkupWriter(Writer writer);
144
public StreamingMarkupWriter(Writer writer, String encoding);
145
146
// Writer methods
147
@Override
148
public void write(char[] cbuf, int off, int len) throws IOException;
149
@Override
150
public void flush() throws IOException;
151
@Override
152
public void close() throws IOException;
153
154
// Specialized XML writing methods
155
public void startTag(String name, Map<String, Object> attributes) throws IOException;
156
public void endTag(String name) throws IOException;
157
public void emptyTag(String name, Map<String, Object> attributes) throws IOException;
158
public void text(String content) throws IOException;
159
public void comment(String content) throws IOException;
160
public void processingInstruction(String target, String data) throws IOException;
161
}
162
```
163
164
### Builder Support Classes
165
166
```java { .api }
167
public class Builder {
168
// Internal builder state management
169
protected Map<String, String> namespaceMethodMap;
170
protected Map<String, String> namespaceDeclarations;
171
172
// Node creation support
173
public Object invokeMethod(String name, Object args);
174
public void setProperty(String name, Object value);
175
public Object getProperty(String name);
176
}
177
```
178
179
## Advanced Streaming Patterns
180
181
### Large Dataset Generation
182
183
```groovy
184
/**
 * Writes a report with the requested number of <record> elements to a file.
 *
 * First argument: the output file (a path string or a File).
 * Second argument: how many records to generate.
 *
 * The second parameter is deliberately NOT named `recordCount`: inside the
 * builder closure a call such as `recordCount(x)` on a name that is also a
 * local variable is compiled as `variable.call(x)`, so the element would
 * never be emitted and the call would fail on an Integer.
 */
def generateLargeReport = { outputFile, totalRecords ->
    def smb = new StreamingMarkupBuilder()
    smb.encoding = 'UTF-8'

    def reportXml = smb.bind {
        report {
            header {
                title('Large Data Report')
                generated(new Date().toString())
                recordCount(totalRecords)
            }

            // Records are emitted one at a time as the range is iterated
            data {
                (1..totalRecords).each { i ->
                    record(id: i) {
                        timestamp(System.currentTimeMillis())
                        data1(generateRandomData())
                        data2(generateRandomData())
                        data3(generateRandomData())

                        // Nested structures
                        details {
                            category(i % 10)
                            subcategory(i % 100)
                            tags {
                                (1..(i % 5 + 1)).each { j ->
                                    tag("tag${j}")
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // The writer's charset is set explicitly so it agrees with smb.encoding;
    // withWriter closes the file even if generation fails part-way.
    new OutputStreamWriter(new FileOutputStream(outputFile), 'UTF-8').withWriter { writer ->
        reportXml.writeTo(writer)
    }
}
229
230
// Generate 100,000 record report
231
generateLargeReport('massive-report.xml', 100000)
232
```
233
234
### Streaming with Namespaces
235
236
```groovy
237
/**
 * Builds a Writable for a document whose elements carry explicit namespace
 * prefixes. Nothing is generated until the result is written somewhere.
 */
def createNamespacedDocument = {
    def smb = new StreamingMarkupBuilder()
    // Keep the builder's encoding in step with the UTF-8 writer used below
    smb.encoding = 'UTF-8'

    // Declared as a real local, outside the builder closure, so the
    // assignment is not resolved against the script binding or the builder.
    def namespaces = [
        '': 'http://example.com/default',
        'meta': 'http://example.com/metadata',
        'data': 'http://example.com/data'
    ]

    smb.bind {
        mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

        document(xmlns: namespaces[''],
                 'xmlns:meta': namespaces['meta'],
                 'xmlns:data': namespaces['data']) {

            'meta:header' {
                'meta:title'('Namespaced Document')
                'meta:version'('1.0')
            }

            'data:content' {
                (1..1000).each { i ->
                    'data:item' {
                        'data:id'(i)
                        'data:value'("Value ${i}")
                        'meta:created'(new Date().toString())
                    }
                }
            }
        }
    }
}

def nsDocument = createNamespacedDocument()
// withWriter closes the file when done, including on failure
new File('namespaced.xml').withWriter('UTF-8') { writer ->
    nsDocument.writeTo(writer)
}
272
```
273
274
### Progressive XML Generation
275
276
```groovy
277
// Build XML progressively without storing entire structure
278
/**
 * Writes a report to the given writer, emitting one <section> per batch.
 *
 * Relies on two helpers defined elsewhere: processDataInBatches(closure),
 * which calls the closure once per batch, and getTotalItemCount().
 */
def progressiveBuilder = { outputWriter ->
    def smb = new StreamingMarkupBuilder()
    def xml = smb.bind {
        progressiveReport {
            mkp.comment('Report generated progressively')

            // Header section
            header {
                title('Progressive Report')
                startTime(new Date().toString())
            }

            // Process data in batches
            sections {
                processDataInBatches { batchData ->
                    section(id: batchData.id) {
                        // The loop variable must not be called `item`:
                        // `item(...)` would then be compiled as a call on
                        // the variable instead of emitting an <item> element.
                        batchData.items.each { entry ->
                            item(id: entry.id) {
                                name(entry.name)
                                value(entry.value)
                            }
                        }
                    }
                }
            }

            footer {
                endTime(new Date().toString())
                totalItems(getTotalItemCount())
            }
        }
    }

    xml.writeTo(outputWriter)
}
313
314
// Use with buffered writer for large outputs
315
new BufferedWriter(new FileWriter('progressive.xml')).withWriter { writer ->
316
progressiveBuilder(writer)
317
}
318
```
319
320
### Memory-Efficient Processing
321
322
```groovy
323
// Combine streaming generation with streaming consumption
324
/**
 * Reads records from inputFile with XmlSlurper and writes a transformed
 * copy to outputFile through StreamingMarkupBuilder.
 *
 * inputFile must expose a `name` property (it is used in the generated
 * comment). transformValue(...) is a helper defined elsewhere.
 */
def processLargeXmlPipeline = { inputFile, outputFile ->
    // Parse the input document
    def parsed = new XmlSlurper().parse(inputFile)

    // Describe the output; each record is written as it is visited
    def transformed = new StreamingMarkupBuilder().bind {
        processedData {
            mkp.comment("Processed from ${inputFile.name}")

            input_records: for (rec in parsed.records.record) {
                processedRecord(id: rec.'@id') {
                    // Both values are derived from the same source text
                    originalValue(rec.value.text())
                    processedValue(transformValue(rec.value.text()))
                    processedAt(new Date().toString())
                }
            }
        }
    }

    new FileWriter(outputFile).withWriter { sink ->
        transformed.writeTo(sink)
    }
}
351
```
352
353
## Performance Considerations
354
355
### Memory Management
356
357
```groovy
358
// Efficient streaming for large documents
359
/**
 * Returns a Writable describing a document with one million <record>
 * elements. No XML is produced until the result is written to a Writer.
 */
def efficientLargeDocumentGeneration = {
    def smb = new StreamingMarkupBuilder()

    // Output formatting only; these settings do not affect memory use
    smb.useDoubleQuotes = false      // attribute values in single quotes
    smb.expandEmptyElements = false  // more compact output

    def content = smb.bind {
        largeDocument {
            def totalRecords = 1000000

            // A single pass over the range is enough: records are emitted
            // one by one, so manual chunking or Thread.yield() adds nothing.
            (0..<totalRecords).each { i ->
                record(id: i) {
                    data("Record ${i}")
                    timestamp(System.currentTimeMillis())
                }
            }
        }
    }

    return content
}
392
```
393
394
### Streaming Best Practices
395
396
```groovy
397
// Best practices for streaming XML
398
/**
 * Helper methods showing recommended ways to write StreamingMarkupBuilder
 * output to common destinations.
 */
class StreamingXmlBestPractices {

    /**
     * Writes the XML described by xmlClosure to outputFile as UTF-8.
     *
     * @param xmlClosure builder closure describing the document
     * @param outputFile destination file
     */
    static void streamToFile(Closure xmlClosure, File outputFile) {
        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'

        def content = smb.bind(xmlClosure)

        // Use buffered writer for better performance; withWriter closes it
        new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(outputFile),
                'UTF-8'
            )
        ).withWriter { writer ->
            content.writeTo(writer)
        }
    }

    /**
     * Writes the XML described by xmlClosure to a servlet response.
     *
     * @param xmlClosure builder closure describing the document
     * @param response   servlet response to write to
     */
    static void streamToResponse(Closure xmlClosure, HttpServletResponse response) {
        response.contentType = 'application/xml; charset=UTF-8'

        def smb = new StreamingMarkupBuilder()
        smb.encoding = 'UTF-8'
        smb.useDoubleQuotes = true

        def content = smb.bind(xmlClosure)
        content.writeTo(response.writer)
    }

    /**
     * Writes one <record> element per item of data to output.
     *
     * @param data   items to export
     * @param output destination writer (not closed by this method)
     */
    static void streamLargeDataset(Collection data, Writer output) {
        def smb = new StreamingMarkupBuilder()

        def xml = smb.bind {
            // The XML declaration has to come before the root element;
            // emitting it inside <dataset> yields a malformed document.
            mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

            dataset {
                data.eachWithIndex { item, index ->
                    record(index: index) {
                        // Process each item without accumulating
                        processItem(item)
                    }
                }
            }
        }

        xml.writeTo(output)
    }
}
452
```
453
454
## Integration with Other Systems
455
456
```groovy
457
// Streaming to databases
458
/**
 * Serializes data as XML into a CLOB and inserts it into xml_exports.
 *
 * First argument: iterable of records exposing id, name and value.
 * Second argument: an open JDBC connection (not closed here).
 * Returns the update count reported by the INSERT.
 */
def streamToDatabase = { data, connection ->
    def smb = new StreamingMarkupBuilder()
    def xml = smb.bind {
        dataExport {
            data.each { record ->
                item {
                    id(record.id)
                    name(record.name)
                    value(record.value)
                }
            }
        }
    }

    // PreparedStatement.setCharacterStream takes a Reader and returns
    // nothing, so it cannot provide a Writer. A Clob can: write the XML
    // into the Clob's character stream, then bind the Clob as the parameter.
    def clob = connection.createClob()
    def stmt = connection.prepareStatement(
        "INSERT INTO xml_exports (data) VALUES (?)"
    )
    try {
        // Clob positions are 1-based; withWriter closes the stream
        clob.setCharacterStream(1L).withWriter { writer ->
            xml.writeTo(writer)
        }
        stmt.setClob(1, clob)
        return stmt.executeUpdate()
    } finally {
        stmt.close()
        clob.free()
    }
}
481
482
// Streaming to message queues
483
/**
 * Renders the XML described by xmlClosure to a string and hands it to
 * messageQueue.send(...). The whole document is held in memory here,
 * because the message is sent as a single String.
 */
def streamToQueue = { messageQueue, xmlClosure ->
    def rendered = new StringWriter()

    // Bind the closure and write the result into the in-memory buffer
    new StreamingMarkupBuilder().bind(xmlClosure).writeTo(rendered)

    def payload = rendered.toString()
    messageQueue.send(payload)
}
493
```