or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

builders.md · entities.md · index.md · jaxb.md · namespaces.md · navigation.md · parsing.md · streaming.md · utilities.md

docs/streaming.md

0

# Streaming XML Processing

1

2

Streaming XML processing in Groovy lets you generate large XML documents efficiently, without building the entire document structure in memory, using `StreamingMarkupBuilder` and its supporting classes.

3

4

## StreamingMarkupBuilder

5

6

Primary class for creating XML content that can be written to streams without materializing the entire document in memory.

7

8

```groovy { .api }

9

class StreamingMarkupBuilder extends AbstractStreamingBuilder {

10

// Configuration properties

11

boolean useDoubleQuotes

12

boolean expandEmptyElements

13

String encoding

14

15

// Core methods

16

Writable bind(Closure closure)

17

Writable bindNode(Object node)

18

}

19

```

20

21

### Basic Streaming Usage

22

23

```groovy

24

def smb = new StreamingMarkupBuilder()

25

26

// Configure output format

27

smb.encoding = 'UTF-8'

28

smb.useDoubleQuotes = true

29

smb.expandEmptyElements = false

30

31

// Create streamable XML content

32

def xmlContent = smb.bind {

33

root {

34

metadata(version: '1.0') {

35

created(new Date().toString())

36

generator('Groovy StreamingMarkupBuilder')

37

}

38

39

data {

40

// Generate large amounts of data efficiently

41

(1..10000).each { i ->

42

record(id: i) {

43

name("Record ${i}")

44

value(Math.random() * 100)

45

timestamp(System.currentTimeMillis())

46

}

47

}

48

}

49

}

50

}

51

52

// Write to various outputs without loading into memory

53

xmlContent.writeTo(new FileWriter('large-output.xml'))

54

xmlContent.writeTo(System.out)

55

56

// Or get as string (only for smaller content)

57

def xmlString = xmlContent.toString()

58

```

59

60

### Streaming to Different Outputs

61

62

```groovy

63

def smb = new StreamingMarkupBuilder()

64

def content = smb.bind {

65

catalog {

66

(1..1000).each { i ->

67

book(id: i) {

68

title("Book ${i}")

69

author("Author ${i % 100}")

70

price(String.format("%.2f", Math.random() * 50))

71

}

72

}

73

}

74

}

75

76

// Stream to file

77

new FileOutputStream('catalog.xml').withStream { fos ->

78

content.writeTo(new OutputStreamWriter(fos, 'UTF-8'))

79

}

80

81

// Stream to HTTP response

82

response.contentType = 'application/xml'

83

response.characterEncoding = 'UTF-8'

84

content.writeTo(response.writer)

85

86

// Stream to compressed output

87

new GZIPOutputStream(new FileOutputStream('catalog.xml.gz')).withStream { gzos ->

88

content.writeTo(new OutputStreamWriter(gzos, 'UTF-8'))

89

}

90

91

// Chain with other writers

92

def bufferedContent = new BufferedWriter(new FileWriter('buffered.xml'))

93

content.writeTo(bufferedContent)

94

bufferedContent.close()

95

```

96

97

## AbstractStreamingBuilder

98

99

Base class providing common streaming builder functionality.

100

101

```groovy { .api }

102

abstract class AbstractStreamingBuilder {

103

// Template method for creating bound content

104

abstract Writable bind(Closure closure)

105

106

// Support methods for subclasses

107

protected Object createNode(Object name)

108

protected Object createNode(Object name, Object value)

109

protected Object createNode(Object name, Map attributes)

110

protected Object createNode(Object name, Map attributes, Object value)

111

}

112

```

113

114

## Streaming Support Classes

115

116

### BaseMarkupBuilder

117

118

Core functionality for markup generation in a streaming context.

119

120

```java { .api }

121

public class BaseMarkupBuilder {

122

// Configuration for output formatting

123

protected boolean doubleQuotes;

124

protected boolean omitNullAttributes;

125

protected boolean omitEmptyAttributes;

126

protected boolean expandEmptyElements;

127

128

// Core building methods

129

protected void startTag(String tagName, Map<String, Object> attributes);

130

protected void endTag(String tagName);

131

protected void text(String content);

132

protected void comment(String content);

133

}

134

```

135

136

### StreamingMarkupWriter

137

138

Specialized Writer for streaming markup output with proper XML formatting.

139

140

```java { .api }

141

public class StreamingMarkupWriter extends Writer {

142

// Constructors

143

public StreamingMarkupWriter(Writer writer);

144

public StreamingMarkupWriter(Writer writer, String encoding);

145

146

// Writer methods

147

@Override

148

public void write(char[] cbuf, int off, int len) throws IOException;

149

@Override

150

public void flush() throws IOException;

151

@Override

152

public void close() throws IOException;

153

154

// Specialized XML writing methods

155

public void startTag(String name, Map<String, Object> attributes) throws IOException;

156

public void endTag(String name) throws IOException;

157

public void emptyTag(String name, Map<String, Object> attributes) throws IOException;

158

public void text(String content) throws IOException;

159

public void comment(String content) throws IOException;

160

public void processingInstruction(String target, String data) throws IOException;

161

}

162

```

163

164

### Builder Support Classes

165

166

```java { .api }

167

public class Builder {

168

// Internal builder state management

169

protected Map<String, String> namespaceMethodMap;

170

protected Map<String, String> namespaceDeclarations;

171

172

// Node creation support

173

public Object invokeMethod(String name, Object args);

174

public void setProperty(String name, Object value);

175

public Object getProperty(String name);

176

}

177

```

178

179

## Advanced Streaming Patterns

180

181

### Large Dataset Generation

182

183

```groovy

184

def generateLargeReport = { outputFile, recordCount ->

185

def smb = new StreamingMarkupBuilder()

186

smb.encoding = 'UTF-8'

187

188

def report = smb.bind {

189

report {

190

header {

191

title('Large Data Report')

192

generated(new Date().toString())

193

recordCount(recordCount)

194

}

195

196

// Generate data in chunks to manage memory

197

data {

198

(1..recordCount).each { i ->

199

if (i % 1000 == 0) {

200

System.gc() // Hint for garbage collection on large datasets

201

}

202

203

record(id: i) {

204

timestamp(System.currentTimeMillis())

205

data1(generateRandomData())

206

data2(generateRandomData())

207

data3(generateRandomData())

208

209

// Nested structures

210

details {

211

category(i % 10)

212

subcategory(i % 100)

213

tags {

214

(1..(i % 5 + 1)).each { j ->

215

tag("tag${j}")

216

}

217

}

218

}

219

}

220

}

221

}

222

}

223

}

224

225

new FileWriter(outputFile).withWriter { writer ->

226

report.writeTo(writer)

227

}

228

}

229

230

// Generate 100,000 record report

231

generateLargeReport('massive-report.xml', 100000)

232

```

233

234

### Streaming with Namespaces

235

236

```groovy

237

def createNamespacedDocument = {

238

def smb = new StreamingMarkupBuilder()

239

smb.bind {

240

mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

241

242

namespaces = [

243

'': 'http://example.com/default',

244

'meta': 'http://example.com/metadata',

245

'data': 'http://example.com/data'

246

]

247

248

document(xmlns: namespaces[''],

249

'xmlns:meta': namespaces['meta'],

250

'xmlns:data': namespaces['data']) {

251

252

'meta:header' {

253

'meta:title'('Namespaced Document')

254

'meta:version'('1.0')

255

}

256

257

'data:content' {

258

(1..1000).each { i ->

259

'data:item' {

260

'data:id'(i)

261

'data:value'("Value ${i}")

262

'meta:created'(new Date().toString())

263

}

264

}

265

}

266

}

267

}

268

}

269

270

def nsDocument = createNamespacedDocument()

271

nsDocument.writeTo(new FileWriter('namespaced.xml'))

272

```

273

274

### Progressive XML Generation

275

276

```groovy

277

// Build XML progressively without storing entire structure

278

def progressiveBuilder = { outputWriter ->

279

def smb = new StreamingMarkupBuilder()

280

def xml = smb.bind {

281

progressiveReport {

282

mkp.comment('Report generated progressively')

283

284

// Header section

285

header {

286

title('Progressive Report')

287

startTime(new Date().toString())

288

}

289

290

// Process data in batches

291

sections {

292

processDataInBatches { batchData ->

293

section(id: batchData.id) {

294

batchData.items.each { item ->

295

item(id: item.id) {

296

name(item.name)

297

value(item.value)

298

}

299

}

300

}

301

}

302

}

303

304

footer {

305

endTime(new Date().toString())

306

totalItems(getTotalItemCount())

307

}

308

}

309

}

310

311

xml.writeTo(outputWriter)

312

}

313

314

// Use with buffered writer for large outputs

315

new BufferedWriter(new FileWriter('progressive.xml')).withWriter { writer ->

316

progressiveBuilder(writer)

317

}

318

```

319

320

### Memory-Efficient Processing

321

322

```groovy

323

// Combine streaming generation with streaming consumption

324

def processLargeXmlPipeline = { inputFile, outputFile ->

325

// Parse input efficiently

326

def slurper = new XmlSlurper()

327

def input = slurper.parse(inputFile)

328

329

// Generate output with streaming

330

def smb = new StreamingMarkupBuilder()

331

def output = smb.bind {

332

processedData {

333

mkp.comment("Processed from ${inputFile.name}")

334

335

// Process input lazily and stream output

336

input.records.record.each { record ->

337

processedRecord(id: record.'@id') {

338

// Transform and stream without accumulating

339

originalValue(record.value.text())

340

processedValue(transformValue(record.value.text()))

341

processedAt(new Date().toString())

342

}

343

}

344

}

345

}

346

347

new FileWriter(outputFile).withWriter { writer ->

348

output.writeTo(writer)

349

}

350

}

351

```

352

353

## Performance Considerations

354

355

### Memory Management

356

357

```groovy

358

// Efficient streaming for large documents

359

def efficientLargeDocumentGeneration = {

360

def smb = new StreamingMarkupBuilder()

361

362

// Configure for minimal memory usage

363

smb.useDoubleQuotes = false // Slightly less memory per attribute

364

smb.expandEmptyElements = false // More compact output

365

366

def content = smb.bind {

367

largeDocument {

368

// Process in chunks to avoid memory buildup

369

def chunkSize = 1000

370

def totalRecords = 1000000

371

372

(0..<totalRecords).step(chunkSize) { start ->

373

def end = Math.min(start + chunkSize, totalRecords)

374

375

(start..<end).each { i ->

376

record(id: i) {

377

data("Record ${i}")

378

timestamp(System.currentTimeMillis())

379

}

380

}

381

382

// Yield control periodically

383

if (start % 10000 == 0) {

384

Thread.yield()

385

}

386

}

387

}

388

}

389

390

return content

391

}

392

```

393

394

### Streaming Best Practices

395

396

```groovy

397

// Best practices for streaming XML

398

class StreamingXmlBestPractices {

399

400

static void streamToFile(Closure xmlClosure, File outputFile) {

401

def smb = new StreamingMarkupBuilder()

402

smb.encoding = 'UTF-8'

403

404

def content = smb.bind(xmlClosure)

405

406

// Use buffered writer for better performance

407

new BufferedWriter(

408

new OutputStreamWriter(

409

new FileOutputStream(outputFile),

410

'UTF-8'

411

)

412

).withWriter { writer ->

413

content.writeTo(writer)

414

}

415

}

416

417

static void streamToResponse(Closure xmlClosure, HttpServletResponse response) {

418

response.contentType = 'application/xml; charset=UTF-8'

419

420

def smb = new StreamingMarkupBuilder()

421

smb.encoding = 'UTF-8'

422

smb.useDoubleQuotes = true

423

424

def content = smb.bind(xmlClosure)

425

content.writeTo(response.writer)

426

}

427

428

static void streamLargeDataset(Collection data, Writer output) {

429

def smb = new StreamingMarkupBuilder()

430

431

def xml = smb.bind {

432

dataset {

433

mkp.xmlDeclaration(version: '1.0', encoding: 'UTF-8')

434

435

data.eachWithIndex { item, index ->

436

record(index: index) {

437

// Process each item without accumulating

438

processItem(item)

439

}

440

441

// Periodic memory management

442

if (index % 1000 == 0) {

443

System.gc()

444

}

445

}

446

}

447

}

448

449

xml.writeTo(output)

450

}

451

}

452

```

453

454

## Integration with Other Systems

455

456

```groovy

457

// Streaming to databases

458

def streamToDatabase = { data, connection ->

459

def smb = new StreamingMarkupBuilder()

460

def xml = smb.bind {

461

dataExport {

462

data.each { record ->

463

item {

464

id(record.id)

465

name(record.name)

466

value(record.value)

467

}

468

}

469

}

470

}

471

472

// Stream directly to database CLOB

473

def stmt = connection.prepareStatement(

474

"INSERT INTO xml_exports (data) VALUES (?)"

475

)

476

477

def writer = stmt.setCharacterStream(1, -1)

478

xml.writeTo(writer)

479

stmt.executeUpdate()

480

}

481

482

// Streaming to message queues

483

def streamToQueue = { messageQueue, xmlClosure ->

484

def smb = new StreamingMarkupBuilder()

485

def content = smb.bind(xmlClosure)

486

487

// Stream to string for messaging

488

def writer = new StringWriter()

489

content.writeTo(writer)

490

491

messageQueue.send(writer.toString())

492

}

493

```