or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dom-processing.md index.md namespace-support.md xml-generation.md xml-parsing.md xml-utilities.md

xml-parsing.md docs/

0

# XML Parsing

1

2

There are two primary approaches to parsing XML documents: tree-based parsing with XmlParser, which creates mutable document trees, and GPath-based parsing with XmlSlurper, which provides lazy evaluation and XPath-like navigation capabilities.

3

4

## Capabilities

5

6

### XmlParser

7

8

Creates a mutable tree of `groovy.util.Node` objects representing the parsed XML document. Ideal for scenarios requiring document modification or when working with smaller XML files where full tree loading is acceptable.

9

10

```groovy { .api }

11

/**

12

* Default constructor with standard parsing settings

13

*/

14

XmlParser()

15

16

/**

17

* Constructor with validation and namespace control

18

* @param validating - Enable DTD validation

19

* @param namespaceAware - Enable namespace processing

20

*/

21

XmlParser(boolean validating, boolean namespaceAware)

22

23

/**

24

* Constructor with full parsing control

25

* @param validating - Enable DTD validation

26

* @param namespaceAware - Enable namespace processing

27

* @param allowDocTypeDeclaration - Allow DOCTYPE declarations

28

*/

29

XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration)

30

31

/**

32

* Constructor using custom XMLReader

33

* @param reader - Custom XMLReader instance

34

*/

35

XmlParser(XMLReader reader)

36

37

/**

38

* Constructor using SAXParser

39

* @param parser - SAXParser instance

40

*/

41

XmlParser(SAXParser parser)

42

```

43

44

#### Parsing Methods

45

46

```groovy { .api }

47

/**

48

* Parse XML from a file

49

* @param file - File containing XML content

50

* @return Root Node of parsed document

51

*/

52

Node parse(File file)

53

54

/**

55

* Parse XML from a Path

56

* @param path - Path to XML file

57

* @return Root Node of parsed document

58

*/

59

Node parse(Path path)

60

61

/**

62

* Parse XML from URI/URL

63

* @param uri - URI string pointing to XML resource

64

* @return Root Node of parsed document

65

*/

66

Node parse(String uri)

67

68

/**

69

* Parse XML from InputStream

70

* @param input - InputStream containing XML

71

* @return Root Node of parsed document

72

*/

73

Node parse(InputStream input)

74

75

/**

76

* Parse XML from Reader

77

* @param reader - Reader containing XML

78

* @return Root Node of parsed document

79

*/

80

Node parse(Reader reader)

81

82

/**

83

* Parse XML from InputSource

84

* @param input - SAX InputSource

85

* @return Root Node of parsed document

86

*/

87

Node parse(InputSource input)

88

89

/**

90

* Parse XML from string content

91

* @param text - String containing XML content

92

* @return Root Node of parsed document

93

* @throws IOException if I/O error occurs

94

* @throws SAXException if parsing error occurs

95

*/

96

Node parseText(String text) throws IOException, SAXException

97

```

98

99

#### Configuration Methods

100

101

```groovy { .api }

102

/**

103

* Control whitespace trimming in text content

104

* @param trimWhitespace - Whether to trim whitespace

105

*/

106

void setTrimWhitespace(boolean trimWhitespace)

107

boolean isTrimWhitespace()

108

109

/**

110

* Control preservation of ignorable whitespace

111

* @param keepIgnorableWhitespace - Whether to keep ignorable whitespace

112

*/

113

void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace)

114

boolean isKeepIgnorableWhitespace()

115

116

/**

117

* Check if parser is namespace aware

118

* @return true if namespace aware

119

*/

120

boolean isNamespaceAware()

121

122

/**

123

* Set namespace awareness (configuration method)

124

* @param namespaceAware - true to enable namespace processing

125

*/

126

void setNamespaceAware(boolean namespaceAware)

127

```

128

129

#### XMLReader Delegation Methods

130

131

```groovy { .api }

132

/**

133

* Get DTD handler for parser

134

* @return Current DTDHandler

135

*/

136

DTDHandler getDTDHandler()

137

138

/**

139

* Set DTD handler for parser

140

* @param dtdHandler - DTDHandler instance

141

*/

142

void setDTDHandler(DTDHandler dtdHandler)

143

144

/**

145

* Get entity resolver for parser

146

* @return Current EntityResolver

147

*/

148

EntityResolver getEntityResolver()

149

150

/**

151

* Set entity resolver for parser

152

* @param entityResolver - EntityResolver instance

153

*/

154

void setEntityResolver(EntityResolver entityResolver)

155

156

/**

157

* Get error handler for parser

158

* @return Current ErrorHandler

159

*/

160

ErrorHandler getErrorHandler()

161

162

/**

163

* Set error handler for parser

164

* @param errorHandler - ErrorHandler instance

165

*/

166

void setErrorHandler(ErrorHandler errorHandler)

167

168

/**

169

* Get parser feature value

170

* @param uri - Feature URI

171

* @return Feature value

172

* @throws SAXNotRecognizedException if feature not recognized

173

* @throws SAXNotSupportedException if feature not supported

174

*/

175

boolean getFeature(String uri) throws SAXNotRecognizedException, SAXNotSupportedException

176

177

/**

178

* Set parser feature value

179

* @param uri - Feature URI

180

* @param value - Feature value

181

* @throws SAXNotRecognizedException if feature not recognized

182

* @throws SAXNotSupportedException if feature not supported

183

*/

184

void setFeature(String uri, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException

185

186

/**

187

* Get parser property value

188

* @param uri - Property URI

189

* @return Property value

190

* @throws SAXNotRecognizedException if property not recognized

191

* @throws SAXNotSupportedException if property not supported

192

*/

193

Object getProperty(String uri) throws SAXNotRecognizedException, SAXNotSupportedException

194

195

/**

196

* Set parser property value

197

* @param uri - Property URI

198

* @param value - Property value

199

* @throws SAXNotRecognizedException if property not recognized

200

* @throws SAXNotSupportedException if property not supported

201

*/

202

void setProperty(String uri, Object value) throws SAXNotRecognizedException, SAXNotSupportedException

203

```

204

205

**Usage Examples:**

206

207

```groovy

208

import groovy.xml.XmlParser

209

210

// Basic parsing

211

def parser = new XmlParser()

212

def root = parser.parseText('<books><book title="Groovy"/></books>')

213

println root.book[0].@title // "Groovy"

214

215

// Namespace-aware parsing

216

def nsParser = new XmlParser(false, true)

217

def nsRoot = nsParser.parseText('''

218

<books xmlns:lib="http://library.org">

219

<lib:book title="Advanced Groovy"/>

220

</books>

221

''')

222

223

// File parsing with error handling

224

try {

225

def fileRoot = parser.parse(new File("data.xml"))

226

fileRoot.book.each { book ->

227

println "Title: ${book.@title}, Author: ${book.@author}"

228

}

229

} catch (Exception e) {

230

println "Parsing failed: ${e.message}"

231

}

232

233

// Whitespace control

234

parser.setTrimWhitespace(true)

235

parser.setKeepIgnorableWhitespace(false)

236

```

237

238

---

239

240

### XmlSlurper

241

242

Provides lazy-evaluated GPath expressions for XML navigation. Ideal for read-only access to XML documents, especially when working with large files or when only specific parts of the document are needed.

243

244

```groovy { .api }

245

/**

246

* Default constructor with standard parsing settings

247

*/

248

XmlSlurper()

249

250

/**

251

* Constructor with validation and namespace control

252

* @param validating - Enable DTD validation

253

* @param namespaceAware - Enable namespace processing

254

*/

255

XmlSlurper(boolean validating, boolean namespaceAware)

256

257

/**

258

* Constructor with full parsing control

259

* @param validating - Enable DTD validation

260

* @param namespaceAware - Enable namespace processing

261

* @param allowDocTypeDeclaration - Allow DOCTYPE declarations

262

*/

263

XmlSlurper(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration)

264

265

/**

266

* Constructor using custom XMLReader

267

* @param reader - Custom XMLReader instance

268

*/

269

XmlSlurper(XMLReader reader)

270

271

/**

272

* Constructor using SAXParser

273

* @param parser - SAXParser instance

274

*/

275

XmlSlurper(SAXParser parser)

276

```

277

278

#### Parsing Methods

279

280

```groovy { .api }

281

/**

282

* Parse XML from InputSource

283

* @param input - SAX InputSource

284

* @return GPathResult for navigation

285

*/

286

GPathResult parse(InputSource input)

287

288

/**

289

* Parse XML from File

290

* @param file - File containing XML

291

* @return GPathResult for navigation

292

*/

293

GPathResult parse(File file)

294

295

/**

296

* Parse XML from InputStream

297

* @param input - InputStream containing XML

298

* @return GPathResult for navigation

299

*/

300

GPathResult parse(InputStream input)

301

302

/**

303

* Parse XML from Reader

304

* @param reader - Reader containing XML

305

* @return GPathResult for navigation

306

*/

307

GPathResult parse(Reader reader)

308

309

/**

310

* Parse XML from URI/URL

311

* @param uri - URI string pointing to XML resource

312

* @return GPathResult for navigation

313

*/

314

GPathResult parse(String uri)

315

316

/**

317

* Parse XML from Path

318

* @param path - Path to XML file

319

* @return GPathResult for navigation

320

*/

321

GPathResult parse(Path path)

322

323

/**

324

* Parse XML from string content

325

* @param text - String containing XML content

326

* @return GPathResult for navigation

327

*/

328

GPathResult parseText(String text)

329

```

330

331

#### Configuration Methods

332

333

```groovy { .api }

334

/**

335

* Control preservation of ignorable whitespace

336

* @param keepIgnorableWhitespace - Whether to preserve ignorable whitespace

337

*/

338

void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace)

339

boolean isKeepIgnorableWhitespace()

340

341

/**

342

* Get the parsed document as GPathResult

343

* @return The root GPathResult

344

*/

345

GPathResult getDocument()

346

347

/**

348

* Set base URL for entity resolution

349

* @param base - Base URL for relative entity references

350

*/

351

void setEntityBaseUrl(URL base)

352

```

353

354

#### XMLReader Delegation Methods (XmlSlurper)

355

356

```groovy { .api }

357

/**

358

* Get DTD handler for slurper

359

* @return Current DTDHandler

360

*/

361

DTDHandler getDTDHandler()

362

363

/**

364

* Set DTD handler for slurper

365

* @param dtdHandler - DTDHandler instance

366

*/

367

void setDTDHandler(DTDHandler dtdHandler)

368

369

/**

370

* Get entity resolver for slurper

371

* @return Current EntityResolver

372

*/

373

EntityResolver getEntityResolver()

374

375

/**

376

* Set entity resolver for slurper

377

* @param entityResolver - EntityResolver instance

378

*/

379

void setEntityResolver(EntityResolver entityResolver)

380

381

/**

382

* Get error handler for slurper

383

* @return Current ErrorHandler

384

*/

385

ErrorHandler getErrorHandler()

386

387

/**

388

* Set error handler for slurper

389

* @param errorHandler - ErrorHandler instance

390

*/

391

void setErrorHandler(ErrorHandler errorHandler)

392

393

/**

394

* Get slurper feature value

395

* @param uri - Feature URI

396

* @return Feature value

397

* @throws SAXNotRecognizedException if feature not recognized

398

* @throws SAXNotSupportedException if feature not supported

399

*/

400

boolean getFeature(String uri) throws SAXNotRecognizedException, SAXNotSupportedException

401

402

/**

403

* Set slurper feature value

404

* @param uri - Feature URI

405

* @param value - Feature value

406

* @throws SAXNotRecognizedException if feature not recognized

407

* @throws SAXNotSupportedException if feature not supported

408

*/

409

void setFeature(String uri, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException

410

411

/**

412

* Get slurper property value

413

* @param uri - Property URI

414

* @return Property value

415

* @throws SAXNotRecognizedException if property not recognized

416

* @throws SAXNotSupportedException if property not supported

417

*/

418

Object getProperty(String uri) throws SAXNotRecognizedException, SAXNotSupportedException

419

420

/**

421

* Set slurper property value

422

* @param uri - Property URI

423

* @param value - Property value

424

* @throws SAXNotRecognizedException if property not recognized

425

* @throws SAXNotSupportedException if property not supported

426

*/

427

void setProperty(String uri, Object value) throws SAXNotRecognizedException, SAXNotSupportedException

428

```

429

430

**Usage Examples:**

431

432

```groovy

433

import groovy.xml.XmlSlurper

434

435

// Basic GPath navigation

436

def slurper = new XmlSlurper()

437

def books = slurper.parseText('''

438

<library>

439

<book id="1" author="Venkat">

440

<title>Programming Groovy</title>

441

<price>45.99</price>

442

</book>

443

<book id="2" author="Dierk">

444

<title>Groovy in Action</title>

445

<price>55.99</price>

446

</book>

447

</library>

448

''')

449

450

// GPath navigation examples

451

println books.book.size() // 2

452

println books.book[0].@id // "1"

453

println books.book[0].@author // "Venkat"

454

println books.book[0].title.text() // "Programming Groovy"

455

println books.book*.@author // ["Venkat", "Dierk"]

456

println books.book.title*.text() // ["Programming Groovy", "Groovy in Action"]

457

458

// Find operations

459

def expensiveBooks = books.book.findAll {

460

it.price.text().toDouble() > 50.0

461

}

462

println expensiveBooks.size() // 1

463

464

// Namespace handling

465

def nsSlurper = new XmlSlurper(false, true)

466

def catalog = nsSlurper.parseText('''

467

<catalog xmlns:book="http://books.org">

468

<book:item isbn="123">

469

<book:title>XML Processing</book:title>

470

</book:item>

471

</catalog>

472

''')

473

```

474

475

---

476

477

### GPathResult Operations

478

479

The result type returned by XmlSlurper provides extensive navigation and querying capabilities.

480

481

```groovy { .api }

482

/**

483

* Get element name

484

* @return Element name as string

485

*/

486

String name()

487

488

/**

489

* Get text content of element and children

490

* @return Combined text content

491

*/

492

String text()

493

494

/**

495

* Get number of nodes in this result (e.g. books.book.size() counts the matched book elements)

496

* @return Count of nodes represented by this GPathResult

497

*/

498

int size()

499

500

/**

501

* Check if result is empty

502

* @return true if no elements

503

*/

504

boolean isEmpty()

505

506

/**

507

* Get parent element

508

* @return Parent GPathResult

509

*/

510

GPathResult parent()

511

512

/**

513

* Get all children

514

* @return Children as GPathResult

515

*/

516

GPathResult children()

517

518

/**

519

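* Navigate to parents (NOTE: the built-in GPathResult subclasses may not implement this and throw GroovyRuntimeException; verify before relying on it)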

520

* @return All parent elements

521

*/

522

GPathResult parents()

523

524

/**

525

* Iterator over the nodes in this result

526

* @return Iterator of the nodes represented by this GPathResult

527

*/

528

Iterator iterator()

529

530

/**

531

* Iterator over all child nodes including text

532

* @return Iterator of all child content

533

*/

534

Iterator childNodes()

535

536

/**

537

* Find first element matching closure

538

* @param closure - Matching predicate

539

* @return First matching GPathResult

540

*/

541

GPathResult find(Closure closure)

542

543

/**

544

* Find all elements matching closure

545

* @param closure - Matching predicate

546

* @return All matching elements as GPathResult

547

*/

548

GPathResult findAll(Closure closure)

549

550

/**

551

* Depth-first traversal iterator

552

* @return Iterator for depth-first traversal

553

*/

554

Iterator depthFirst()

555

556

/**

557

* Breadth-first traversal iterator

558

* @return Iterator for breadth-first traversal

559

*/

560

Iterator breadthFirst()

561

```

562

563

**Usage Examples:**

564

565

```groovy

566

// Advanced GPath operations (reuses the `slurper` instance created in the XmlSlurper usage example)

567

def data = slurper.parseText('''

568

<company>

569

<department name="Engineering">

570

<employee id="1" name="Alice"/>

571

<employee id="2" name="Bob"/>

572

</department>

573

<department name="Sales">

574

<employee id="3" name="Charlie"/>

575

</department>

576

</company>

577

''')

578

579

// Navigation and querying

580

println data.department.size() // 2

581

println data.department*.@name // ["Engineering", "Sales"]

582

println data.department.employee*.@name // ["Alice", "Bob", "Charlie"]

583

584

// Finding specific elements

585

def engineering = data.department.find { it.@name == "Engineering" }

586

println engineering.employee.size() // 2

587

588

def alice = data.department.employee.find { it.@name == "Alice" }

589

println alice.@id // "1"

590

591

// Tree traversal

592

data.depthFirst().each { node ->

593

if (node.name() == "employee") {

594

println "Employee: ${node.@name}"

595

}

596

}

597

```