or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

error-handling.md, html-utilities.md, index.md, parsing.md, serialization.md, tokenization.md, tree-adapters.md

docs/tree-adapters.md

0

# Tree Adapters

1

2

Tree adapters provide a pluggable interface for customizing how parsed HTML is represented in memory. They define the structure of AST nodes and provide methods for creating, manipulating, and inspecting nodes.

3

4

## Capabilities

5

6

### Default Tree Adapter

7

8

The built-in tree adapter that creates DOM-like node structures with standard properties.

9

10

```typescript { .api }

11

/**

12

* Default tree adapter that creates DOM-like nodes

13

*/

14

const defaultTreeAdapter: TreeAdapter<DefaultTreeAdapterMap>;

15

16

/**

17

* Type mapping for the default tree adapter

18

*/

19

interface DefaultTreeAdapterMap extends TreeAdapterTypeMap<

20

Node,

21

ParentNode,

22

ChildNode,

23

Document,

24

DocumentFragment,

25

Element,

26

CommentNode,

27

TextNode,

28

Template,

29

DocumentType

30

> {}

31

```

32

33

**Usage Examples:**

34

35

```typescript

36

import { parse, defaultTreeAdapter } from "parse5";

37

38

// Default tree adapter is used automatically

39

const document = parse('<div>Content</div>');

40

41

// Can be specified explicitly

42

const documentExplicit = parse('<div>Content</div>', {

43

treeAdapter: defaultTreeAdapter

44

});

45

46

// Access node properties (default tree adapter structure)

47

const element = document.childNodes[0].childNodes[1].childNodes[0]; // html > body > div (no doctype in the input, so <html> is childNodes[0])

48

console.log(element.tagName); // 'div'

49

console.log(element.nodeName); // 'div'

50

console.log(element.childNodes[0].value); // 'Content'

51

```

52

53

### Tree Adapter Interface

54

55

Complete interface for implementing custom tree adapters.

56

57

```typescript { .api }

58

/**

59

* Tree adapter interface defining all required methods for AST manipulation

60

*/

61

interface TreeAdapter<T extends TreeAdapterTypeMap = TreeAdapterTypeMap> {

62

// Node creation methods

63

createDocument(): T['document'];

64

createDocumentFragment(): T['documentFragment'];

65

createElement(tagName: string, namespaceURI: NS, attrs: Attribute[]): T['element'];

66

createCommentNode(data: string): T['commentNode'];

67

createTextNode(value: string): T['textNode'];

68

69

// Node manipulation methods

70

appendChild(parentNode: T['parentNode'], newNode: T['childNode']): void;

71

insertBefore(parentNode: T['parentNode'], newNode: T['childNode'], referenceNode: T['childNode']): void;

72

setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;

73

getTemplateContent(templateElement: T['template']): T['documentFragment'];

74

detachNode(node: T['childNode']): void;

75

76

// Node inspection methods

77

getTagName(element: T['element']): string;

78

getNamespaceURI(element: T['element']): string;

79

getTextNodeContent(textNode: T['textNode']): string;

80

getCommentNodeContent(commentNode: T['commentNode']): string;

81

getDocumentTypeNodeName(doctypeNode: T['documentType']): string;

82

getDocumentTypeNodePublicId(doctypeNode: T['documentType']): string;

83

getDocumentTypeNodeSystemId(doctypeNode: T['documentType']): string;

84

85

// Attribute methods

86

getAttrList(element: T['element']): Attribute[];

87

getAttr(element: T['element'], attrName: string): string | null;

88

setAttr(element: T['element'], attrName: string, attrValue: string): void;

89

90

// Parent/child relationships

91

getChildNodes(node: T['parentNode']): T['childNode'][];

92

getParentNode(node: T['childNode']): T['parentNode'] | null;

93

getFirstChild(node: T['parentNode']): T['childNode'] | null;

94

95

// Type guard methods

96

isElementNode(node: T['node']): node is T['element'];

97

isTextNode(node: T['node']): node is T['textNode'];

98

isCommentNode(node: T['node']): node is T['commentNode'];

99

isDocumentTypeNode(node: T['node']): node is T['documentType'];

100

101

// Special methods

102

adoptAttributes(recipient: T['element'], attrs: Attribute[]): void;

103

getDocumentMode(document: T['document']): DOCUMENT_MODE;

104

setDocumentMode(document: T['document'], mode: DOCUMENT_MODE): void;

105

setDocumentType(document: T['document'], name: string, publicId: string, systemId: string): void;

106

insertText(parentNode: T['parentNode'], text: string): void;

107

insertTextBefore(parentNode: T['parentNode'], text: string, referenceNode: T['childNode']): void;

108

109

// Template methods

110

getTemplateContent(templateElement: T['template']): T['documentFragment'];

111

setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;

112

113

// Location methods

114

getNodeSourceCodeLocation(node: T['node']): ElementLocation | undefined | null;

115

setNodeSourceCodeLocation(node: T['node'], location: ElementLocation | null): void;

116

updateNodeSourceCodeLocation(node: T['node'], location: Partial<ElementLocation>): void;

117

118

// Optional callback methods

119

onItemPush?(item: T['element']): void;

120

onItemPop?(item: T['element'], newTop: T['parentNode']): void;

121

}

122

```

123

124

### Default Tree Adapter Node Types

125

126

Node interfaces provided by the default tree adapter.

127

128

```typescript { .api }

129

/**

130

* Document node representing the entire HTML document

131

*/

132

interface Document {

133

nodeName: '#document';

134

mode: 'no-quirks' | 'quirks' | 'limited-quirks';

135

childNodes: ChildNode[];

136

sourceCodeLocation?: Location | null;

137

}

138

139

/**

140

* Document fragment node for parsing HTML fragments

141

*/

142

interface DocumentFragment {

143

nodeName: '#document-fragment';

144

childNodes: ChildNode[];

145

sourceCodeLocation?: Location | null;

146

}

147

148

/**

149

* Element node representing HTML elements

150

*/

151

interface Element {

152

nodeName: string;

153

tagName: string;

154

attrs: Attribute[];

155

namespaceURI: string;

156

sourceCodeLocation?: ElementLocation | null;

157

parentNode: ParentNode | null;

158

childNodes: ChildNode[];

159

}

160

161

/**

162

* Text node containing text content

163

*/

164

interface TextNode {

165

nodeName: '#text';

166

parentNode: ParentNode | null;

167

value: string;

168

sourceCodeLocation?: Location | null;

169

}

170

171

/**

172

* Comment node containing comment text

173

*/

174

interface CommentNode {

175

nodeName: '#comment';

176

parentNode: ParentNode | null;

177

data: string;

178

sourceCodeLocation?: Location | null;

179

}

180

181

/**

182

* Document type node (DOCTYPE declaration)

183

*/

184

interface DocumentType {

185

nodeName: '#documentType';

186

parentNode: ParentNode | null;

187

name: string;

188

publicId: string;

189

systemId: string;

190

sourceCodeLocation?: Location | null;

191

}

192

193

/**

194

* Template element with content fragment

195

*/

196

interface Template extends Element {

197

nodeName: 'template';

198

tagName: 'template';

199

content: DocumentFragment;

200

}

201

202

/**

203

* Union types for node categorization

204

*/

205

type ParentNode = Document | DocumentFragment | Element | Template;

206

type ChildNode = Element | Template | CommentNode | TextNode | DocumentType;

207

type Node = ParentNode | ChildNode;

208

209

/**

210

* Document mode enumeration

211

*/

212

type DOCUMENT_MODE = 'no-quirks' | 'quirks' | 'limited-quirks';

213

214

/**

215

* Namespace enumeration

216

*/

217

enum NS {

218

HTML = 'http://www.w3.org/1999/xhtml',

219

MATHML = 'http://www.w3.org/1998/Math/MathML',

220

SVG = 'http://www.w3.org/2000/svg',

221

XLINK = 'http://www.w3.org/1999/xlink',

222

XML = 'http://www.w3.org/XML/1998/namespace',

223

XMLNS = 'http://www.w3.org/2000/xmlns/'

224

}

225

226

/**

227

* Attribute interface

228

*/

229

interface Attribute {

230

name: string;

231

value: string;

232

namespace?: string;

233

prefix?: string;

234

}

235

236

/**

237

* Basic location interface

238

*/

239

interface Location {

240

startLine: number;

241

startCol: number;

242

startOffset: number;

243

endLine: number;

244

endCol: number;

245

endOffset: number;

246

}

247

248

/**

249

* Element location interface

250

*/

251

interface ElementLocation extends Location {

252

startTag?: Location;

253

endTag?: Location;

254

attrs?: Record<string, Location>;

255

}

256

```

257

258

**Usage Examples:**

259

260

```typescript

261

import { parse, parseFragment } from "parse5";

262

import type { Element, TextNode, Document } from "parse5";

263

264

// Type-safe node access with default tree adapter

265

const document: Document = parse('<div>Hello <span>World</span></div>');

266

const htmlElement = document.childNodes[0] as Element; // no doctype in the input, so <html> is the first child

267

const bodyElement = htmlElement.childNodes[1] as Element;

268

const divElement = bodyElement.childNodes[0] as Element;

269

270

console.log(divElement.tagName); // 'div'

271

console.log(divElement.attrs); // []

272

console.log(divElement.childNodes.length); // 2

273

274

const textNode = divElement.childNodes[0] as TextNode;

275

console.log(textNode.value); // 'Hello '

276

277

const spanElement = divElement.childNodes[1] as Element;

278

console.log(spanElement.tagName); // 'span'

279

```

280

281

### Custom Tree Adapter Implementation

282

283

Example of implementing a custom tree adapter for specialized use cases.

284

285

```typescript { .api }

286

/**

287

* Example custom tree adapter that adds custom properties to nodes

288

*/

289

interface CustomElement {

290

type: 'element';

291

tag: string;

292

attributes: Record<string, string>;

293

children: CustomNode[];

294

customProperty: string;

295

}

296

297

interface CustomText {

298

type: 'text';

299

content: string;

300

}

301

302

type CustomNode = CustomElement | CustomText;

303

304

const customTreeAdapter: TreeAdapter<CustomTreeAdapterMap> = {

305

// Implement all required TreeAdapter methods

306

createElement(tagName: string, namespaceURI: string, attrs: Attribute[]): CustomElement {

307

return {

308

type: 'element',

309

tag: tagName,

310

attributes: Object.fromEntries(attrs.map(attr => [attr.name, attr.value])),

311

children: [],

312

customProperty: `custom-${tagName}`

313

};

314

},

315

316

createTextNode(value: string): CustomText {

317

return {

318

type: 'text',

319

content: value

320

};

321

},

322

323

appendChild(parent: CustomElement, child: CustomNode): void {

324

parent.children.push(child);

325

},

326

327

isElementNode(node: CustomNode): node is CustomElement {

328

return node.type === 'element';

329

},

330

331

getTagName(element: CustomElement): string {

332

return element.tag;

333

},

334

335

// ... implement all other required methods

336

};

337

```

338

339

**Usage Examples:**

340

341

```typescript

342

import { parse } from "parse5";

343

344

// Use custom tree adapter

345

const document = parse('<div class="container">Content</div>', {

346

treeAdapter: customTreeAdapter

347

});

348

349

// Access custom properties

350

const element = document.children[0].children[1].children[0]; // html > body > div

351

console.log(element.customProperty); // 'custom-div'

352

console.log(element.attributes.class); // 'container'

353

```

354

355

### Tree Adapter Type Mapping

356

357

Type mapping interface for defining node types in custom tree adapters.

358

359

```typescript { .api }

360

/**

361

* Generic type mapping interface for tree adapters

362

*/

363

interface TreeAdapterTypeMap<

364

Node = unknown,

365

ParentNode = unknown,

366

ChildNode = unknown,

367

Document = unknown,

368

DocumentFragment = unknown,

369

Element = unknown,

370

CommentNode = unknown,

371

TextNode = unknown,

372

Template = unknown,

373

DocumentType = unknown

374

> {

375

node: Node;

376

parentNode: ParentNode;

377

childNode: ChildNode;

378

document: Document;

379

documentFragment: DocumentFragment;

380

element: Element;

381

commentNode: CommentNode;

382

textNode: TextNode;

383

template: Template;

384

documentType: DocumentType;

385

}

386

```

387

388

## Common Tree Adapter Patterns

389

390

### Node Traversal

391

392

```typescript

393

import { parse, defaultTreeAdapter } from "parse5";

394

import type { Element, Node } from "parse5";

395

396

function traverseNodes(node: Node, callback: (node: Node) => void): void {

397

callback(node);

398

399

if (defaultTreeAdapter.isElementNode(node) ||

400

node.nodeName === '#document' ||

401

node.nodeName === '#document-fragment') {

402

const children = defaultTreeAdapter.getChildNodes(node);

403

children.forEach(child => traverseNodes(child, callback));

404

}

405

}

406

407

const document = parse('<div><p>Text</p><span>More text</span></div>');

408

traverseNodes(document, (node) => {

409

console.log(node.nodeName);

410

});

411

```

412

413

### Element Filtering

414

415

```typescript

416

import { parse, defaultTreeAdapter } from "parse5";

417

import type { Element, Node } from "parse5";

418

419

function findElementsByTagName(root: Node, tagName: string): Element[] {

420

const results: Element[] = [];

421

422

function traverse(node: Node): void {

423

if (defaultTreeAdapter.isElementNode(node) &&

424

defaultTreeAdapter.getTagName(node) === tagName) {

425

results.push(node);

426

}

427

428

if (defaultTreeAdapter.isElementNode(node) ||

429

node.nodeName === '#document' ||

430

node.nodeName === '#document-fragment') {

431

const children = defaultTreeAdapter.getChildNodes(node);

432

children.forEach(traverse);

433

}

434

}

435

436

traverse(root);

437

return results;

438

}

439

440

const document = parse('<div><p>Para 1</p><div><p>Para 2</p></div></div>');

441

const paragraphs = findElementsByTagName(document, 'p');

442

console.log(paragraphs.length); // 2

443

```

444

445

### Attribute Manipulation

446

447

```typescript

448

import { parseFragment, defaultTreeAdapter } from "parse5";

449

import type { Element } from "parse5";

450

451

const fragment = parseFragment('<div class="old">Content</div>');

452

const element = fragment.childNodes[0] as Element;

453

454

// Read attributes

455

const classList = defaultTreeAdapter.getAttr(element, 'class');

456

console.log(classList); // 'old'

457

458

// Modify attributes

459

defaultTreeAdapter.setAttr(element, 'class', 'new updated');

460

defaultTreeAdapter.setAttr(element, 'data-id', '123');

461

462

// Check all attributes

463

const allAttrs = defaultTreeAdapter.getAttrList(element);

464

console.log(allAttrs); // [{ name: 'class', value: 'new updated' }, { name: 'data-id', value: '123' }]

465

```