0
# Tree Adapters
1
2
Tree adapters provide a pluggable interface for customizing how parsed HTML is represented in memory. They define the structure of AST nodes and provide methods for creating, manipulating, and inspecting nodes.
3
4
## Capabilities
5
6
### Default Tree Adapter
7
8
The built-in tree adapter that creates DOM-like node structures with standard properties.
9
10
```typescript { .api }
11
/**
12
* Default tree adapter that creates DOM-like nodes
13
*/
14
const defaultTreeAdapter: TreeAdapter<DefaultTreeAdapterMap>;
15
16
/**
17
* Type mapping for the default tree adapter
18
*/
19
interface DefaultTreeAdapterMap extends TreeAdapterTypeMap<
20
Node,
21
ParentNode,
22
ChildNode,
23
Document,
24
DocumentFragment,
25
Element,
26
CommentNode,
27
TextNode,
28
Template,
29
DocumentType
30
> {}
31
```
32
33
**Usage Examples:**
34
35
```typescript
36
import { parse, defaultTreeAdapter } from "parse5";
37
38
// Default tree adapter is used automatically
39
const document = parse('<div>Content</div>');
40
41
// Can be specified explicitly
42
const documentExplicit = parse('<div>Content</div>', {
43
treeAdapter: defaultTreeAdapter
44
});
45
46
// Access node properties (default tree adapter structure)
47
const element = document.childNodes[0].childNodes[1].childNodes[0]; // html > body > div (no doctype in the input, so <html> is childNodes[0])
48
console.log(element.tagName); // 'div'
49
console.log(element.nodeName); // 'div'
50
console.log(element.childNodes[0].value); // 'Content'
51
```
52
53
### Tree Adapter Interface
54
55
Complete interface for implementing custom tree adapters.
56
57
```typescript { .api }
58
/**
59
* Tree adapter interface defining all required methods for AST manipulation
60
*/
61
interface TreeAdapter<T extends TreeAdapterTypeMap = TreeAdapterTypeMap> {
62
// Node creation methods
63
createDocument(): T['document'];
64
createDocumentFragment(): T['documentFragment'];
65
createElement(tagName: string, namespaceURI: NS, attrs: Attribute[]): T['element'];
66
createCommentNode(data: string): T['commentNode'];
67
createTextNode(value: string): T['textNode'];
68
69
// Node manipulation methods
70
appendChild(parentNode: T['parentNode'], newNode: T['childNode']): void;
71
insertBefore(parentNode: T['parentNode'], newNode: T['childNode'], referenceNode: T['childNode']): void;
72
setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;
73
getTemplateContent(templateElement: T['template']): T['documentFragment'];
74
detachNode(node: T['childNode']): void;
75
76
// Node inspection methods
77
getTagName(element: T['element']): string;
78
getNamespaceURI(element: T['element']): string;
79
getTextNodeContent(textNode: T['textNode']): string;
80
getCommentNodeContent(commentNode: T['commentNode']): string;
81
getDocumentTypeNodeName(doctypeNode: T['documentType']): string;
82
getDocumentTypeNodePublicId(doctypeNode: T['documentType']): string;
83
getDocumentTypeNodeSystemId(doctypeNode: T['documentType']): string;
84
85
// Attribute methods
86
getAttrList(element: T['element']): Attribute[];
87
getAttr(element: T['element'], attrName: string): string | null;
88
setAttr(element: T['element'], attrName: string, attrValue: string): void;
89
90
// Parent/child relationships
91
getChildNodes(node: T['parentNode']): T['childNode'][];
92
getParentNode(node: T['childNode']): T['parentNode'] | null;
93
getFirstChild(node: T['parentNode']): T['childNode'] | null;
94
95
// Type guard methods
96
isElementNode(node: T['node']): node is T['element'];
97
isTextNode(node: T['node']): node is T['textNode'];
98
isCommentNode(node: T['node']): node is T['commentNode'];
99
isDocumentTypeNode(node: T['node']): node is T['documentType'];
100
101
// Special methods
102
adoptAttributes(recipient: T['element'], attrs: Attribute[]): void;
103
getDocumentMode(document: T['document']): DOCUMENT_MODE;
104
setDocumentMode(document: T['document'], mode: DOCUMENT_MODE): void;
105
setDocumentType(document: T['document'], name: string, publicId: string, systemId: string): void;
106
insertText(parentNode: T['parentNode'], text: string): void;
107
insertTextBefore(parentNode: T['parentNode'], text: string, referenceNode: T['childNode']): void;
108
109
// Template methods
110
getTemplateContent(templateElement: T['template']): T['documentFragment'];
111
setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;
112
113
// Location methods
114
getNodeSourceCodeLocation(node: T['node']): ElementLocation | undefined | null;
115
setNodeSourceCodeLocation(node: T['node'], location: ElementLocation | null): void;
116
updateNodeSourceCodeLocation(node: T['node'], location: Partial<ElementLocation>): void;
117
118
// Optional callback methods
119
onItemPush?(item: T['element']): void;
120
onItemPop?(item: T['element'], newTop: T['parentNode']): void;
121
}
122
```
123
124
### Default Tree Adapter Node Types
125
126
Node interfaces provided by the default tree adapter.
127
128
```typescript { .api }
129
/**
130
* Document node representing the entire HTML document
131
*/
132
interface Document {
133
nodeName: '#document';
134
mode: 'no-quirks' | 'quirks' | 'limited-quirks';
135
childNodes: ChildNode[];
136
sourceCodeLocation?: Location | null;
137
}
138
139
/**
140
* Document fragment node for parsing HTML fragments
141
*/
142
interface DocumentFragment {
143
nodeName: '#document-fragment';
144
childNodes: ChildNode[];
145
sourceCodeLocation?: Location | null;
146
}
147
148
/**
149
* Element node representing HTML elements
150
*/
151
interface Element {
152
nodeName: string;
153
tagName: string;
154
attrs: Attribute[];
155
namespaceURI: string;
156
sourceCodeLocation?: ElementLocation | null;
157
parentNode: ParentNode | null;
158
childNodes: ChildNode[];
159
}
160
161
/**
162
* Text node containing text content
163
*/
164
interface TextNode {
165
nodeName: '#text';
166
parentNode: ParentNode | null;
167
value: string;
168
sourceCodeLocation?: Location | null;
169
}
170
171
/**
172
* Comment node containing comment text
173
*/
174
interface CommentNode {
175
nodeName: '#comment';
176
parentNode: ParentNode | null;
177
data: string;
178
sourceCodeLocation?: Location | null;
179
}
180
181
/**
182
* Document type node (DOCTYPE declaration)
183
*/
184
interface DocumentType {
185
nodeName: '#documentType';
186
parentNode: ParentNode | null;
187
name: string;
188
publicId: string;
189
systemId: string;
190
sourceCodeLocation?: Location | null;
191
}
192
193
/**
194
* Template element with content fragment
195
*/
196
interface Template extends Element {
197
nodeName: 'template';
198
tagName: 'template';
199
content: DocumentFragment;
200
}
201
202
/**
203
* Union types for node categorization
204
*/
205
type ParentNode = Document | DocumentFragment | Element | Template;
206
type ChildNode = Element | Template | CommentNode | TextNode | DocumentType;
207
type Node = ParentNode | ChildNode;
208
209
/**
210
* Document mode enumeration
211
*/
212
type DOCUMENT_MODE = 'no-quirks' | 'quirks' | 'limited-quirks';
213
214
/**
215
* Namespace enumeration
216
*/
217
enum NS {
218
HTML = 'http://www.w3.org/1999/xhtml',
219
MATHML = 'http://www.w3.org/1998/Math/MathML',
220
SVG = 'http://www.w3.org/2000/svg',
221
XLINK = 'http://www.w3.org/1999/xlink',
222
XML = 'http://www.w3.org/XML/1998/namespace',
223
XMLNS = 'http://www.w3.org/2000/xmlns/'
224
}
225
226
/**
227
* Attribute interface
228
*/
229
interface Attribute {
230
name: string;
231
value: string;
232
namespace?: string;
233
prefix?: string;
234
}
235
236
/**
237
* Basic location interface
238
*/
239
interface Location {
240
startLine: number;
241
startCol: number;
242
startOffset: number;
243
endLine: number;
244
endCol: number;
245
endOffset: number;
246
}
247
248
/**
249
* Element location interface
250
*/
251
interface ElementLocation extends Location {
252
startTag?: Location;
253
endTag?: Location;
254
attrs?: Record<string, Location>;
255
}
256
```
257
258
**Usage Examples:**
259
260
```typescript
261
import { parse, parseFragment } from "parse5";
262
import type { Element, TextNode, Document } from "parse5";
263
264
// Type-safe node access with default tree adapter
265
const document: Document = parse('<div>Hello <span>World</span></div>');
266
const htmlElement = document.childNodes[0] as Element;
267
const bodyElement = htmlElement.childNodes[1] as Element;
268
const divElement = bodyElement.childNodes[0] as Element;
269
270
console.log(divElement.tagName); // 'div'
271
console.log(divElement.attrs); // []
272
console.log(divElement.childNodes.length); // 2
273
274
const textNode = divElement.childNodes[0] as TextNode;
275
console.log(textNode.value); // 'Hello '
276
277
const spanElement = divElement.childNodes[1] as Element;
278
console.log(spanElement.tagName); // 'span'
279
```
280
281
### Custom Tree Adapter Implementation
282
283
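Example of implementing a custom tree adapter for specialized use cases. The example is abridged: `CustomTreeAdapterMap` stands for a `TreeAdapterTypeMap` instantiated with the custom node types, and a complete adapter must implement every non-optional `TreeAdapter` method, not only the ones shown.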
284
285
```typescript { .api }
286
/**
287
* Example custom tree adapter that adds custom properties to nodes
288
*/
289
interface CustomElement {
290
type: 'element';
291
tag: string;
292
attributes: Record<string, string>;
293
children: CustomNode[];
294
customProperty: string;
295
}
296
297
interface CustomText {
298
type: 'text';
299
content: string;
300
}
301
302
type CustomNode = CustomElement | CustomText;
303
304
const customTreeAdapter: TreeAdapter<CustomTreeAdapterMap> = {
305
// Implement all required TreeAdapter methods
306
createElement(tagName: string, namespaceURI: string, attrs: Attribute[]): CustomElement {
307
return {
308
type: 'element',
309
tag: tagName,
310
attributes: Object.fromEntries(attrs.map(attr => [attr.name, attr.value])),
311
children: [],
312
customProperty: `custom-${tagName}`
313
};
314
},
315
316
createTextNode(value: string): CustomText {
317
return {
318
type: 'text',
319
content: value
320
};
321
},
322
323
appendChild(parent: CustomElement, child: CustomNode): void {
324
parent.children.push(child);
325
},
326
327
isElementNode(node: CustomNode): node is CustomElement {
328
return node.type === 'element';
329
},
330
331
getTagName(element: CustomElement): string {
332
return element.tag;
333
},
334
335
// ... implement all other required methods
336
};
337
```
338
339
**Usage Examples:**
340
341
```typescript
342
import { parse } from "parse5";
343
344
// Use custom tree adapter
345
const document = parse('<div class="container">Content</div>', {
346
treeAdapter: customTreeAdapter
347
});
348
349
// Access custom properties
350
const element = document.children[0].children[1].children[0]; // html > body > div
351
console.log(element.customProperty); // 'custom-div'
352
console.log(element.attributes.class); // 'container'
353
```
354
355
### Tree Adapter Type Mapping
356
357
Type mapping interface for defining node types in custom tree adapters.
358
359
```typescript { .api }
360
/**
361
* Generic type mapping interface for tree adapters
362
*/
363
interface TreeAdapterTypeMap<
364
Node = unknown,
365
ParentNode = unknown,
366
ChildNode = unknown,
367
Document = unknown,
368
DocumentFragment = unknown,
369
Element = unknown,
370
CommentNode = unknown,
371
TextNode = unknown,
372
Template = unknown,
373
DocumentType = unknown
374
> {
375
node: Node;
376
parentNode: ParentNode;
377
childNode: ChildNode;
378
document: Document;
379
documentFragment: DocumentFragment;
380
element: Element;
381
commentNode: CommentNode;
382
textNode: TextNode;
383
template: Template;
384
documentType: DocumentType;
385
}
386
```
387
388
## Common Tree Adapter Patterns
389
390
### Node Traversal
391
392
```typescript
393
import { parse, defaultTreeAdapter } from "parse5";
394
import type { Element, Node } from "parse5";
395
396
function traverseNodes(node: Node, callback: (node: Node) => void): void {
397
callback(node);
398
399
if (defaultTreeAdapter.isElementNode(node) ||
400
node.nodeName === '#document' ||
401
node.nodeName === '#document-fragment') {
402
const children = defaultTreeAdapter.getChildNodes(node);
403
children.forEach(child => traverseNodes(child, callback));
404
}
405
}
406
407
const document = parse('<div><p>Text</p><span>More text</span></div>');
408
traverseNodes(document, (node) => {
409
console.log(node.nodeName);
410
});
411
```
412
413
### Element Filtering
414
415
```typescript
416
import { parse, defaultTreeAdapter } from "parse5";
417
import type { Element, Node } from "parse5";
418
419
function findElementsByTagName(root: Node, tagName: string): Element[] {
420
const results: Element[] = [];
421
422
function traverse(node: Node): void {
423
if (defaultTreeAdapter.isElementNode(node) &&
424
defaultTreeAdapter.getTagName(node) === tagName) {
425
results.push(node);
426
}
427
428
if (defaultTreeAdapter.isElementNode(node) ||
429
node.nodeName === '#document' ||
430
node.nodeName === '#document-fragment') {
431
const children = defaultTreeAdapter.getChildNodes(node);
432
children.forEach(traverse);
433
}
434
}
435
436
traverse(root);
437
return results;
438
}
439
440
const document = parse('<div><p>Para 1</p><div><p>Para 2</p></div></div>');
441
const paragraphs = findElementsByTagName(document, 'p');
442
console.log(paragraphs.length); // 2
443
```
444
445
### Attribute Manipulation
446
447
```typescript
448
import { parseFragment, defaultTreeAdapter } from "parse5";
449
import type { Element } from "parse5";
450
451
const fragment = parseFragment('<div class="old">Content</div>');
452
const element = fragment.childNodes[0] as Element;
453
454
// Read attributes
455
const classList = defaultTreeAdapter.getAttr(element, 'class');
456
console.log(classList); // 'old'
457
458
// Modify attributes
459
defaultTreeAdapter.setAttr(element, 'class', 'new updated');
460
defaultTreeAdapter.setAttr(element, 'data-id', '123');
461
462
// Check all attributes
463
const allAttrs = defaultTreeAdapter.getAttrList(element);
464
console.log(allAttrs); // [{ name: 'class', value: 'new updated' }, { name: 'data-id', value: '123' }]
465
```