0
# parse5-htmlparser2-tree-adapter
1
2
Tree adapter that enables parse5 (a WHATWG HTML5-compliant HTML parser) to work with htmlparser2's DOM representation and API. The adapter implements parse5's `TreeAdapter` interface on top of htmlparser2's (domhandler) node classes, so parse5 builds its output tree directly in htmlparser2's format. This lets developers use parse5's spec-compliant parsing while staying compatible with existing htmlparser2-based codebases.
3
4
## Package Information
5
6
- **Package Name**: parse5-htmlparser2-tree-adapter
7
- **Package Type**: npm
8
- **Language**: TypeScript
9
- **Installation**: `npm install parse5-htmlparser2-tree-adapter`
10
11
## Core Imports
12
13
```typescript
14
import { adapter, serializeDoctypeContent, type Htmlparser2TreeAdapterMap } from "parse5-htmlparser2-tree-adapter";
15
```
16
17
For CommonJS:
18
19
```javascript
20
const { adapter, serializeDoctypeContent } = require("parse5-htmlparser2-tree-adapter");
21
```
22
23
## Basic Usage
24
25
```typescript
26
import { parse, parseFragment } from "parse5";
27
import { adapter } from "parse5-htmlparser2-tree-adapter";
28
29
// Parse HTML document with htmlparser2 DOM representation
30
const document = parse("<html><body><h1>Hello World</h1></body></html>", {
31
treeAdapter: adapter
32
});
33
34
// Parse HTML fragment
35
const fragment = parseFragment("<p>Hello <strong>world</strong></p>", {
36
treeAdapter: adapter
37
});
38
39
// The resulting nodes are htmlparser2-compatible (domhandler) nodes
40
console.log(document.children[0].name); // "html"
41
```
42
43
## Architecture
44
45
The adapter implements parse5's TreeAdapter interface using domhandler node types:
46
47
- **Node Type Mapping**: Maps parse5's generic tree types to specific domhandler classes (Document, Element, Text, Comment, ProcessingInstruction)
48
- **Attribute Handling**: Manages HTML attributes with namespace and prefix information in htmlparser2 format
49
- **Source Location**: Supports parse5's source code location tracking when enabled
50
- **DOM Tree Operations**: Implements all required tree mutation and traversal operations
51
52
## Capabilities
53
54
### Tree Adapter Interface
55
56
Main adapter object implementing parse5's TreeAdapter interface for htmlparser2 DOM representation.
57
58
```typescript { .api }
59
declare const adapter: TreeAdapter<Htmlparser2TreeAdapterMap>;
60
61
interface TreeAdapter<TMap extends TreeAdapterTypeMap> {
62
// Node type checking
63
isCommentNode(node: TMap['node']): node is TMap['commentNode'];
64
isElementNode(node: TMap['node']): node is TMap['element'];
65
isTextNode(node: TMap['node']): node is TMap['textNode'];
66
isDocumentTypeNode(node: TMap['node']): node is TMap['documentTypeNode'];
67
68
// Node construction
69
createDocument(): TMap['document'];
70
createDocumentFragment(): TMap['documentFragment'];
71
createElement(tagName: string, namespaceURI: html.NS, attrs: Token.Attribute[]): TMap['element'];
72
createCommentNode(data: string): TMap['commentNode'];
73
createTextNode(value: string): TMap['textNode'];
74
75
// Tree mutation
76
appendChild(parentNode: TMap['parentNode'], newNode: TMap['childNode']): void;
77
insertBefore(parentNode: TMap['parentNode'], newNode: TMap['childNode'], referenceNode: TMap['childNode']): void;
78
detachNode(node: TMap['childNode']): void;
79
insertText(parentNode: TMap['parentNode'], text: string): void;
80
insertTextBefore(parentNode: TMap['parentNode'], text: string, referenceNode: TMap['childNode']): void;
81
adoptAttributes(recipient: TMap['element'], attrs: Token.Attribute[]): void;
82
83
// Template handling
84
setTemplateContent(templateElement: TMap['template'], contentElement: TMap['documentFragment']): void;
85
getTemplateContent(templateElement: TMap['template']): TMap['documentFragment'];
86
87
// Document type and mode
88
setDocumentType(document: TMap['document'], name: string, publicId: string, systemId: string): void;
89
setDocumentMode(document: TMap['document'], mode: html.DOCUMENT_MODE): void;
90
getDocumentMode(document: TMap['document']): html.DOCUMENT_MODE;
91
92
// Tree traversal
93
getFirstChild(node: TMap['parentNode']): TMap['childNode'] | null;
94
getChildNodes(node: TMap['parentNode']): TMap['childNode'][];
95
getParentNode(node: TMap['node']): TMap['parentNode'] | null;
96
getAttrList(element: TMap['element']): Token.Attribute[];
97
98
// Node data access
99
getTagName(element: TMap['element']): string;
100
getNamespaceURI(element: TMap['element']): html.NS;
101
getTextNodeContent(textNode: TMap['textNode']): string;
102
getCommentNodeContent(commentNode: TMap['commentNode']): string;
103
getDocumentTypeNodeName(doctypeNode: TMap['documentTypeNode']): string;
104
getDocumentTypeNodePublicId(doctypeNode: TMap['documentTypeNode']): string;
105
getDocumentTypeNodeSystemId(doctypeNode: TMap['documentTypeNode']): string;
106
107
// Source code location
108
setNodeSourceCodeLocation(node: TMap['node'], location: Token.ElementLocation | null): void;
109
getNodeSourceCodeLocation(node: TMap['node']): Token.ElementLocation | null | undefined;
110
updateNodeSourceCodeLocation(node: TMap['node'], endLocation: Token.ElementLocation): void;
111
}
112
```
113
114
### DOCTYPE Serialization
115
116
Utility function for serializing DOCTYPE declaration content to string format.
117
118
```typescript { .api }
119
/**
120
* Serializes DOCTYPE declaration content to string format
121
* @param name - DOCTYPE name
122
* @param publicId - Public identifier
123
* @param systemId - System identifier
124
* @returns Serialized DOCTYPE string
125
*/
126
declare function serializeDoctypeContent(name: string, publicId: string, systemId: string): string;
127
```
128
129
**Usage Example:**
130
131
```typescript
132
import { serializeDoctypeContent } from "parse5-htmlparser2-tree-adapter";
133
134
// Basic DOCTYPE
135
const doctype = serializeDoctypeContent("html", "", "");
136
// Result: "!DOCTYPE html"
137
138
// DOCTYPE with public ID
139
const xhtmlDoctype = serializeDoctypeContent(
140
"html",
141
"-//W3C//DTD XHTML 1.0 Strict//EN",
142
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
143
);
144
// Result: "!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\""
145
```
146
147
## Types
148
149
### Htmlparser2TreeAdapterMap
150
151
Type map specialization that defines which domhandler node types correspond to each parse5 tree adapter type. This is exported as a type that can be used for type annotations when working with the adapter.
152
153
```typescript { .api }
154
export type Htmlparser2TreeAdapterMap = TreeAdapterTypeMap<
155
AnyNode,
156
ParentNode,
157
ChildNode,
158
Document,
159
Document,
160
Element,
161
Comment,
162
Text,
163
Element,
164
ProcessingInstruction
165
>;
166
```
167
168
### Domhandler Node Types
169
170
The adapter uses domhandler node types for all DOM operations:
171
172
```typescript { .api }
173
// From domhandler
174
type AnyNode = ParentNode | ChildNode;
175
interface ParentNode {
176
children: ChildNode[];
177
parent: ParentNode | null;
178
}
179
interface ChildNode {
180
parent: ParentNode | null;
181
prev: ChildNode | null;
182
next: ChildNode | null;
183
}
184
185
declare class Document implements ParentNode {
186
constructor(children: ChildNode[]);
187
children: ChildNode[];
188
parent: null;
189
['x-mode']?: html.DOCUMENT_MODE;
190
startIndex?: number;
191
endIndex?: number;
192
sourceCodeLocation?: Token.ElementLocation | null;
193
}
194
195
declare class Element implements ChildNode {
196
constructor(name: string, attribs: Record<string, string>, children: ChildNode[]);
197
name: string;
198
attribs: Record<string, string>;
199
children: ChildNode[];
200
parent: ParentNode | null;
201
prev: ChildNode | null;
202
next: ChildNode | null;
203
namespace?: string;
204
['x-attribsNamespace']?: Record<string, string>;
205
['x-attribsPrefix']?: Record<string, string>;
206
startIndex?: number;
207
endIndex?: number;
208
sourceCodeLocation?: Token.ElementLocation | null;
209
}
210
211
declare class Text implements ChildNode {
212
constructor(data: string);
213
data: string;
214
parent: ParentNode | null;
215
prev: ChildNode | null;
216
next: ChildNode | null;
217
startIndex?: number;
218
endIndex?: number;
219
sourceCodeLocation?: Token.ElementLocation | null;
220
}
221
222
declare class Comment implements ChildNode {
223
constructor(data: string);
224
data: string;
225
parent: ParentNode | null;
226
prev: ChildNode | null;
227
next: ChildNode | null;
228
startIndex?: number;
229
endIndex?: number;
230
sourceCodeLocation?: Token.ElementLocation | null;
231
}
232
233
declare class ProcessingInstruction implements ChildNode {
234
constructor(name: string, data: string);
235
name: string;
236
data: string;
237
parent: ParentNode | null;
238
prev: ChildNode | null;
239
next: ChildNode | null;
240
['x-name']?: string;
241
['x-publicId']?: string;
242
['x-systemId']?: string;
243
startIndex?: number;
244
endIndex?: number;
245
sourceCodeLocation?: Token.ElementLocation | null;
246
}
247
```
248
249
### Parse5 Types
250
251
Key types from parse5 used by the adapter:
252
253
```typescript { .api }
254
// From parse5
255
interface Token {
256
namespace?: Namespace;
257
prefix?: string;
258
name: string;
259
value: string;
260
}
261
262
namespace Token {
263
interface Attribute {
264
namespace?: string;
265
prefix?: string;
266
name: string;
267
value: string;
268
}
269
270
interface ElementLocation {
271
startOffset?: number;
272
endOffset?: number;
273
startLine?: number;
274
startCol?: number;
275
endLine?: number;
276
endCol?: number;
277
}
278
}
279
280
namespace html {
281
enum DOCUMENT_MODE {
282
NO_QUIRKS = 'no-quirks',
283
QUIRKS = 'quirks',
284
LIMITED_QUIRKS = 'limited-quirks'
285
}
286
287
enum NS {
288
HTML = 'http://www.w3.org/1999/xhtml',
289
MATHML = 'http://www.w3.org/1998/Math/MathML',
290
SVG = 'http://www.w3.org/2000/svg',
291
XLINK = 'http://www.w3.org/1999/xlink',
292
XML = 'http://www.w3.org/XML/1998/namespace',
293
XMLNS = 'http://www.w3.org/2000/xmlns/'
294
}
295
}
296
```
297
298
## Implementation Notes
299
300
The adapter extends domhandler nodes with additional properties for parse5 compatibility:
301
302
- **Source Location Tracking**: When enabled, nodes have `startIndex`, `endIndex`, and `sourceCodeLocation` properties
303
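- **Namespace Information**: Element nodes store their own namespace in the `namespace` property, and per-attribute namespace and prefix data in the `x-attribsNamespace` and `x-attribsPrefix` properties (both keyed by attribute name)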
304
- **Document Mode**: Document nodes store HTML document mode in the `x-mode` property
305
- **DOCTYPE Information**: ProcessingInstruction nodes used for DOCTYPE store parsed data in `x-name`, `x-publicId`, and `x-systemId` properties
306
307
## Advanced Usage
308
309
### Using with parse5 Parser Options
310
311
```typescript
312
import { parse, parseFragment, serialize } from "parse5";
313
import { adapter } from "parse5-htmlparser2-tree-adapter";
314
315
// Parse with source location tracking
316
const documentWithLocation = parse("<html><body><p>Hello</p></body></html>", {
317
treeAdapter: adapter,
318
sourceCodeLocationInfo: true
319
});
320
321
// Access source location information
322
const element = documentWithLocation.children[0]; // html element
323
console.log(element.sourceCodeLocation?.startOffset);
324
console.log(element.sourceCodeLocation?.endOffset);
325
326
// Parse fragment with context
327
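// The context element is passed as the first argument, not as an option;
// `contextTableElement` stands for an element node you already have.
const fragment = parseFragment(contextTableElement, "<td>Cell content</td>", {
328
treeAdapter: adapter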
330
});
331
332
// Serialize back to HTML using parse5's serializer
333
const html = serialize(documentWithLocation, { treeAdapter: adapter });
334
```
335
336
### Working with htmlparser2-style DOM
337
338
```typescript
339
import { parse } from "parse5";
340
import { adapter } from "parse5-htmlparser2-tree-adapter";
341
342
const document = parse("<div class='content'>Hello <strong>World</strong></div>", {
343
treeAdapter: adapter
344
});
345
346
// Navigate using htmlparser2/domhandler patterns
347
const html = document.children[0]; // html element
348
const body = html.children[1]; // body element
349
const div = body.children[0]; // div element
350
351
// Access attributes htmlparser2-style
352
console.log(div.attribs.class); // "content"
353
354
// Traverse siblings
355
let current = div.children[0]; // "Hello " text node
356
while (current) {
357
console.log(current.data || current.name); // "Hello ", then "strong" ("World" is a child of <strong>, not a sibling)
358
current = current.next;
359
}
360
361
// Use domhandler utilities
362
import { isText, isTag } from "domhandler";
363
div.children.forEach(child => {
364
if (isText(child)) {
365
console.log("Text:", child.data);
366
} else if (isTag(child)) {
367
console.log("Element:", child.name);
368
}
369
});
370
```