or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

index.md

docs/index.md

0

# parse5-htmlparser2-tree-adapter

1

2

Tree adapter that enables parse5 (a WHATWG HTML5-compliant HTML parser) to work with htmlparser2's DOM representation and API. This adapter translates parse5's internal tree representation to htmlparser2's format, allowing developers to leverage parse5's spec-compliant parsing while maintaining compatibility with existing htmlparser2-based codebases.

3

4

## Package Information

5

6

- **Package Name**: parse5-htmlparser2-tree-adapter

7

- **Package Type**: npm

8

- **Language**: TypeScript

9

- **Installation**: `npm install parse5-htmlparser2-tree-adapter`

10

11

## Core Imports

12

13

```typescript

14

import { adapter, serializeDoctypeContent, type Htmlparser2TreeAdapterMap } from "parse5-htmlparser2-tree-adapter";

15

```

16

17

For CommonJS:

18

19

```javascript

20

const { adapter, serializeDoctypeContent } = require("parse5-htmlparser2-tree-adapter");

21

```

22

23

## Basic Usage

24

25

```typescript

26

import { parse, parseFragment } from "parse5";

27

import { adapter } from "parse5-htmlparser2-tree-adapter";

28

29

// Parse HTML document with htmlparser2 DOM representation

30

const document = parse("<html><body><h1>Hello World</h1></body></html>", {

31

treeAdapter: adapter

32

});

33

34

// Parse HTML fragment

35

const fragment = parseFragment("<p>Hello <strong>world</strong></p>", {

36

treeAdapter: adapter

37

});

38

39

// The resulting nodes are htmlparser2-compatible (domhandler) nodes

40

console.log(document.children[0].name); // "html"

41

```

42

43

## Architecture

44

45

The adapter implements parse5's TreeAdapter interface using domhandler node types:

46

47

- **Node Type Mapping**: Maps parse5's generic tree types to specific domhandler classes (Document, Element, Text, Comment, ProcessingInstruction)

48

- **Attribute Handling**: Manages HTML attributes with namespace and prefix information in htmlparser2 format

49

- **Source Location**: Supports parse5's source code location tracking when enabled

50

- **DOM Tree Operations**: Implements all required tree mutation and traversal operations

51

52

## Capabilities

53

54

### Tree Adapter Interface

55

56

Main adapter object implementing parse5's TreeAdapter interface for htmlparser2 DOM representation.

57

58

```typescript { .api }

59

declare const adapter: TreeAdapter<Htmlparser2TreeAdapterMap>;

60

61

interface TreeAdapter<TMap extends TreeAdapterTypeMap> {

62

// Node type checking

63

isCommentNode(node: TMap['node']): node is TMap['commentNode'];

64

isElementNode(node: TMap['node']): node is TMap['element'];

65

isTextNode(node: TMap['node']): node is TMap['textNode'];

66

isDocumentTypeNode(node: TMap['node']): node is TMap['documentTypeNode'];

67

68

// Node construction

69

createDocument(): TMap['document'];

70

createDocumentFragment(): TMap['documentFragment'];

71

createElement(tagName: string, namespaceURI: html.NS, attrs: Token.Attribute[]): TMap['element'];

72

createCommentNode(data: string): TMap['commentNode'];

73

createTextNode(value: string): TMap['textNode'];

74

75

// Tree mutation

76

appendChild(parentNode: TMap['parentNode'], newNode: TMap['childNode']): void;

77

insertBefore(parentNode: TMap['parentNode'], newNode: TMap['childNode'], referenceNode: TMap['childNode']): void;

78

detachNode(node: TMap['childNode']): void;

79

insertText(parentNode: TMap['parentNode'], text: string): void;

80

insertTextBefore(parentNode: TMap['parentNode'], text: string, referenceNode: TMap['childNode']): void;

81

adoptAttributes(recipient: TMap['element'], attrs: Token.Attribute[]): void;

82

83

// Template handling

84

setTemplateContent(templateElement: TMap['template'], contentElement: TMap['documentFragment']): void;

85

getTemplateContent(templateElement: TMap['template']): TMap['documentFragment'];

86

87

// Document type and mode

88

setDocumentType(document: TMap['document'], name: string, publicId: string, systemId: string): void;

89

setDocumentMode(document: TMap['document'], mode: html.DOCUMENT_MODE): void;

90

getDocumentMode(document: TMap['document']): html.DOCUMENT_MODE;

91

92

// Tree traversal

93

getFirstChild(node: TMap['parentNode']): TMap['childNode'] | null;

94

getChildNodes(node: TMap['parentNode']): TMap['childNode'][];

95

getParentNode(node: TMap['node']): TMap['parentNode'] | null;

96

getAttrList(element: TMap['element']): Token.Attribute[];

97

98

// Node data access

99

getTagName(element: TMap['element']): string;

100

getNamespaceURI(element: TMap['element']): html.NS;

101

getTextNodeContent(textNode: TMap['textNode']): string;

102

getCommentNodeContent(commentNode: TMap['commentNode']): string;

103

getDocumentTypeNodeName(doctypeNode: TMap['documentTypeNode']): string;

104

getDocumentTypeNodePublicId(doctypeNode: TMap['documentTypeNode']): string;

105

getDocumentTypeNodeSystemId(doctypeNode: TMap['documentTypeNode']): string;

106

107

// Source code location

108

setNodeSourceCodeLocation(node: TMap['node'], location: Token.ElementLocation | null): void;

109

getNodeSourceCodeLocation(node: TMap['node']): Token.ElementLocation | null | undefined;

110

updateNodeSourceCodeLocation(node: TMap['node'], endLocation: Token.ElementLocation): void;

111

}

112

```

113

114

### DOCTYPE Serialization

115

116

Utility function for serializing DOCTYPE declaration content to string format.

117

118

```typescript { .api }

119

/**

120

* Serializes DOCTYPE declaration content to string format

121

* @param name - DOCTYPE name

122

* @param publicId - Public identifier

123

* @param systemId - System identifier

124

* @returns Serialized DOCTYPE string

125

*/

126

declare function serializeDoctypeContent(name: string, publicId: string, systemId: string): string;

127

```

128

129

**Usage Example:**

130

131

```typescript

132

import { serializeDoctypeContent } from "parse5-htmlparser2-tree-adapter";

133

134

// Basic DOCTYPE

135

const doctype = serializeDoctypeContent("html", "", "");

136

// Result: "!DOCTYPE html"

137

138

// DOCTYPE with public ID

139

const xhtmlDoctype = serializeDoctypeContent(

140

"html",

141

"-//W3C//DTD XHTML 1.0 Strict//EN",

142

"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"

143

);

144

// Result: "!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\""

145

```

146

147

## Types

148

149

### Htmlparser2TreeAdapterMap

150

151

Type map specialization that defines which domhandler node types correspond to each parse5 tree adapter type. This is exported as a type that can be used for type annotations when working with the adapter.

152

153

```typescript { .api }

154

export type Htmlparser2TreeAdapterMap = TreeAdapterTypeMap<

155

AnyNode,

156

ParentNode,

157

ChildNode,

158

Document,

159

Document,

160

Element,

161

Comment,

162

Text,

163

Element,

164

ProcessingInstruction

165

>;

166

```

167

168

### Domhandler Node Types

169

170

The adapter uses domhandler node types for all DOM operations:

171

172

```typescript { .api }

173

// From domhandler

174

type AnyNode = ParentNode | ChildNode;

175

interface ParentNode {

176

children: ChildNode[];

177

parent: ParentNode | null;

178

}

179

interface ChildNode {

180

parent: ParentNode | null;

181

prev: ChildNode | null;

182

next: ChildNode | null;

183

}

184

185

declare class Document implements ParentNode {

186

constructor(children: ChildNode[]);

187

children: ChildNode[];

188

parent: null;

189

['x-mode']?: html.DOCUMENT_MODE;

190

startIndex?: number;

191

endIndex?: number;

192

sourceCodeLocation?: Token.ElementLocation | null;

193

}

194

195

declare class Element implements ChildNode {

196

constructor(name: string, attribs: Record<string, string>, children: ChildNode[]);

197

name: string;

198

attribs: Record<string, string>;

199

children: ChildNode[];

200

parent: ParentNode | null;

201

prev: ChildNode | null;

202

next: ChildNode | null;

203

namespace?: string;

204

['x-attribsNamespace']?: Record<string, string>;

205

['x-attribsPrefix']?: Record<string, string>;

206

startIndex?: number;

207

endIndex?: number;

208

sourceCodeLocation?: Token.ElementLocation | null;

209

}

210

211

declare class Text implements ChildNode {

212

constructor(data: string);

213

data: string;

214

parent: ParentNode | null;

215

prev: ChildNode | null;

216

next: ChildNode | null;

217

startIndex?: number;

218

endIndex?: number;

219

sourceCodeLocation?: Token.ElementLocation | null;

220

}

221

222

declare class Comment implements ChildNode {

223

constructor(data: string);

224

data: string;

225

parent: ParentNode | null;

226

prev: ChildNode | null;

227

next: ChildNode | null;

228

startIndex?: number;

229

endIndex?: number;

230

sourceCodeLocation?: Token.ElementLocation | null;

231

}

232

233

declare class ProcessingInstruction implements ChildNode {

234

constructor(name: string, data: string);

235

name: string;

236

data: string;

237

parent: ParentNode | null;

238

prev: ChildNode | null;

239

next: ChildNode | null;

240

['x-name']?: string;

241

['x-publicId']?: string;

242

['x-systemId']?: string;

243

startIndex?: number;

244

endIndex?: number;

245

sourceCodeLocation?: Token.ElementLocation | null;

246

}

247

```

248

249

### Parse5 Types

250

251

Key types from parse5 used by the adapter:

252

253

```typescript { .api }

254

// From parse5

255

interface Token {

256

namespace?: Namespace;

257

prefix?: string;

258

name: string;

259

value: string;

260

}

261

262

namespace Token {

263

interface Attribute {

264

namespace?: string;

265

prefix?: string;

266

name: string;

267

value: string;

268

}

269

270

interface ElementLocation {

271

startOffset?: number;

272

endOffset?: number;

273

startLine?: number;

274

startCol?: number;

275

endLine?: number;

276

endCol?: number;

277

}

278

}

279

280

namespace html {

281

enum DOCUMENT_MODE {

282

NO_QUIRKS = 'no-quirks',

283

QUIRKS = 'quirks',

284

LIMITED_QUIRKS = 'limited-quirks'

285

}

286

287

enum NS {

288

HTML = 'http://www.w3.org/1999/xhtml',

289

MATHML = 'http://www.w3.org/1998/Math/MathML',

290

SVG = 'http://www.w3.org/2000/svg',

291

XLINK = 'http://www.w3.org/1999/xlink',

292

XML = 'http://www.w3.org/XML/1998/namespace',

293

XMLNS = 'http://www.w3.org/2000/xmlns/'

294

}

295

}

296

```

297

298

## Implementation Notes

299

300

The adapter extends domhandler nodes with additional properties for parse5 compatibility:

301

302

- **Source Location Tracking**: When enabled, nodes have `startIndex`, `endIndex`, and `sourceCodeLocation` properties

303

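- **Namespace Information**: Element nodes store their own namespace in the `namespace` property, and per-attribute namespace and prefix data in the `x-attribsNamespace` and `x-attribsPrefix` properties (both keyed by attribute name)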

304

- **Document Mode**: Document nodes store HTML document mode in the `x-mode` property

305

- **DOCTYPE Information**: ProcessingInstruction nodes used for DOCTYPE store parsed data in `x-name`, `x-publicId`, and `x-systemId` properties

306

307

## Advanced Usage

308

309

### Using with parse5 Parser Options

310

311

```typescript

312

import { parse, parseFragment, serialize } from "parse5";

313

import { adapter } from "parse5-htmlparser2-tree-adapter";

314

315

// Parse with source location tracking

316

const documentWithLocation = parse(html, {

317

treeAdapter: adapter,

318

sourceCodeLocationInfo: true

319

});

320

321

// Access source location information

322

const element = documentWithLocation.children[0]; // html element

323

console.log(element.sourceCodeLocation?.startOffset);

324

console.log(element.sourceCodeLocation?.endOffset);

325

326

// Parse fragment with context

327

const fragment = parseFragment(contextTableElement, "<td>Cell content</td>", {

328

// The context element is passed as the first argument, not as a parser option

329

treeAdapter: adapter

330

});

331

332

// Serialize back to HTML using parse5's serializer

333

const serializedHtml = serialize(documentWithLocation, { treeAdapter: adapter });

334

```

335

336

### Working with htmlparser2-style DOM

337

338

```typescript

339

import { parse } from "parse5";

340

import { adapter } from "parse5-htmlparser2-tree-adapter";

341

342

const document = parse("<div class='content'>Hello <strong>World</strong></div>", {

343

treeAdapter: adapter

344

});

345

346

// Navigate using htmlparser2/domhandler patterns

347

const html = document.children[0]; // html element

348

const body = html.children[1]; // body element

349

const div = body.children[0]; // div element

350

351

// Access attributes htmlparser2-style

352

console.log(div.attribs.class); // "content"

353

354

// Traverse siblings

355

let current = div.children[0]; // "Hello " text node

356

while (current) {

357

console.log(current.data || current.name); // "Hello ", "strong" ("World" is a child of <strong>, not a sibling)

358

current = current.next;

359

}

360

361

// Use domhandler utilities

362

import { isText, isTag } from "domhandler";

363

div.children.forEach(child => {

364

if (isText(child)) {

365

console.log("Text:", child.data);

366

} else if (isTag(child)) {

367

console.log("Element:", child.name);

368

}

369

});

370

```