htmlparser2 tree adapter for parse5 enabling WHATWG HTML5-compliant parsing with htmlparser2's DOM representation.
npx @tessl/cli install tessl/npm-parse5-htmlparser2-tree-adapter@8.0.0
Tree adapter that enables parse5 (a WHATWG HTML5-compliant HTML parser) to work with htmlparser2's DOM representation and API. This adapter translates parse5's internal tree representation to htmlparser2's format, allowing developers to leverage parse5's spec-compliant parsing while maintaining compatibility with existing htmlparser2-based codebases.
npm install parse5-htmlparser2-tree-adapter
import { adapter, serializeDoctypeContent, type Htmlparser2TreeAdapterMap } from "parse5-htmlparser2-tree-adapter";
For CommonJS:
const { adapter, serializeDoctypeContent } = require("parse5-htmlparser2-tree-adapter");
import { parse, parseFragment } from "parse5";
import { adapter } from "parse5-htmlparser2-tree-adapter";
// Parse HTML document with htmlparser2 DOM representation
const document = parse("<html><body><h1>Hello World</h1></body></html>", {
treeAdapter: adapter
});
// Parse HTML fragment
const fragment = parseFragment("<p>Hello <strong>world</strong></p>", {
treeAdapter: adapter
});
// The resulting nodes are htmlparser2-compatible (domhandler) nodes
console.log(document.children[0].name); // "html"
The adapter implements parse5's TreeAdapter interface using domhandler node types:
Main adapter object implementing parse5's TreeAdapter interface for htmlparser2 DOM representation.
declare const adapter: TreeAdapter<Htmlparser2TreeAdapterMap>;
interface TreeAdapter<TMap extends TreeAdapterTypeMap> {
// Node type checking
isCommentNode(node: TMap['node']): node is TMap['commentNode'];
isElementNode(node: TMap['node']): node is TMap['element'];
isTextNode(node: TMap['node']): node is TMap['textNode'];
isDocumentTypeNode(node: TMap['node']): node is TMap['documentTypeNode'];
// Node construction
createDocument(): TMap['document'];
createDocumentFragment(): TMap['documentFragment'];
createElement(tagName: string, namespaceURI: html.NS, attrs: Token.Attribute[]): TMap['element'];
createCommentNode(data: string): TMap['commentNode'];
createTextNode(value: string): TMap['textNode'];
// Tree mutation
appendChild(parentNode: TMap['parentNode'], newNode: TMap['childNode']): void;
insertBefore(parentNode: TMap['parentNode'], newNode: TMap['childNode'], referenceNode: TMap['childNode']): void;
detachNode(node: TMap['childNode']): void;
insertText(parentNode: TMap['parentNode'], text: string): void;
insertTextBefore(parentNode: TMap['parentNode'], text: string, referenceNode: TMap['childNode']): void;
adoptAttributes(recipient: TMap['element'], attrs: Token.Attribute[]): void;
// Template handling
setTemplateContent(templateElement: TMap['template'], contentElement: TMap['documentFragment']): void;
getTemplateContent(templateElement: TMap['template']): TMap['documentFragment'];
// Document type and mode
setDocumentType(document: TMap['document'], name: string, publicId: string, systemId: string): void;
setDocumentMode(document: TMap['document'], mode: html.DOCUMENT_MODE): void;
getDocumentMode(document: TMap['document']): html.DOCUMENT_MODE;
// Tree traversal
getFirstChild(node: TMap['parentNode']): TMap['childNode'] | null;
getChildNodes(node: TMap['parentNode']): TMap['childNode'][];
getParentNode(node: TMap['node']): TMap['parentNode'] | null;
getAttrList(element: TMap['element']): Token.Attribute[];
// Node data access
getTagName(element: TMap['element']): string;
getNamespaceURI(element: TMap['element']): html.NS;
getTextNodeContent(textNode: TMap['textNode']): string;
getCommentNodeContent(commentNode: TMap['commentNode']): string;
getDocumentTypeNodeName(doctypeNode: TMap['documentTypeNode']): string;
getDocumentTypeNodePublicId(doctypeNode: TMap['documentTypeNode']): string;
getDocumentTypeNodeSystemId(doctypeNode: TMap['documentTypeNode']): string;
// Source code location
setNodeSourceCodeLocation(node: TMap['node'], location: Token.ElementLocation | null): void;
getNodeSourceCodeLocation(node: TMap['node']): Token.ElementLocation | null | undefined;
updateNodeSourceCodeLocation(node: TMap['node'], endLocation: Token.ElementLocation): void;
}
Utility function for serializing DOCTYPE declaration content to string format.
/**
* Serializes DOCTYPE declaration content to string format
* @param name - DOCTYPE name
* @param publicId - Public identifier
* @param systemId - System identifier
* @returns Serialized DOCTYPE string
*/
declare function serializeDoctypeContent(name: string, publicId: string, systemId: string): string;
Usage Example:
import { serializeDoctypeContent } from "parse5-htmlparser2-tree-adapter";
// Basic DOCTYPE
const doctype = serializeDoctypeContent("html", "", "");
// Result: "!DOCTYPE html"
// DOCTYPE with public ID
const xhtmlDoctype = serializeDoctypeContent(
"html",
"-//W3C//DTD XHTML 1.0 Strict//EN",
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
);
// Result: "!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\""
Type map specialization that defines which domhandler node types correspond to each parse5 tree adapter type. This is exported as a type that can be used for type annotations when working with the adapter.
export type Htmlparser2TreeAdapterMap = TreeAdapterTypeMap<
AnyNode,
ParentNode,
ChildNode,
Document,
Document,
Element,
Comment,
Text,
Element,
ProcessingInstruction
>;
The adapter uses domhandler node types for all DOM operations:
// From domhandler
type AnyNode = ParentNode | ChildNode;
interface ParentNode {
children: ChildNode[];
parent: ParentNode | null;
}
interface ChildNode {
parent: ParentNode | null;
prev: ChildNode | null;
next: ChildNode | null;
}
/**
 * Document node type from domhandler, as used by the adapter.
 *
 * Besides `children` and `parent`, it carries optional properties that this
 * page lists as added for parse5 compatibility: `x-mode`, `startIndex`,
 * `endIndex` and `sourceCodeLocation`.
 */
declare class Document implements ParentNode {
constructor(children: ChildNode[]);
// Child nodes of the document.
children: ChildNode[];
// Typed as the literal `null`: a document is never attached to a parent.
parent: null;
// Document mode (one of the html.DOCUMENT_MODE values).
// NOTE(review): presumably read/written by getDocumentMode/setDocumentMode — confirm.
['x-mode']?: html.DOCUMENT_MODE;
// Optional position metadata; all three may be absent.
// NOTE(review): presumably only populated when parsing with sourceCodeLocationInfo — confirm.
startIndex?: number;
endIndex?: number;
sourceCodeLocation?: Token.ElementLocation | null;
}
/**
 * Element node type from domhandler, as used by the adapter.
 *
 * Besides the regular domhandler fields, it carries optional properties that
 * this page lists as added for parse5 compatibility: `x-attribsNamespace`,
 * `x-attribsPrefix`, `startIndex`, `endIndex` and `sourceCodeLocation`.
 */
declare class Element implements ChildNode {
constructor(name: string, attribs: Record<string, string>, children: ChildNode[]);
// Tag name (the usage examples on this page print values such as "html" and "strong").
name: string;
// Attribute values keyed by attribute name (the usage examples read `div.attribs.class`).
attribs: Record<string, string>;
// Child nodes of this element.
children: ChildNode[];
// Links to the parent and to the adjacent siblings; each is null when there is none.
parent: ParentNode | null;
prev: ChildNode | null;
next: ChildNode | null;
// NOTE(review): presumably the element's namespace URI (cf. getNamespaceURI) — confirm.
namespace?: string;
// Per-attribute namespace and prefix information.
// NOTE(review): presumably keyed by attribute name, parallel to `attribs` — confirm.
['x-attribsNamespace']?: Record<string, string>;
['x-attribsPrefix']?: Record<string, string>;
// Optional position metadata; all three may be absent.
startIndex?: number;
endIndex?: number;
sourceCodeLocation?: Token.ElementLocation | null;
}
declare class Text implements ChildNode {
constructor(data: string);
data: string;
parent: ParentNode | null;
prev: ChildNode | null;
next: ChildNode | null;
startIndex?: number;
endIndex?: number;
sourceCodeLocation?: Token.ElementLocation | null;
}
declare class Comment implements ChildNode {
constructor(data: string);
data: string;
parent: ParentNode | null;
prev: ChildNode | null;
next: ChildNode | null;
startIndex?: number;
endIndex?: number;
sourceCodeLocation?: Token.ElementLocation | null;
}
declare class ProcessingInstruction implements ChildNode {
constructor(name: string, data: string);
name: string;
data: string;
parent: ParentNode | null;
prev: ChildNode | null;
next: ChildNode | null;
['x-name']?: string;
['x-publicId']?: string;
['x-systemId']?: string;
startIndex?: number;
endIndex?: number;
sourceCodeLocation?: Token.ElementLocation | null;
}
Key types from parse5 used by the adapter:
// From parse5
/**
 * Attribute-shaped record: an optional namespace and prefix plus a required
 * name/value pair.
 *
 * `namespace` is typed as `string`, matching the field-identical
 * `Token.Attribute` declaration on this page.
 *
 * NOTE(review): parse5 itself exposes `Token` as a namespace; confirm whether
 * this standalone interface is intended or is a leftover duplicate of
 * `Token.Attribute`.
 */
interface Token {
  /** Namespace of the attribute, when it has one. */
  namespace?: string;
  /** Namespace prefix of the attribute, when it has one. */
  prefix?: string;
  /** Attribute name. */
  name: string;
  /** Attribute value. */
  value: string;
}
namespace Token {
interface Attribute {
namespace?: string;
prefix?: string;
name: string;
value: string;
}
interface ElementLocation {
startOffset?: number;
endOffset?: number;
startLine?: number;
startCol?: number;
endLine?: number;
endCol?: number;
}
}
namespace html {
enum DOCUMENT_MODE {
NO_QUIRKS = 'no-quirks',
QUIRKS = 'quirks',
LIMITED_QUIRKS = 'limited-quirks'
}
enum NS {
HTML = 'http://www.w3.org/1999/xhtml',
MATHML = 'http://www.w3.org/1998/Math/MathML',
SVG = 'http://www.w3.org/2000/svg',
XLINK = 'http://www.w3.org/1999/xlink',
XML = 'http://www.w3.org/XML/1998/namespace',
XMLNS = 'http://www.w3.org/2000/xmlns/'
}
}
The adapter extends domhandler nodes with additional properties for parse5 compatibility:
- All node types: startIndex, endIndex, and sourceCodeLocation properties
- Element: x-attribsNamespace and x-attribsPrefix properties
- Document: x-mode property
- ProcessingInstruction: x-name, x-publicId, and x-systemId properties
import { parse, parseFragment, serialize } from "parse5";
import { adapter } from "parse5-htmlparser2-tree-adapter";
// Input markup, kept under its own name so it does not collide with the
// serialized `html` result declared at the end of this example.
const source = "<html><body><h1>Hello World</h1></body></html>";

// Parse with source location tracking
const documentWithLocation = parse(source, {
  treeAdapter: adapter,
  sourceCodeLocationInfo: true
});

// Access source location information
const element = documentWithLocation.children[0]; // html element
console.log(element.sourceCodeLocation?.startOffset);
console.log(element.sourceCodeLocation?.endOffset);

// Parse fragment with context. parse5 takes the context element as the first
// argument of parseFragment; it is not a parser option.
// `contextTableElement` stands for an element node obtained elsewhere (not shown).
const fragment = parseFragment(contextTableElement, "<td>Cell content</td>", {
  treeAdapter: adapter
});

// Serialize back to HTML using parse5's serializer
const html = serialize(documentWithLocation, { treeAdapter: adapter });
import { parse } from "parse5";
import { adapter } from "parse5-htmlparser2-tree-adapter";
// Parse a small snippet; the div below has exactly two children:
// the text node "Hello " and the <strong> element.
const document = parse("<div class='content'>Hello <strong>World</strong></div>", {
treeAdapter: adapter
});
// Navigate using htmlparser2/domhandler patterns
const html = document.children[0]; // html element
const body = html.children[1]; // body element
const div = body.children[0]; // div element
// Access attributes htmlparser2-style
console.log(div.attribs.class); // "content"
// Traverse siblings by following `next` until it is null
let current = div.children[0]; // "Hello " text node
while (current) {
// Text nodes expose `data`, elements expose `name`.
// Prints "Hello " and then "strong"; the text "World" is a child of <strong>,
// not a sibling, so this loop does not reach it.
console.log(current.data || current.name); // "Hello ", "strong"
current = current.next;
}
// Use domhandler utilities
import { isText, isTag } from "domhandler";
// Same two children as above, told apart with domhandler's type guards.
div.children.forEach(child => {
if (isText(child)) {
console.log("Text:", child.data);
} else if (isTag(child)) {
console.log("Element:", child.name);
}
});