HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Tree adapters provide a pluggable interface for customizing how parsed HTML is represented in memory. They define the structure of AST nodes and provide methods for creating, manipulating, and inspecting nodes.
The built-in tree adapter creates DOM-like node structures with standard properties.
/**
* Default tree adapter that creates DOM-like nodes.
*
* Declaration only: this shows the binding's type (a TreeAdapter
* parameterised with DefaultTreeAdapterMap), not its implementation.
* The usage examples import it from "parse5".
*/
const defaultTreeAdapter: TreeAdapter<DefaultTreeAdapterMap>;
/**
 * Type mapping for the default tree adapter.
 *
 * Fills every slot of TreeAdapterTypeMap, in declaration order, with the
 * matching default node interface (Node, ParentNode, ChildNode, ...).
 * The body is intentionally empty: the interface only names that mapping.
 */
interface DefaultTreeAdapterMap
  extends TreeAdapterTypeMap<
    Node,
    ParentNode,
    ChildNode,
    Document,
    DocumentFragment,
    Element,
    CommentNode,
    TextNode,
    Template,
    DocumentType
  > {}

Usage Examples:
import { parse, defaultTreeAdapter } from "parse5";
import type { Element, TextNode } from "parse5";

// Default tree adapter is used automatically
const document = parse('<div>Content</div>');

// Can be specified explicitly
const documentExplicit = parse('<div>Content</div>', {
  treeAdapter: defaultTreeAdapter
});

// Access node properties (default tree adapter structure).
// The input has no doctype, so <html> is the document's first child (index 0);
// <body> is the second child of <html>, after <head> (index 1).
const html = document.childNodes[0] as Element;
const body = html.childNodes[1] as Element;
const element = body.childNodes[0] as Element; // html > body > div
console.log(element.tagName); // 'div'
console.log(element.nodeName); // 'div'
console.log((element.childNodes[0] as TextNode).value); // 'Content'

Complete interface for implementing custom tree adapters.
/**
 * Tree adapter interface defining all required methods for AST manipulation.
 *
 * `T` selects the concrete node types; every signature is written in terms
 * of the slots of TreeAdapterTypeMap. All members are required except the
 * two optional callbacks at the end (onItemPush, onItemPop).
 */
interface TreeAdapter<T extends TreeAdapterTypeMap = TreeAdapterTypeMap> {
  // Node creation methods
  createDocument(): T['document'];
  createDocumentFragment(): T['documentFragment'];
  createElement(tagName: string, namespaceURI: NS, attrs: Attribute[]): T['element'];
  createCommentNode(data: string): T['commentNode'];
  createTextNode(value: string): T['textNode'];

  // Node manipulation methods
  appendChild(parentNode: T['parentNode'], newNode: T['childNode']): void;
  insertBefore(parentNode: T['parentNode'], newNode: T['childNode'], referenceNode: T['childNode']): void;
  detachNode(node: T['childNode']): void;

  // Node inspection methods
  getTagName(element: T['element']): string;
  getNamespaceURI(element: T['element']): string;
  getTextNodeContent(textNode: T['textNode']): string;
  getCommentNodeContent(commentNode: T['commentNode']): string;
  getDocumentTypeNodeName(doctypeNode: T['documentType']): string;
  getDocumentTypeNodePublicId(doctypeNode: T['documentType']): string;
  getDocumentTypeNodeSystemId(doctypeNode: T['documentType']): string;

  // Attribute methods
  // NOTE(review): confirm getAttr/setAttr against the upstream parse5
  // TreeAdapter definition before relying on them.
  getAttrList(element: T['element']): Attribute[];
  getAttr(element: T['element'], attrName: string): string | null;
  setAttr(element: T['element'], attrName: string, attrValue: string): void;

  // Parent/child relationships
  getChildNodes(node: T['parentNode']): T['childNode'][];
  getParentNode(node: T['childNode']): T['parentNode'] | null;
  getFirstChild(node: T['parentNode']): T['childNode'] | null;

  // Type guard methods
  isElementNode(node: T['node']): node is T['element'];
  isTextNode(node: T['node']): node is T['textNode'];
  isCommentNode(node: T['node']): node is T['commentNode'];
  isDocumentTypeNode(node: T['node']): node is T['documentType'];

  // Special methods
  adoptAttributes(recipient: T['element'], attrs: Attribute[]): void;
  getDocumentMode(document: T['document']): DOCUMENT_MODE;
  setDocumentMode(document: T['document'], mode: DOCUMENT_MODE): void;
  setDocumentType(document: T['document'], name: string, publicId: string, systemId: string): void;
  insertText(parentNode: T['parentNode'], text: string): void;
  insertTextBefore(parentNode: T['parentNode'], text: string, referenceNode: T['childNode']): void;

  // Template methods
  getTemplateContent(templateElement: T['template']): T['documentFragment'];
  setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;

  // Location methods
  getNodeSourceCodeLocation(node: T['node']): ElementLocation | undefined | null;
  setNodeSourceCodeLocation(node: T['node'], location: ElementLocation | null): void;
  updateNodeSourceCodeLocation(node: T['node'], location: Partial<ElementLocation>): void;

  // Optional callback methods
  onItemPush?(item: T['element']): void;
  onItemPop?(item: T['element'], newTop: T['parentNode']): void;
}

Node interfaces provided by the default tree adapter.
/**
* Document node representing the entire HTML document.
*
* This is the type the usage examples assign the result of parse() to.
* It declares childNodes but no parentNode.
*/
interface Document {
// Literal tag that identifies this node kind.
nodeName: '#document';
// Same three literals as the DOCUMENT_MODE alias.
mode: 'no-quirks' | 'quirks' | 'limited-quirks';
// Direct children of the document.
childNodes: ChildNode[];
// Optional and nullable. NOTE(review): presumably only set when source
// location tracking is requested - confirm against the parser options.
sourceCodeLocation?: Location | null;
}
/**
* Document fragment node for parsing HTML fragments.
*
* Declares childNodes but no parentNode; Template.content is typed as
* this interface.
*/
interface DocumentFragment {
// Literal tag that identifies this node kind.
nodeName: '#document-fragment';
// Direct children of the fragment.
childNodes: ChildNode[];
// Optional and nullable source position of the fragment.
sourceCodeLocation?: Location | null;
}
/**
* Element node representing HTML elements.
*
* The only node interface here that declares both parentNode and
* childNodes, so it appears in both the ParentNode and ChildNode unions.
*/
interface Element {
// The usage examples print the same value ('div') for nodeName and tagName.
nodeName: string;
tagName: string;
// Attributes as a list of Attribute records.
attrs: Attribute[];
// NOTE(review): typed as plain string although createElement takes an NS
// value - presumably one of the NS URIs; confirm.
namespaceURI: string;
// Optional and nullable; uses ElementLocation rather than the plain
// Location used by the other node interfaces.
sourceCodeLocation?: ElementLocation | null;
// null when the element has no parent.
parentNode: ParentNode | null;
childNodes: ChildNode[];
}
/**
* Text node containing text content.
*
* A leaf node: it declares parentNode but no childNodes.
*/
interface TextNode {
// Literal tag that identifies this node kind.
nodeName: '#text';
// null when the node has no parent.
parentNode: ParentNode | null;
// The text itself (the usage examples read 'Hello ' from this field).
value: string;
// Optional and nullable source position of the text.
sourceCodeLocation?: Location | null;
}
/**
* Comment node containing comment text.
*
* A leaf node: it declares parentNode but no childNodes.
*/
interface CommentNode {
// Literal tag that identifies this node kind.
nodeName: '#comment';
// null when the node has no parent.
parentNode: ParentNode | null;
// The comment text; note the field is `data`, whereas TextNode uses `value`.
data: string;
// Optional and nullable source position of the comment.
sourceCodeLocation?: Location | null;
}
/**
* Document type node (DOCTYPE declaration).
*
* A leaf node: it declares parentNode but no childNodes.
*/
interface DocumentType {
// Literal tag that identifies this node kind.
nodeName: '#documentType';
// null when the node has no parent.
parentNode: ParentNode | null;
// name / publicId / systemId mirror the parameters of
// TreeAdapter.setDocumentType.
name: string;
publicId: string;
systemId: string;
// Optional and nullable source position of the declaration.
sourceCodeLocation?: Location | null;
}
/**
* Template element with content fragment.
*
* Extends Element, narrowing nodeName and tagName to the literal
* 'template' and adding a `content` fragment.
*/
interface Template extends Element {
nodeName: 'template';
tagName: 'template';
// NOTE(review): presumably the fragment read and written by
// TreeAdapter.getTemplateContent / setTemplateContent - confirm.
content: DocumentFragment;
}
/**
* Union types for node categorization.
*
* ParentNode: the node interfaces that declare childNodes.
* ChildNode: the node interfaces that declare parentNode.
* Node: any node, i.e. either of the above.
*/
type ParentNode = Document | DocumentFragment | Element | Template;
type ChildNode = Element | Template | CommentNode | TextNode | DocumentType;
type Node = ParentNode | ChildNode;
/**
* Document mode enumeration.
*
* A union of string literals (not a runtime enum). It is the type used by
* TreeAdapter.getDocumentMode / setDocumentMode and lists the same
* literals as Document.mode.
*/
type DOCUMENT_MODE = 'no-quirks' | 'quirks' | 'limited-quirks';
/**
* Namespace enumeration.
*
* String enum whose members are namespace URIs; it is the type of the
* namespaceURI parameter of TreeAdapter.createElement.
*/
enum NS {
HTML = 'http://www.w3.org/1999/xhtml',
MATHML = 'http://www.w3.org/1998/Math/MathML',
SVG = 'http://www.w3.org/2000/svg',
XLINK = 'http://www.w3.org/1999/xlink',
XML = 'http://www.w3.org/XML/1998/namespace',
XMLNS = 'http://www.w3.org/2000/xmlns/'
}
/**
* Attribute interface.
*
* One name/value pair as stored in Element.attrs and passed to
* TreeAdapter.createElement / adoptAttributes.
*/
interface Attribute {
name: string;
value: string;
// Optional. NOTE(review): presumably only present on namespaced
// (foreign-content) attributes - confirm.
namespace?: string;
prefix?: string;
}
/**
* Basic location interface.
*
* Start and end of a source span, each given as line, column and offset.
* NOTE(review): the indexing base (0- or 1-based) and whether the end is
* inclusive are not shown here - confirm before doing arithmetic on them.
*/
interface Location {
startLine: number;
startCol: number;
startOffset: number;
endLine: number;
endCol: number;
endOffset: number;
}
/**
 * Element location interface.
 *
 * Extends Location with optional sub-spans for the element's tags and
 * attributes. All three additions are optional.
 */
interface ElementLocation extends Location {
  startTag?: Location;
  endTag?: Location;
  // NOTE(review): presumably keyed by attribute name - confirm.
  attrs?: Record<string, Location>;
}

Usage Examples:
import { parse, parseFragment } from "parse5";
import type { Element, TextNode, Document } from "parse5";

// Type-safe node access with default tree adapter
const document: Document = parse('<div>Hello <span>World</span></div>');
// The input has no doctype, so <html> is the document's first child (index 0).
const htmlElement = document.childNodes[0] as Element;
// <body> is the second child of <html>, after <head>.
const bodyElement = htmlElement.childNodes[1] as Element;
const divElement = bodyElement.childNodes[0] as Element;
console.log(divElement.tagName); // 'div'
console.log(divElement.attrs); // []
console.log(divElement.childNodes.length); // 2
const textNode = divElement.childNodes[0] as TextNode;
console.log(textNode.value); // 'Hello '
const spanElement = divElement.childNodes[1] as Element;
console.log(spanElement.tagName); // 'span'

Example of implementing a custom tree adapter for specialized use cases.
/**
* Example custom tree adapter that adds custom properties to nodes.
*
* CustomElement is the element shape produced by the example adapter's
* createElement.
*/
interface CustomElement {
// Discriminant shared with CustomText ('element' vs 'text').
type: 'element';
tag: string;
// Attribute name -> value map (the example builds it from Attribute[]).
attributes: Record<string, string>;
children: CustomNode[];
// Extra field; the example adapter sets it to `custom-` + the tag name.
customProperty: string;
}
// Text node shape produced by the example adapter's createTextNode.
interface CustomText {
// Discriminant shared with CustomElement ('text' vs 'element').
type: 'text';
content: string;
}
// Any node of the example tree; discriminated by the `type` field.
type CustomNode = CustomElement | CustomText;
// Partial example adapter: only a handful of TreeAdapter methods are shown.
// NOTE(review): CustomTreeAdapterMap is not declared in this example; it
// presumably maps the TreeAdapterTypeMap slots onto CustomElement/CustomText -
// confirm, or add the declaration, before compiling.
const customTreeAdapter: TreeAdapter<CustomTreeAdapterMap> = {
  // Implement all required TreeAdapter methods

  // Builds an element, flattening the attribute list into a name -> value
  // map; a later attribute with the same name overwrites an earlier one.
  createElement(tagName: string, namespaceURI: string, attrs: Attribute[]): CustomElement {
    return {
      type: 'element',
      tag: tagName,
      attributes: Object.fromEntries(attrs.map(attr => [attr.name, attr.value])),
      children: [],
      customProperty: `custom-${tagName}`
    };
  },

  createTextNode(value: string): CustomText {
    return {
      type: 'text',
      content: value
    };
  },

  // Appends in place; the example nodes keep no parent back-reference.
  appendChild(parent: CustomElement, child: CustomNode): void {
    parent.children.push(child);
  },

  isElementNode(node: CustomNode): node is CustomElement {
    return node.type === 'element';
  },

  getTagName(element: CustomElement): string {
    return element.tag;
  },

  // ... implement all other required methods
};

Usage Examples:
import { parse } from "parse5";

// Use custom tree adapter
const document = parse('<div class="container">Content</div>', {
  treeAdapter: customTreeAdapter
});

// Access custom properties.
// Path: <html> (first child of the document) > <body> (second child of
// <html>, after <head>) > <div>.
// NOTE(review): assumes the adapter's document node exposes `children` like
// CustomElement does - the example does not show createDocument.
const element = document.children[0].children[1].children[0];
console.log(element.customProperty); // 'custom-div'
console.log(element.attributes.class); // 'container'

Type mapping interface for defining node types in custom tree adapters.
/**
 * Generic type mapping interface for tree adapters.
 *
 * Each type parameter defaults to `unknown` and is exposed under a
 * camelCase property of the same name, so an adapter's signatures can
 * refer to a slot as T['element'], T['parentNode'], and so on.
 */
interface TreeAdapterTypeMap<
  Node = unknown,
  ParentNode = unknown,
  ChildNode = unknown,
  Document = unknown,
  DocumentFragment = unknown,
  Element = unknown,
  CommentNode = unknown,
  TextNode = unknown,
  Template = unknown,
  DocumentType = unknown
> {
  node: Node;
  parentNode: ParentNode;
  childNode: ChildNode;
  document: Document;
  documentFragment: DocumentFragment;
  element: Element;
  commentNode: CommentNode;
  textNode: TextNode;
  template: Template;
  documentType: DocumentType;
}

import { parse, defaultTreeAdapter } from "parse5";
import type { Element, Node } from "parse5";
/**
 * Pre-order walk of a default-adapter tree.
 *
 * Calls `callback` on `node` first, then recurses into its children when
 * the node is an element, a document or a document fragment. Any other
 * node is treated as a leaf.
 */
function traverseNodes(node: Node, callback: (node: Node) => void): void {
  callback(node);

  // Guard clause: only elements, documents and fragments are descended into.
  if (
    !defaultTreeAdapter.isElementNode(node) &&
    node.nodeName !== '#document' &&
    node.nodeName !== '#document-fragment'
  ) {
    return;
  }

  defaultTreeAdapter.getChildNodes(node).forEach((childNode) => {
    traverseNodes(childNode, callback);
  });
}
// Print the nodeName of every node in the parsed document.
const document = parse('<div><p>Text</p><span>More text</span></div>');
traverseNodes(document, (node) => {
  console.log(node.nodeName);
});

import { parse, defaultTreeAdapter } from "parse5";
// `Node` is imported as well: the snippet below uses it in its signatures.
import type { Element, Node } from "parse5";
/**
 * Collects, in pre-order, every element under `root` (including `root`
 * itself) whose tag name, as reported by the default adapter, equals
 * `tagName` exactly.
 */
function findElementsByTagName(root: Node, tagName: string): Element[] {
  const matches: Element[] = [];

  const visit = (current: Node): void => {
    if (
      defaultTreeAdapter.isElementNode(current) &&
      defaultTreeAdapter.getTagName(current) === tagName
    ) {
      matches.push(current);
    }

    // Only elements, documents and fragments are descended into.
    if (
      !defaultTreeAdapter.isElementNode(current) &&
      current.nodeName !== '#document' &&
      current.nodeName !== '#document-fragment'
    ) {
      return;
    }

    defaultTreeAdapter.getChildNodes(current).forEach(visit);
  };

  visit(root);
  return matches;
}
// Both <p> elements are found, including the one nested in the inner <div>.
const document = parse('<div><p>Para 1</p><div><p>Para 2</p></div></div>');
const paragraphs = findElementsByTagName(document, 'p');
console.log(paragraphs.length); // 2

import { parseFragment, defaultTreeAdapter } from "parse5";
import type { Element } from "parse5";
// Parsing a fragment yields the <div> directly as the first child.
const fragment = parseFragment('<div class="old">Content</div>');
const element = fragment.childNodes[0] as Element;

// NOTE(review): confirm that getAttr/setAttr exist on the real parse5
// defaultTreeAdapter; if not, read and update `element.attrs` directly.

// Read attributes
const classList = defaultTreeAdapter.getAttr(element, 'class');
console.log(classList); // 'old'

// Modify attributes
defaultTreeAdapter.setAttr(element, 'class', 'new updated');
defaultTreeAdapter.setAttr(element, 'data-id', '123');

// Check all attributes
const allAttrs = defaultTreeAdapter.getAttrList(element);
console.log(allAttrs); // [{ name: 'class', value: 'new updated' }, { name: 'data-id', value: '123' }]

Install with Tessl CLI
npx tessl i tessl/npm-parse5