CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-parse5

HTML parser and serializer that is fully compliant with the WHATWG HTML Living Standard.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/tree-adapters.md

Tree Adapters

Tree adapters provide a pluggable interface for customizing how parsed HTML is represented in memory. They define the structure of AST nodes and provide methods for creating, manipulating, and inspecting nodes.

Capabilities

Default Tree Adapter

The built-in tree adapter that creates DOM-like node structures with standard properties.

/**
 * Default tree adapter that creates DOM-like nodes.
 *
 * Signature only: the value itself is provided by the package. `declare` marks
 * this as an ambient declaration, since a plain `const` without an initializer
 * is not valid TypeScript.
 */
declare const defaultTreeAdapter: TreeAdapter<DefaultTreeAdapterMap>;

/**
 * Type mapping for the default tree adapter.
 *
 * Passes the default adapter's node interfaces as the ten type arguments of
 * `TreeAdapterTypeMap`, in its declared order: node, parent node, child node,
 * document, document fragment, element, comment node, text node, template,
 * document type. The body is empty because the base interface already
 * declares every member.
 */
interface DefaultTreeAdapterMap extends TreeAdapterTypeMap<
  Node,
  ParentNode,
  ChildNode,
  Document,
  DocumentFragment,
  Element,
  CommentNode,
  TextNode,
  Template,
  DocumentType
> {}

Usage Examples:

import { parse, defaultTreeAdapter } from "parse5";

// Default tree adapter is used automatically
const document = parse('<div>Content</div>');

// Can be specified explicitly
const documentExplicit = parse('<div>Content</div>', {
  treeAdapter: defaultTreeAdapter
});

// Access node properties (default tree adapter structure).
// The input has no doctype, so <html> is the document's first child (index 0).
// Inside <html>, <head> is index 0 and <body> is index 1.
const element = document.childNodes[0].childNodes[1].childNodes[0]; // html > body > div
console.log(element.tagName);        // 'div'
console.log(element.nodeName);       // 'div'
console.log(element.childNodes[0].value); // 'Content'

Tree Adapter Interface

Complete interface for implementing custom tree adapters.

/**
 * Tree adapter interface defining all required methods for AST manipulation.
 *
 * `T` is the type map naming the concrete node types an adapter works with;
 * every signature below refers to those types through `T[...]` lookups.
 * Only `onItemPush` and `onItemPop` are optional.
 */
interface TreeAdapter<T extends TreeAdapterTypeMap = TreeAdapterTypeMap> {
  // Node creation methods
  createDocument(): T['document'];
  createDocumentFragment(): T['documentFragment'];
  createElement(tagName: string, namespaceURI: NS, attrs: Attribute[]): T['element'];
  createCommentNode(data: string): T['commentNode'];
  createTextNode(value: string): T['textNode'];

  // Node manipulation methods
  appendChild(parentNode: T['parentNode'], newNode: T['childNode']): void;
  insertBefore(parentNode: T['parentNode'], newNode: T['childNode'], referenceNode: T['childNode']): void;
  detachNode(node: T['childNode']): void;

  // Template methods (declared once; the content of a template is a document fragment)
  setTemplateContent(templateElement: T['template'], contentElement: T['documentFragment']): void;
  getTemplateContent(templateElement: T['template']): T['documentFragment'];

  // Node inspection methods
  getTagName(element: T['element']): string;
  getNamespaceURI(element: T['element']): string;
  getTextNodeContent(textNode: T['textNode']): string;
  getCommentNodeContent(commentNode: T['commentNode']): string;
  getDocumentTypeNodeName(doctypeNode: T['documentType']): string;
  getDocumentTypeNodePublicId(doctypeNode: T['documentType']): string;
  getDocumentTypeNodeSystemId(doctypeNode: T['documentType']): string;

  // Attribute methods
  // NOTE(review): getAttr/setAttr do not appear in the upstream parse5
  // TreeAdapter interface as far as I can tell — confirm against the installed
  // version before relying on them; getAttrList is the portable accessor.
  getAttrList(element: T['element']): Attribute[];
  getAttr(element: T['element'], attrName: string): string | null;
  setAttr(element: T['element'], attrName: string, attrValue: string): void;

  // Parent/child relationships (null when there is no parent / no child)
  getChildNodes(node: T['parentNode']): T['childNode'][];
  getParentNode(node: T['childNode']): T['parentNode'] | null;
  getFirstChild(node: T['parentNode']): T['childNode'] | null;

  // Type guard methods: narrow a generic node to a specific node type
  isElementNode(node: T['node']): node is T['element'];
  isTextNode(node: T['node']): node is T['textNode'];
  isCommentNode(node: T['node']): node is T['commentNode'];
  isDocumentTypeNode(node: T['node']): node is T['documentType'];

  // Special methods
  adoptAttributes(recipient: T['element'], attrs: Attribute[]): void;
  getDocumentMode(document: T['document']): DOCUMENT_MODE;
  setDocumentMode(document: T['document'], mode: DOCUMENT_MODE): void;
  setDocumentType(document: T['document'], name: string, publicId: string, systemId: string): void;
  insertText(parentNode: T['parentNode'], text: string): void;
  insertTextBefore(parentNode: T['parentNode'], text: string, referenceNode: T['childNode']): void;

  // Location methods
  getNodeSourceCodeLocation(node: T['node']): ElementLocation | undefined | null;
  setNodeSourceCodeLocation(node: T['node'], location: ElementLocation | null): void;
  updateNodeSourceCodeLocation(node: T['node'], location: Partial<ElementLocation>): void;

  // Optional callback methods
  onItemPush?(item: T['element']): void;
  onItemPop?(item: T['element'], newTop: T['parentNode']): void;
}

Default Tree Adapter Node Types

Node interfaces provided by the default tree adapter.

/**
 * Document node representing the entire HTML document.
 *
 * Unlike the child node types, it declares no `parentNode`.
 */
interface Document {
  /** Fixed discriminant identifying a document node. */
  nodeName: '#document';
  /** Quirks mode of the document; the same three values as `DOCUMENT_MODE`. */
  mode: 'no-quirks' | 'quirks' | 'limited-quirks';
  /** Direct children of the document. */
  childNodes: ChildNode[];
  /** Optional source position; may be absent or `null`. */
  sourceCodeLocation?: Location | null;
}

/**
 * Document fragment node for parsing HTML fragments.
 *
 * A parentless container: it declares only a discriminant, its children and
 * an optional location.
 */
interface DocumentFragment {
  /** Fixed discriminant identifying a document fragment. */
  nodeName: '#document-fragment';
  /** Direct children of the fragment. */
  childNodes: ChildNode[];
  /** Optional source position; may be absent or `null`. */
  sourceCodeLocation?: Location | null;
}

/**
 * Element node representing HTML elements.
 *
 * The only node type here that is both a parent (has `childNodes`) and a
 * child (has `parentNode`).
 */
interface Element {
  /** Node name of the element (the usage examples print the same value as `tagName`). */
  nodeName: string;
  /** Tag name of the element, e.g. 'div'. */
  tagName: string;
  /** Attributes of the element as a list of name/value entries. */
  attrs: Attribute[];
  /** Namespace URI of the element (presumably one of the `NS` values — confirm). */
  namespaceURI: string;
  /** Optional source position, including start/end tag and attribute positions. */
  sourceCodeLocation?: ElementLocation | null;
  /** Containing node, or `null` when the element has no parent. */
  parentNode: ParentNode | null;
  /** Direct children of the element. */
  childNodes: ChildNode[];
}

/**
 * Text node containing text content.
 */
interface TextNode {
  /** Fixed discriminant identifying a text node. */
  nodeName: '#text';
  /** Containing node, or `null` when the node has no parent. */
  parentNode: ParentNode | null;
  /** The text content itself. */
  value: string;
  /** Optional source position; may be absent or `null`. */
  sourceCodeLocation?: Location | null;
}

/**
 * Comment node containing comment text.
 *
 * Note the property name differs from `TextNode`: comments store their text
 * in `data`, text nodes in `value`.
 */
interface CommentNode {
  /** Fixed discriminant identifying a comment node. */
  nodeName: '#comment';
  /** Containing node, or `null` when the node has no parent. */
  parentNode: ParentNode | null;
  /** The comment text. */
  data: string;
  /** Optional source position; may be absent or `null`. */
  sourceCodeLocation?: Location | null;
}

/**
 * Document type node (DOCTYPE declaration).
 */
interface DocumentType {
  /** Fixed discriminant identifying a doctype node. */
  nodeName: '#documentType';
  /** Containing node, or `null` when the node has no parent. */
  parentNode: ParentNode | null;
  /** Name given in the DOCTYPE declaration. */
  name: string;
  /** Public identifier of the DOCTYPE. */
  publicId: string;
  /** System identifier of the DOCTYPE. */
  systemId: string;
  /** Optional source position; may be absent or `null`. */
  sourceCodeLocation?: Location | null;
}

/**
 * Template element with content fragment.
 *
 * An `Element` whose `nodeName` and `tagName` are narrowed to 'template' and
 * which additionally carries a `content` fragment.
 * NOTE(review): presumably the template's parsed children live in `content`
 * rather than in `childNodes` — confirm.
 */
interface Template extends Element {
  nodeName: 'template';
  tagName: 'template';
  /** Document fragment associated with the template. */
  content: DocumentFragment;
}

/**
 * Union types for node categorization.
 *
 * - `ParentNode`: nodes that declare `childNodes`.
 * - `ChildNode`: nodes that declare `parentNode`.
 * - `Node`: any node of the default tree.
 * `Element` and `Template` belong to both the parent and the child union.
 */
type ParentNode = Document | DocumentFragment | Element | Template;
type ChildNode = Element | Template | CommentNode | TextNode | DocumentType;
type Node = ParentNode | ChildNode;

/**
 * Document mode enumeration.
 *
 * The three quirks-mode values a document can carry; these are the same
 * strings accepted by `Document.mode`.
 */
type DOCUMENT_MODE = 'no-quirks' | 'quirks' | 'limited-quirks';

/**
 * Namespace enumeration.
 *
 * String enum mapping short namespace names to their namespace URIs.
 */
enum NS {
  // Element namespaces
  HTML = 'http://www.w3.org/1999/xhtml',
  MATHML = 'http://www.w3.org/1998/Math/MathML',
  SVG = 'http://www.w3.org/2000/svg',
  // Namespaces that presumably apply to attributes only — confirm
  XLINK = 'http://www.w3.org/1999/xlink',
  XML = 'http://www.w3.org/XML/1998/namespace',
  XMLNS = 'http://www.w3.org/2000/xmlns/'
}

/**
 * Attribute interface.
 *
 * One entry of an element's attribute list. Only `name` and `value` are
 * required.
 */
interface Attribute {
  /** Attribute name. */
  name: string;
  /** Attribute value. */
  value: string;
  /** Optional namespace of the attribute. */
  namespace?: string;
  /** Optional namespace prefix of the attribute. */
  prefix?: string;
}

/**
 * Basic location interface.
 *
 * Start and end of a span in the source text, each given as line, column and
 * offset.
 * NOTE(review): presumably lines/columns are 1-based and offsets 0-based, with
 * the end position exclusive — confirm against the parse5 documentation.
 */
interface Location {
  startLine: number;
  startCol: number;
  startOffset: number;
  endLine: number;
  endCol: number;
  endOffset: number;
}

/**
 * Element location interface.
 *
 * Extends the element's overall `Location` with optional positions for its
 * individual parts.
 */
interface ElementLocation extends Location {
  /** Position of the start tag, when available. */
  startTag?: Location;
  /** Position of the end tag, when available. */
  endTag?: Location;
  /** Positions of individual attributes, keyed by attribute name. */
  attrs?: Record<string, Location>;
}

Usage Examples:

import { parse, parseFragment } from "parse5";
import type { Element, TextNode, Document } from "parse5";

// Type-safe node access with default tree adapter.
// The input has no doctype, so <html> is the document's first child (index 0);
// <head> and <body> are children 0 and 1 of <html>.
const document: Document = parse('<div>Hello <span>World</span></div>');
const htmlElement = document.childNodes[0] as Element;
const bodyElement = htmlElement.childNodes[1] as Element;
const divElement = bodyElement.childNodes[0] as Element;

console.log(divElement.tagName);     // 'div'
console.log(divElement.attrs);       // []
console.log(divElement.childNodes.length); // 2

const textNode = divElement.childNodes[0] as TextNode;
console.log(textNode.value);         // 'Hello '

const spanElement = divElement.childNodes[1] as Element;
console.log(spanElement.tagName);    // 'span'

Custom Tree Adapter Implementation

Example of implementing a custom tree adapter for specialized use cases.

/**
 * Element node of the example custom tree.
 *
 * Attributes are kept as a plain name → value record instead of a list, and
 * `customProperty` carries extra data set when the element is created.
 */
interface CustomElement {
  type: 'element';
  tag: string;
  attributes: Record<string, string>;
  children: CustomNode[];
  customProperty: string;
}

/**
 * Text node of the example custom tree.
 */
interface CustomText {
  type: 'text';
  content: string;
}

/** Any node of the example custom tree, discriminated by `type`. */
type CustomNode = CustomElement | CustomText;

/**
 * Type map that plugs the custom node shapes into `TreeAdapter`.
 *
 * It has the same ten keys as `TreeAdapterTypeMap`. This simplified example
 * models only elements and text, so the container slots (document, fragment,
 * template) reuse `CustomElement` and the slots it does not model stay
 * `unknown`. A complete adapter would define dedicated types for them.
 */
interface CustomTreeAdapterMap {
  node: CustomNode;
  parentNode: CustomElement;
  childNode: CustomNode;
  document: CustomElement;
  documentFragment: CustomElement;
  element: CustomElement;
  commentNode: unknown;
  textNode: CustomText;
  template: CustomElement;
  documentType: unknown;
}

/**
 * Partial example adapter producing `CustomElement` / `CustomText` nodes.
 * Only a handful of methods are shown; a real adapter must provide the rest.
 */
const customTreeAdapter: TreeAdapter<CustomTreeAdapterMap> = {
  // Implement all required TreeAdapter methods

  // Build an element, flattening the attribute list into a name → value record
  createElement: (tagName: string, namespaceURI: string, attrs: Attribute[]): CustomElement => {
    const attributes = Object.fromEntries(attrs.map(({ name, value }) => [name, value]));
    return { type: 'element', tag: tagName, attributes, children: [], customProperty: `custom-${tagName}` };
  },

  // Wrap raw text in a text node
  createTextNode: (value: string): CustomText => ({ type: 'text', content: value }),

  // Children are kept in insertion order
  appendChild: (parent: CustomElement, child: CustomNode): void => {
    parent.children.push(child);
  },

  // Nodes are discriminated by their `type` tag
  isElementNode: (node: CustomNode): node is CustomElement => node.type === 'element',

  getTagName: (element: CustomElement): string => element.tag,

  // ... implement all other required methods
};

Usage Examples:

import { parse } from "parse5";

// Use custom tree adapter
const document = parse('<div class="container">Content</div>', {
  treeAdapter: customTreeAdapter
});

// Access custom properties.
// The parser creates <html>, <head> and <body> even when the input omits them,
// so the path is document > html (0) > body (1, after head) > div (0).
// NOTE(review): assumes the adapter's document node exposes `children` like
// CustomElement — confirm against the full adapter implementation.
const element = document.children[0].children[1].children[0];
console.log(element.customProperty); // 'custom-div'
console.log(element.attributes.class); // 'container'

Tree Adapter Type Mapping

Type mapping interface for defining node types in custom tree adapters.

/**
 * Generic type mapping interface for tree adapters.
 *
 * Each of the ten type parameters is exposed under a camelCase key so that
 * adapter signatures can look node types up by name (e.g. `T['element']`).
 * Every parameter defaults to `unknown`, so a map only has to specify the
 * slots it cares about; the arguments are positional, in the order below.
 */
interface TreeAdapterTypeMap<
  Node = unknown,
  ParentNode = unknown,
  ChildNode = unknown,
  Document = unknown,
  DocumentFragment = unknown,
  Element = unknown,
  CommentNode = unknown,
  TextNode = unknown,
  Template = unknown,
  DocumentType = unknown
> {
  node: Node;
  parentNode: ParentNode;
  childNode: ChildNode;
  document: Document;
  documentFragment: DocumentFragment;
  element: Element;
  commentNode: CommentNode;
  textNode: TextNode;
  template: Template;
  documentType: DocumentType;
}

Common Tree Adapter Patterns

Node Traversal

import { parse, defaultTreeAdapter } from "parse5";
import type { Element, Node } from "parse5";

/**
 * Depth-first, pre-order walk: invokes `callback` on `node` first, then on
 * each of its descendants in child order.
 *
 * @param node - Node to start from.
 * @param callback - Invoked once for every visited node.
 */
function traverseNodes(node: Node, callback: (node: Node) => void): void {
  callback(node);

  // Only elements, documents and document fragments have child nodes.
  const isContainer =
    defaultTreeAdapter.isElementNode(node) ||
    node.nodeName === '#document' ||
    node.nodeName === '#document-fragment';
  if (!isContainer) {
    return;
  }

  defaultTreeAdapter
    .getChildNodes(node)
    .forEach((child) => traverseNodes(child, callback));
}

// Print the name of every node in the parsed document
const document = parse('<div><p>Text</p><span>More text</span></div>');
traverseNodes(document, (visited) => {
  console.log(visited.nodeName);
});

Element Filtering

import { parse, defaultTreeAdapter } from "parse5";
// `Node` must be imported too: otherwise the annotations below would resolve
// to the global DOM `Node` type instead of the tree adapter's node union.
import type { Element, Node } from "parse5";

/**
 * Collects every element at or below `root` whose tag name equals `tagName`.
 *
 * @param root - Node to start searching from (it is tested as well).
 * @param tagName - Tag name to match; compared with strict equality.
 * @returns Matching elements in depth-first, pre-order (document) order.
 */
function findElementsByTagName(root: Node, tagName: string): Element[] {
  const results: Element[] = [];

  function traverse(node: Node): void {
    // Record the node itself before descending so results stay in document order
    if (defaultTreeAdapter.isElementNode(node) &&
        defaultTreeAdapter.getTagName(node) === tagName) {
      results.push(node);
    }

    // Only elements, documents and document fragments have child nodes
    if (defaultTreeAdapter.isElementNode(node) ||
        node.nodeName === '#document' ||
        node.nodeName === '#document-fragment') {
      const children = defaultTreeAdapter.getChildNodes(node);
      children.forEach(traverse);
    }
  }

  traverse(root);
  return results;
}

const document = parse('<div><p>Para 1</p><div><p>Para 2</p></div></div>');
const paragraphs = findElementsByTagName(document, 'p');
console.log(paragraphs.length); // 2

Attribute Manipulation

import { parseFragment, defaultTreeAdapter } from "parse5";
import type { Element } from "parse5";

const fragment = parseFragment('<div class="old">Content</div>');
const element = fragment.childNodes[0] as Element;

// Read attributes: the adapter exposes the attribute list, so look entries up by name
const allAttrs = defaultTreeAdapter.getAttrList(element);
const classAttr = allAttrs.find((attr) => attr.name === 'class');
const classList = classAttr?.value ?? null;
console.log(classList); // 'old'

// Modify attributes: update an existing entry in place, append new ones.
// NOTE(review): relies on the default adapter returning the element's own
// `attrs` array from getAttrList, so edits are visible on the element — confirm.
if (classAttr) {
  classAttr.value = 'new updated';
}
allAttrs.push({ name: 'data-id', value: '123' });

// Check all attributes
console.log(defaultTreeAdapter.getAttrList(element)); // [{ name: 'class', value: 'new updated' }, { name: 'data-id', value: '123' }]

Install with Tessl CLI

npx tessl i tessl/npm-parse5

docs

error-handling.md

html-utilities.md

index.md

parsing.md

serialization.md

tokenization.md

tree-adapters.md

tile.json