or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

index.md
tile.json

docs/index.md

domhandler

domhandler is a TypeScript library that provides a DOM handler for htmlparser2, creating tree structures containing all nodes of a parsed HTML page. It serves as a foundational component in the HTML parsing ecosystem, enabling the transformation of HTML markup into structured DOM trees that can be manipulated programmatically.

Package Information

  • Package Name: domhandler
  • Package Type: npm
  • Language: TypeScript
  • Installation: npm install domhandler

Core Imports

import { DomHandler, type DomHandlerOptions } from "domhandler";
import { Element, Text, Comment, Document, type ChildNode } from "domhandler";

// Alternatively, use the default export (the same class as the named `DomHandler` export)
import DomHandler from "domhandler";

For CommonJS:

const { DomHandler } = require("domhandler");
const { Element, Text, Comment, Document } = require("domhandler");

// Default export (alternative to the named import above; CommonJS exposes it on the `default` property)
const DomHandler = require("domhandler").default;

Basic Usage

import { Parser } from "htmlparser2";
import { DomHandler } from "domhandler";

const rawHtml = 'Xyz <script language="javascript">var foo = "<<bar>>";</script><!--<!-- Waah! -- -->';

const handler = new DomHandler((error, dom) => {
    if (error) {
        // Handle error
    } else {
        // Parsing completed, use the DOM
        console.log(dom);
    }
});

const parser = new Parser(handler);
parser.write(rawHtml);
parser.end();

Architecture

domhandler is built around several key components:

  • DomHandler Class: The main parser event handler that creates and manages the DOM tree
  • Node Hierarchy: A set of node classes representing the different kinds of nodes in a parsed document (elements, text, comments, directives, CDATA sections, and the document root)
  • Event-driven Processing: Responds to htmlparser2 events to build the DOM tree incrementally
  • DOM Compatibility: Provides DOM spec-compatible properties and methods for easy integration

Capabilities

DOM Handler

The main class that processes HTML parsing events and creates a DOM tree structure.

class DomHandler {
  /** The elements of the DOM */
  dom: ChildNode[];
  /** The root element for the DOM */
  root: Document;

  constructor(
    callback?: Callback | null,
    options?: DomHandlerOptions | null,
    elementCB?: ElementCallback
  );

  onparserinit(parser: ParserInterface): void;
  onreset(): void;
  onend(): void;
  onerror(error: Error): void;
  onclosetag(): void;
  onopentag(name: string, attribs: { [key: string]: string }): void;
  ontext(data: string): void;
  oncomment(data: string): void;
  oncommentend(): void;
  oncdatastart(): void;
  oncdataend(): void;
  onprocessinginstruction(name: string, data: string): void;
}

interface DomHandlerOptions {
  /** Add a startIndex property to nodes (default: false) */
  withStartIndices?: boolean;
  /** Add an endIndex property to nodes (default: false) */
  withEndIndices?: boolean;
  /** Treat the markup as XML (default: false) */
  xmlMode?: boolean;
}

interface ParserInterface {
  startIndex: number | null;
  endIndex: number | null;
}

type Callback = (error: Error | null, dom: ChildNode[]) => void;
type ElementCallback = (element: Element) => void;

Node Types

Abstract base classes shared by all DOM node types.

abstract class Node {
  /** The type of the node */
  abstract readonly type: ElementType;
  /** Parent of the node */
  parent: ParentNode | null;
  /** Previous sibling */
  prev: ChildNode | null;
  /** Next sibling */
  next: ChildNode | null;
  /** The start index of the node */
  startIndex: number | null;
  /** The end index of the node */
  endIndex: number | null;
  /** parse5 source code location info */
  sourceCodeLocation?: SourceCodeLocation | null;
  /** DOM spec-compatible node type */
  abstract readonly nodeType: number;
  /** DOM spec-compatible alias for parent */
  get parentNode(): ParentNode | null;
  /** DOM spec-compatible alias for prev */
  get previousSibling(): ChildNode | null;
  /** DOM spec-compatible alias for next */
  get nextSibling(): ChildNode | null;

  cloneNode<T extends Node>(this: T, recursive?: boolean): T;
}

abstract class DataNode extends Node {
  constructor(public data: string);
  /** DOM spec-compatible alias for data */
  get nodeValue(): string;
  set nodeValue(data: string);
}

abstract class NodeWithChildren extends Node {
  constructor(public children: ChildNode[]);
  /** First child of the node */
  get firstChild(): ChildNode | null;
  /** Last child of the node */
  get lastChild(): ChildNode | null;
  /** DOM spec-compatible alias for children */
  get childNodes(): ChildNode[];
  set childNodes(children: ChildNode[]);
}

Specific Node Classes

Concrete node classes, one for each kind of node that can appear in the tree.

class Text extends DataNode {
  readonly type: ElementType.Text;
  readonly nodeType: 3;
}

class Comment extends DataNode {
  readonly type: ElementType.Comment;
  readonly nodeType: 8;
}

class ProcessingInstruction extends DataNode {
  readonly type: ElementType.Directive;
  readonly nodeType: 1;
  constructor(public name: string, data: string);
  /** Document type name (parse5 only) */
  "x-name"?: string;
  /** Document type public identifier (parse5 only) */
  "x-publicId"?: string;
  /** Document type system identifier (parse5 only) */
  "x-systemId"?: string;
}

class CDATA extends NodeWithChildren {
  readonly type: ElementType.CDATA;
  readonly nodeType: 4;
}

class Document extends NodeWithChildren {
  readonly type: ElementType.Root;
  readonly nodeType: 9;
  /** Document mode (parse5 only) */
  "x-mode"?: "no-quirks" | "quirks" | "limited-quirks";
}

class Element extends NodeWithChildren {
  readonly nodeType: 1;
  constructor(
    public name: string,
    public attribs: { [name: string]: string },
    children?: ChildNode[],
    public type?: ElementType.Tag | ElementType.Script | ElementType.Style
  );
  /** DOM spec-compatible alias for name */
  get tagName(): string;
  set tagName(name: string);
  /** DOM spec-compatible attributes array */
  get attributes(): Attribute[];
  /** parse5 source code location info with start & end tags */
  sourceCodeLocation?: TagSourceCodeLocation | null;
  /** Element namespace (parse5 only) */
  namespace?: string;
  /** Element attribute namespaces (parse5 only) */
  "x-attribsNamespace"?: Record<string, string>;
  /** Element attribute namespace-related prefixes (parse5 only) */
  "x-attribsPrefix"?: Record<string, string>;
}

Type Guards and Utilities

Helper functions for working with DOM nodes.

/** Check if node is an Element */
function isTag(node: Node): node is Element;

/** Check if node is CDATA */
function isCDATA(node: Node): node is CDATA;

/** Check if node is Text */
function isText(node: Node): node is Text;

/** Check if node is Comment */
function isComment(node: Node): node is Comment;

/** Check if node is ProcessingInstruction */
function isDirective(node: Node): node is ProcessingInstruction;

/** Check if node is Document */
function isDocument(node: Node): node is Document;

/** Check if node has children */
function hasChildren(node: Node): node is ParentNode;

/** Clone a node; pass `recursive = true` to also clone its children (default: shallow clone) */
function cloneNode<T extends Node>(node: T, recursive?: boolean): T;

Types

// Element type enum (from domelementtype dependency)
enum ElementType {
  Text = "text",
  Directive = "directive", 
  Comment = "comment",
  Script = "script",
  Style = "style",
  Tag = "tag",
  CDATA = "cdata",
  Root = "root",
  Doctype = "doctype"
}

// Core type aliases
type ParentNode = Document | Element | CDATA;
type ChildNode = Text | Comment | ProcessingInstruction | Element | CDATA | Document;
type AnyNode = ParentNode | ChildNode;

// Attribute interface
interface Attribute {
  name: string;
  value: string;
  namespace?: string;
  prefix?: string;
}

// Location interfaces
interface SourceCodeLocation {
  startLine: number;
  startCol: number;
  startOffset: number;
  endLine: number;
  endCol: number;
  endOffset: number;
}

interface TagSourceCodeLocation extends SourceCodeLocation {
  startTag?: SourceCodeLocation;
  endTag?: SourceCodeLocation;
}