High-level functions for parsing HTML/XML into DOM trees using the domhandler integration. These functions provide the easiest way to parse documents into manipulatable DOM structures.
Parses HTML/XML data and returns a complete Document object with full DOM tree structure.
/**
* Parses the data, returns the resulting document.
* @param data - The HTML/XML string to parse
* @param options - Optional parser and DOM handler options
* @returns Complete Document object with DOM tree
*/
function parseDocument(data: string, options?: Options): Document;

Usage Examples:
import { parseDocument, DomUtils } from "htmlparser2";

// Basic HTML parsing
const doc = parseDocument("<html><body><h1>Hello World</h1></body></html>");
console.log(doc.children[0].name); // "html"

// Parse with options
const xmlDoc = parseDocument(`<?xml version="1.0"?>
<root>
<item id="1">First</item>
<item id="2">Second</item>
</root>`, { xmlMode: true });

// Access elements.
// The first child of xmlDoc is the <?xml ...?> processing instruction, not
// <root>, so indexing children[0] and reading its children would fail.
// Search the parsed tree by tag name instead of relying on node position.
const items = DomUtils.getElementsByTagName("item", xmlDoc.children);
console.log(items.length); // 2

Parses data and returns an array of root nodes. This function is deprecated in favor of `parseDocument`.

/**
* Parses data, returns an array of the root nodes.
* Note: root nodes still have a Document node as their parent.
* @param data - The HTML/XML string to parse
* @param options - Optional parser and DOM handler options
* @returns Array of root child nodes
* @deprecated Use parseDocument instead
*/
function parseDOM(data: string, options?: Options): ChildNode[];

Creates a streaming parser that builds a DOM and returns it via callback when parsing is complete.
/**
* Creates a parser instance with attached DOM handler for streaming.
* @param callback - Called when parsing completes with error or document
* @param options - Optional parser and DOM handler options
* @param elementCallback - Optional callback fired for each completed element
* @returns Parser instance for writing data
*/
function createDocumentStream(
callback: (error: Error | null, document: Document) => void,
options?: Options,
elementCallback?: (element: Element) => void
): Parser;

Usage Examples:
import { createDocumentStream } from "htmlparser2";
// Stream parsing with callback
const parser = createDocumentStream((error, document) => {
if (error) {
console.error("Parsing failed:", error);
return;
}
console.log("Parsed document:", document);
// Process the complete DOM tree
});
// Write data in chunks (useful for streaming)
parser.write("<html><head><title>");
parser.write("My Page");
parser.write("</title></head>");
parser.write("<body>Content here</body></html>");
parser.end();
// With element callback for processing elements as they're completed
const streamParser = createDocumentStream(
(error, doc) => console.log("Final document ready"),
{ xmlMode: false },
(element) => {
// Called for each completed element during parsing
console.log("Completed element:", element.name);
}
);

Creates a streaming parser that returns an array of child nodes via callback. Deprecated in favor of `createDocumentStream`.

/**
* Creates a parser instance with attached DOM handler for streaming.
* @param callback - Called when parsing completes with error or child nodes
* @param options - Optional parser and DOM handler options
* @param elementCallback - Optional callback fired for each completed element
* @returns Parser instance for writing data
* @deprecated Use createDocumentStream instead
*/
function createDomStream(
callback: (error: Error | null, dom: ChildNode[]) => void,
options?: Options,
elementCallback?: (element: Element) => void
): Parser;

DOM parsing functions accept an `Options` object that combines `ParserOptions` and `DomHandlerOptions`:

interface Options extends ParserOptions, DomHandlerOptions {}
/**
 * Parser-level options accepted by the DOM parsing functions.
 * Every field is optional; the defaults noted per field apply when a field is
 * omitted, and several of them depend on whether `xmlMode` is enabled.
 */
interface ParserOptions {
/** Enable XML parsing mode - important for XML documents and feeds */
xmlMode?: boolean;
/** Decode HTML entities in text content (default: true) */
decodeEntities?: boolean;
/** Convert tag names to lowercase (default: !xmlMode) */
lowerCaseTags?: boolean;
/** Convert attribute names to lowercase (default: !xmlMode) */
lowerCaseAttributeNames?: boolean;
/** Recognize CDATA sections even in HTML mode (default: xmlMode) */
recognizeCDATA?: boolean;
/** Recognize self-closing tags even in HTML mode (default: xmlMode) */
recognizeSelfClosing?: boolean;
}
interface DomHandlerOptions {
/** Include location information for nodes */
withStartIndices?: boolean;
/** Include end location information for nodes */
withEndIndices?: boolean;
/** Normalize whitespace in text content */
normalizeWhitespace?: boolean;
}

The parsed DOM uses types from the domhandler package:
/**
 * Root node of a parsed tree: holds the top-level nodes in `children` and
 * never has a parent.
 * NOTE(review): this listing looks like a simplified view of the domhandler
 * type - confirm the exact members (e.g. `name`) against the installed version.
 */
interface Document extends Node {
type: "root";
name: "root";
children: ChildNode[];
parent: null;
}
/**
 * An element (tag) node: its tag name, a string-to-string attribute map and
 * its child nodes. `parent` is null when the node is not attached to a parent.
 */
interface Element extends Node {
type: "tag";
name: string;
attribs: { [name: string]: string };
children: ChildNode[];
parent: ParentNode | null;
}
/** A text node; the text content is stored in `data`. */
interface Text extends Node {
type: "text";
data: string;
parent: ParentNode | null;
}
/** A comment node; the comment text is stored in `data`. */
interface Comment extends Node {
type: "comment";
data: string;
parent: ParentNode | null;
}
/**
 * A processing-instruction node, identified by `type: "directive"`, carrying
 * both a `name` and its raw `data`.
 * NOTE(review): presumably this is what an `<?xml ...?>` declaration parses
 * to - confirm against the domhandler documentation.
 */
interface ProcessingInstruction extends Node {
type: "directive";
name: string;
data: string;
parent: ParentNode | null;
}
/**
 * Node kinds that can appear in a `children` array.
 * NOTE(review): domhandler may define further node kinds (e.g. CDATA) that are
 * not listed here - confirm against the installed version.
 */
type ChildNode = Element | Text | Comment | ProcessingInstruction;
/** Node kinds that can appear as the `parent` of another node. */
type ParentNode = Document | Element;