Low-level Parser class with callback interface for memory-efficient parsing. This approach is ideal for processing large documents, streaming data, or when you need maximum control over parsing behavior.
The `Parser` class is the core of the library: it tokenizes HTML/XML and fires a callback for each parsing event.
/**
* Main HTML/XML parser class with callback-based interface.
*
* Typical lifecycle, as the usage examples show: construct the parser with a
* set of callbacks, feed it input through one or more write() calls, then
* call end() to signal that no more input follows.
*/
class Parser {
/** The start index of the last event */
public startIndex: number;
/** The end index of the last event */
public endIndex: number;
/**
* Create a new Parser instance
* @param cbs - Callback object implementing any subset of the Handler interface; may be omitted or null
* @param options - Parser configuration options (optional)
*/
constructor(cbs?: Partial<Handler> | null, options?: ParserOptions);
/**
* Write data to the parser for processing.
* May be called repeatedly to feed the input chunk by chunk.
* @param chunk - HTML/XML string data to parse
*/
write(chunk: string): void;
/**
* Signal end of input and complete parsing
* @param chunk - Optional final chunk of data
*/
end(chunk?: string): void;
/** Pause parsing - can be resumed later with resume() */
pause(): void;
/** Resume parsing after a previous pause() */
resume(): void;
/** Reset parser to initial state */
reset(): void;
/**
* Reset parser and parse complete data in one call
* @param data - Complete HTML/XML string to parse
*/
parseComplete(data: string): void;
/**
* Parse a chunk of data
* @param chunk - HTML/XML string data to parse
* @deprecated Use write() instead
*/
parseChunk(chunk: string): void;
/**
* Signal end of input
* @param chunk - Optional final chunk of data
* @deprecated Use end() instead
*/
done(chunk?: string): void;
/**
* Checks if the current tag is a void element.
* Subclasses can override this to specify additional void elements.
* @param name - Tag name to check
* @returns True if the tag is a void element
*/
protected isVoidElement(name: string): boolean;
}
Usage Examples:
import { Parser } from "htmlparser2";
// Basic callback parsing: log every opening tag, text node, and closing tag
// in the order the parser reports them.
const parser = new Parser({
  onopentag: (tagName, attrs) => {
    console.log("Opening tag:", tagName, attrs);
  },
  ontext: (content) => {
    console.log("Text content:", content);
  },
  onclosetag: (closedTag) => {
    console.log("Closing tag:", closedTag);
  },
});

// Feed the markup, then signal that no more input follows.
parser.write("<div class='content'>Hello <b>world</b>!</div>");
parser.end();
// Advanced parsing: a handler that implements most (not all) Handler callbacks.
// Not shown here: onreset, onclosetag, onopentagname, oncdatastart,
// oncdataend, and oncommentend.
const advancedParser = new Parser(
  {
    onparserinit() {
      console.log("Parser initialized");
    },
    onopentag(name, attribs) {
      // attribs contains all attributes of the completed opening tag.
      if (name === "img") {
        console.log("Image found:", attribs.src);
      }
    },
    onattribute(name, value, quote) {
      // quote is the quote character used, null for an unquoted value,
      // or undefined when the attribute has no value.
      console.log(`Attribute: ${name}="${value}" (quote: ${quote})`);
    },
    ontext(data) {
      // Ignore whitespace-only text.
      const trimmed = data.trim();
      if (trimmed) {
        console.log("Text:", trimmed);
      }
    },
    oncomment(data) {
      console.log("Comment:", data);
    },
    onprocessinginstruction(name, data) {
      console.log("Processing instruction:", name, data);
    },
    onerror(error) {
      console.error("Parse error:", error);
    },
    onend() {
      console.log("Parsing complete");
    },
  },
  // xmlMode defaults to false; it is spelled out here only for clarity.
  { xmlMode: false },
);
Complete callback interface for handling all parser events:
/**
* Callback interface for parser events.
* The Parser constructor accepts Partial<Handler>, so a handler object only
* needs to define the callbacks it is interested in.
*/
interface Handler {
/** Called when parser is initialized */
onparserinit(parser: Parser): void;
/** Called to reset handler state */
onreset(): void;
/** Called when parsing is complete */
onend(): void;
/** Called when a parsing error occurs */
onerror(error: Error): void;
/**
* Called when a closing tag is found
* @param name - Tag name
* @param isImplied - NOTE(review): presumably true when the tag was closed implicitly by the parser
* rather than by an explicit closing tag in the input (mirrors onopentag) - confirm
*/
onclosetag(name: string, isImplied: boolean): void;
/** Called when an opening tag name is found (before attributes) */
onopentagname(name: string): void;
/**
* Called for each attribute found in an opening tag
* @param name - Attribute name
* @param value - Attribute value
* @param quote - Quote character used ("\"", "'", null for unquoted, undefined for no value)
*/
onattribute(
name: string,
value: string,
quote?: string | undefined | null
): void;
/**
* Called when an opening tag is complete (after attributes)
* @param name - Tag name
* @param attribs - Object containing all attributes
* @param isImplied - Whether tag was implied by parser
*/
onopentag(
name: string,
attribs: { [s: string]: string },
isImplied: boolean
): void;
/** Called for text content */
ontext(data: string): void;
/** Called for HTML comments */
oncomment(data: string): void;
/** Called at start of CDATA section */
oncdatastart(): void;
/** Called at end of CDATA section */
oncdataend(): void;
/** Called at end of comment */
oncommentend(): void;
/** Called for processing instructions like <?xml ?> */
onprocessinginstruction(name: string, data: string): void;
}
/**
* Options accepted as the second argument of the Parser constructor.
* Every option is optional.
*/
interface ParserOptions {
/**
* Enable XML parsing mode for feeds and XML documents
* Affects tag case sensitivity, self-closing tags, and CDATA handling
* (the defaults of lowerCaseTags, lowerCaseAttributeNames, recognizeCDATA
* and recognizeSelfClosing below are derived from this option)
* @default false
*/
xmlMode?: boolean;
/**
* Decode HTML entities in text and attribute values
* @default true
*/
decodeEntities?: boolean;
/**
* Convert all tag names to lowercase
* Defaults to true unless xmlMode is enabled
* @default !xmlMode
*/
lowerCaseTags?: boolean;
/**
* Convert all attribute names to lowercase
* Has performance impact but improves compatibility
* Defaults to true unless xmlMode is enabled
* @default !xmlMode
*/
lowerCaseAttributeNames?: boolean;
/**
* Recognize CDATA sections even in HTML mode
* Defaults to the value of xmlMode
* @default xmlMode
*/
recognizeCDATA?: boolean;
/**
* Recognize self-closing tags even in HTML mode
* Defaults to the value of xmlMode
* @default xmlMode
*/
recognizeSelfClosing?: boolean;
/**
* Custom tokenizer class to use instead of default
* Advanced usage for custom parsing behavior
*/
Tokenizer?: typeof Tokenizer;
}
import { Parser } from "htmlparser2";
// Streaming example: feed a document to the parser chunk by chunk and report
// the attributes and text content of every <article> element.

// Name of the element being tracked; '' while no <article> is open.
let currentTag = '';

const streamingParser = new Parser({
  onopentag(name, attribs) {
    if (name === 'article') {
      currentTag = name;
      console.log('Article started:', attribs);
    }
  },
  ontext(text) {
    // Only report non-blank text seen while an <article> is open.
    if (currentTag === 'article') {
      const trimmed = text.trim();
      if (trimmed) {
        console.log('Article text:', trimmed);
      }
    }
  },
  onclosetag(name) {
    if (name === 'article') {
      console.log('Article ended');
      currentTag = '';
    }
  },
});

// Process data in chunks
const chunks = ['<html><body>', '<article id="1">', 'Article content here', '</article>', '</body></html>'];
for (const chunk of chunks) {
  streamingParser.write(chunk);
}
// Signal end of input once every chunk has been written.
streamingParser.end();
import { Parser } from "htmlparser2";
/** One <input> element found inside a form. */
interface FormField {
  /** Value of the `name` attribute, if present. */
  name: string | undefined;
  /** Value of the `type` attribute; 'text' when the attribute is missing or empty. */
  type: string;
  /** Value of the `value` attribute, if present. */
  value: string | undefined;
}

/** Summary of one <form> element and the inputs collected while it was open. */
interface FormInfo {
  /** Value of the `action` attribute, if present. */
  action: string | undefined;
  /** Value of the `method` attribute; 'GET' when the attribute is missing or empty. */
  method: string;
  fields: FormField[];
}

// Explicit types: without them both variables are implicitly `any` when
// captured by the callbacks below, which fails under `strict`.
const forms: FormInfo[] = [];
// The form currently being collected, or null while outside any <form>.
let currentForm: FormInfo | null = null;

const formParser = new Parser({
  onopentag(name, attribs) {
    if (name === 'form') {
      // Start collecting a new form.
      currentForm = {
        action: attribs.action,
        method: attribs.method || 'GET',
        fields: [],
      };
    } else if (currentForm && name === 'input') {
      // Inputs are recorded only while a form is open.
      currentForm.fields.push({
        name: attribs.name,
        type: attribs.type || 'text',
        value: attribs.value,
      });
    }
  },
  onclosetag(name) {
    if (name === 'form' && currentForm) {
      // The form is complete: store it and stop collecting.
      forms.push(currentForm);
      currentForm = null;
    }
  },
});
import { Parser } from "htmlparser2";
// Error handling example: report parser errors through the onerror callback.
const parser = new Parser(
  {
    onopentag(name, attribs) {
      // Process tags
    },
    onerror(error) {
      // Report the problem. There is nothing to resume here: resume() only
      // undoes an earlier pause(), and this example never pauses the parser.
      console.error('Parser error:', error.message);
    },
  },
  {
    // Both options already default to true in HTML mode (xmlMode unset); they
    // are listed only to make the configuration explicit. They control
    // tag-name casing and entity decoding, not error recovery.
    lowerCaseTags: true,
    decodeEntities: true,
  },
);