Event-driven HTML parser for processing document structures, extracting elements, attributes, and content with precise position tracking and customizable event handling.
Core HTML parsing functionality with event-driven architecture for processing HTML content.
/**
* Event-driven HTML parser for processing HTML structures.
*
* Listeners are registered per event type with `addListener`; `parse` then
* fires an event for each structural element it encounters.
* NOTE(review): this is a signature listing only - no method bodies are shown,
* so the descriptions below restate the documented contract rather than
* observed behaviour.
*/
class HTMLParser {
/** Last parsing event processed, or null if none */
lastEvent: Partial<Block> | null;
/**
* Parse HTML content and fire events for each structural element
* @param html - HTML content to parse
*/
parse(html: string): void;
/**
* Register event listener for specific parsing events
* @param types - Comma-separated list of event types to listen for
*   (e.g. 'tagstart,tagend')
* @param listener - Callback function to handle events
*/
addListener(types: string, listener: Listener): void;
/**
* Fire a parsing event to all registered listeners
* @param type - Event type name
* @param data - Optional event data
*/
fire(type: string, data?: Partial<Block>): void;
/**
* Remove event listener for specific event types
* @param type - Event type to remove listener from. NOTE(review): unlike
*   `addListener`, this parameter is documented as a single type rather than
*   a comma-separated list - confirm whether lists are accepted.
* @param listener - Specific listener function to remove
*/
removeListener(type: string, listener: Listener): void;
/**
* Calculate line and column position for a given character index
* @param event - Block event with position information
* @param index - Character index to calculate position for.
*   NOTE(review): upstream HTMLHint appears to treat this as an offset into
*   `event.raw`, not an absolute offset into the parsed document - confirm
*   before passing values derived from `event.pos`.
* @returns Object with line and column numbers
*/
fixPos(event: Block, index: number): { line: number; col: number };
/**
* Convert attributes array to key-value map
* @param arrAttrs - Array of attribute objects
* @returns Map of attribute names to values
*/
getMapAttrs(arrAttrs: Attr[]): { [name: string]: string };
/**
* Create boolean map from comma-separated string
* @param str - Comma-separated string of keys (e.g. 'img,br,hr')
* @returns Object map with keys set to true
*/
makeMap(str: string): { [key: string]: boolean };
}HTMLParser fires events during HTML processing that listeners can respond to.
/**
* Event listener function type
*/
type Listener = (event: Block) => void;
Available Event Types:
- `start`
- `end`
- `tagstart`
- `tagend`
- `text`
- `comment`
- `cdata`
- `all`

All parsing events receive Block objects containing detailed information about the parsed element.
/**
* Represents a parsed HTML element or content block.
* This is the object handed to every `Listener` callback.
*/
interface Block {
/** Tag name (e.g., 'div', 'p', 'img') */
tagName: string;
/** Array of parsed attributes */
attrs: Attr[];
/** Block type ('tagstart', 'tagend', 'text', 'comment', 'cdata') */
type: string;
/** Raw HTML content for this block */
raw: string;
/** Character position in the source HTML */
pos: number;
/** Line number in the source HTML (1-based) */
line: number;
/** Column number in the source HTML (1-based) */
col: number;
/** Text content of the block (for text blocks; the usage examples also read it on comment and cdata events) */
content: string;
/**
* Whether the block spans multiple lines.
* NOTE(review): upstream HTMLHint appears to set this on comment events to
* distinguish `<!-- ... -->` (true) from `<!...>` (false) rather than to flag
* multi-line blocks - confirm and reword if so.
*/
long: boolean;
/**
* Closing tag information.
* NOTE(review): presumably the trailing '/' of a self-closing start tag
* (empty otherwise) - verify against the parser.
*/
close: string;
/** Reference to the previous parsing event */
lastEvent?: Partial<Block>;
}Detailed information about parsed HTML attributes.
/**
* Represents a parsed HTML attribute, as found in `Block.attrs`.
*/
interface Attr {
/** Attribute name */
name: string;
/** Attribute value */
value: string;
/** Quote character used (' or "); NOTE(review): presumably empty for unquoted values - verify */
quote: string;
/**
* Starting index of the attribute in the raw HTML.
* NOTE(review): upstream HTMLHint appears to measure this from the start of
* the attribute text that follows `<tagName`, not from the start of the tag's
* raw text or of the document - confirm before using it as a position.
*/
index: number;
/**
* Raw attribute text including quotes.
* NOTE(review): likely also includes the whitespace preceding the attribute
* name - verify.
*/
raw: string;
}import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();
const html = '<div class="test">Hello <strong>world</strong></div>';

// Opening tags: report where the tag was found, then list its attributes.
parser.addListener('tagstart', ({ tagName, line, col, attrs }) => {
  console.log(`Tag opened: ${tagName} at line ${line}, col ${col}`);
  for (const { name, value } of attrs) {
    console.log(` Attribute: ${name}="${value}"`);
  }
});

// Text nodes: echo the text found between tags.
parser.addListener('text', ({ content }) => {
  console.log(`Text content: "${content}"`);
});

// Closing tags.
parser.addListener('tagend', ({ tagName }) => {
  console.log(`Tag closed: ${tagName}`);
});

// Run the parser over the sample markup.
parser.parse(html);

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();

// A comma-separated list subscribes one listener to several event types;
// `type` on the event tells them apart.
parser.addListener('tagstart,tagend', ({ type, tagName }) => {
  const message = type === 'tagstart'
    ? `Opening tag: <${tagName}>`
    : `Closing tag: </${tagName}>`;
  console.log(message);
});

// Same idea for the two content-carrying events.
parser.addListener('text,comment', ({ type, content }) => {
  const message = type === 'text'
    ? `Text: ${content}`
    : `Comment: ${content}`;
  console.log(message);
});

parser.parse('<p>Text content</p><!-- Comment -->');

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();

parser.addListener('tagstart', (tag) => {
  // Collapse the attribute array into a name -> value lookup.
  const attrMap = parser.getMapAttrs(tag.attrs);
  const { id, class: classNames } = attrMap;

  console.log(`Tag: ${tag.tagName}`);
  console.log('Attributes:', attrMap);

  // Report the attributes we care about only when they carry a value.
  if (id) {
    console.log(`Element has ID: ${id}`);
  }
  if (classNames) {
    console.log(`Element has classes: ${classNames}`);
  }
});

parser.parse('<div id="main" class="container active" data-role="navigation">Content</div>');

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();
parser.addListener('tagstart', (event) => {
  console.log(`Tag ${event.tagName} at line ${event.line}, column ${event.col}`);
  // fixPos() expects an offset into `event.raw`, but `attr.index` is measured
  // from the start of the attribute text that follows `<tagName`, and
  // `attr.raw` begins with the whitespace separating the attribute from the
  // previous token. Shift by the `<tagName` prefix and skip that whitespace so
  // the reported position is the first character of the attribute name.
  const attrsStart = event.tagName.length + 1;
  // Calculate position for specific attribute
  event.attrs.forEach(attr => {
    const leadingWhitespace = attr.raw.length - attr.raw.trimStart().length;
    const attrPos = parser.fixPos(event, attrsStart + attr.index + leadingWhitespace);
    console.log(` Attribute ${attr.name} at line ${attrPos.line}, column ${attrPos.col}`);
  });
});
const multilineHTML = `
<div
class="container"
id="main">
Content
</div>`;
parser.parse(multilineHTML);

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();
const tagStack: string[] = [];

// Opening tag: remember it and report how deep we are now.
parser.addListener('tagstart', ({ tagName }) => {
  const depth = tagStack.push(tagName); // push() returns the new stack length
  console.log(`Nesting level: ${depth}, Current: ${tagName}`);
});

// Closing tag: it should match the most recently opened tag.
parser.addListener('tagend', ({ tagName }) => {
  const expected = tagStack.pop();
  if (expected === tagName) {
    return;
  }
  console.log(`Mismatched tags: expected ${expected}, got ${tagName}`);
});

parser.parse('<div><p><span>Text</span></p></div>');

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();

// Sample markup containing both a comment and a CDATA section.
const htmlWithSpecialContent = `
<div>
<!-- This is a comment -->
<script><![CDATA[
var x = 1 < 2;
console.log(x);
]]></script>
</div>`;

// Comments and CDATA each have their own event type; both carry their
// payload on `content`.
parser.addListener('comment', ({ content }) => {
  console.log(`HTML Comment: ${content}`);
});
parser.addListener('cdata', ({ content }) => {
  console.log(`CDATA Section: ${content}`);
});

parser.parse(htmlWithSpecialContent);

import { HTMLParser } from "htmlhint";
const parser = new HTMLParser();

// Named listeners, so the same function reference can be passed to
// removeListener() later.
function tagListener(event: Block): void {
  console.log(`Tag: ${event.tagName}`);
}
function textListener(event: Block): void {
  console.log(`Text: ${event.content}`);
}

// Subscribe both listeners, then parse once with both active.
parser.addListener('tagstart', tagListener);
parser.addListener('text', textListener);
parser.parse('<p>Hello world</p>');

// Unsubscribe the tag listener; only the text listener sees the second parse.
parser.removeListener('tagstart', tagListener);
parser.parse('<div>More content</div>');

The
`fixPos` method calculates the line and column position for a given character index:
const parser = new HTMLParser();
parser.addListener('tagstart', (event) => {
  // Calculate position for the end of the opening tag.
  // fixPos() takes an offset into `event.raw`, not an absolute document
  // offset, so the end of the tag is simply the raw text's length. Adding
  // `event.pos` would overshoot the column for every tag that does not start
  // at document offset 0.
  const endPos = parser.fixPos(event, event.raw.length);
  console.log(`Tag ends at line ${endPos.line}, column ${endPos.col}`);
});

The
`makeMap` method creates a boolean lookup map from a comma-separated string:
const parser = new HTMLParser();
const voidElements = parser.makeMap('img,br,hr,input,meta,link');
parser.addListener('tagstart', (event) => {
  // Tag names are reported as written in the source, so normalise the case
  // before the lookup; otherwise `<BR>` or `<Img>` would not be recognised.
  if (voidElements[event.tagName.toLowerCase()]) {
    console.log(`${event.tagName} is a void element`);
  }
});

HTMLParser is designed to be tolerant of malformed HTML and continues parsing even when encountering errors. However, listeners should handle edge cases:
const parser = new HTMLParser();

// Defensive tagstart handler: anything thrown while handling the event is
// logged instead of propagating out of the listener.
parser.addListener('tagstart', (tag) => {
  try {
    if (tag.tagName) {
      // Fall back to an empty list so a missing `attrs` array cannot throw.
      const attrMap = parser.getMapAttrs(tag.attrs || []);
    } else {
      // Nothing useful can be done with an unnamed tag.
      console.warn('Tag event without tag name');
    }
  } catch (err) {
    console.error('Error processing tag event:', err);
  }
});