A very fast HTML parser, generating a simplified DOM, with basic element query support.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Base Node classes and node type system providing the foundation for the DOM tree structure, including specialized TextNode and CommentNode classes for complete HTML representation.
Abstract base class providing common functionality for all DOM nodes.
abstract class Node {
/** Array of child nodes */
childNodes: Node[];
/** Parent element reference */
parentNode: HTMLElement | null;
/** Position range in original HTML source */
range: readonly [number, number];
// Abstract properties (implemented by subclasses)
abstract rawTagName: string;
abstract nodeType: NodeType;
abstract text: string;
abstract rawText: string;
// Abstract methods
abstract toString(): string;
abstract clone(): Node;
}

Methods available on all node types for DOM manipulation and content access.
/** Remove node from its parent */
remove(): Node;
/** Get raw text content (may include HTML entities) */
get innerText(): string;
/** Get/set decoded text content */
get textContent(): string;
set textContent(val: string): void;

Usage Examples:
import { parse, NodeType } from "node-html-parser";
const root = parse('<div>Text <em>emphasis</em></div>');
// parse() returns a wrapper root; the text node is the first child of the <div>
const div = root.childNodes[0];
const textNode = div.childNodes[0];
// Check node type
if (textNode.nodeType === NodeType.TEXT_NODE) {
console.log("This is a text node");
}
// Remove node
textNode.remove();
console.log(root.innerHTML); // "<div><em>emphasis</em></div>"

Represents text content within HTML elements with whitespace handling and text manipulation.
class TextNode extends Node {
/** Node type constant */
nodeType: NodeType.TEXT_NODE; // Value: 3
/** Empty string for text nodes */
rawTagName: '';
/** Get/set raw text content including HTML entities */
get rawText(): string;
set rawText(text: string): void;
/** Get decoded text content */
get text(): string;
/** Get raw text with normalized whitespace */
get trimmedRawText(): string;
/** Get decoded text with normalized whitespace */
get trimmedText(): string;
/** Check if node contains only whitespace */
get isWhitespace(): boolean;
/** Return raw text as string representation */
toString(): string;
/** Create copy of text node */
clone(): TextNode;
}

Usage Examples:
import { parse, TextNode } from "node-html-parser";
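const root = parse('<p> Hello &amp; world </p>');
// parse() returns a wrapper root: <p> is its first child, and the text node is the first child of <p>
const textNode = root.childNodes[0].childNodes[0] as TextNode;
console.log(textNode.rawText); // " Hello &amp; world "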
console.log(textNode.text); // " Hello & world "
console.log(textNode.trimmedText); // "Hello & world"
console.log(textNode.isWhitespace); // false
// Whitespace node
const whitespace = parse('<div> </div>').childNodes[0].childNodes[0] as TextNode;
console.log(whitespace.isWhitespace); // true
// Modify text content
textNode.rawText = "New content";
console.log(root.toString()); // "<p>New content</p>"

Represents HTML comments in the DOM tree.
class CommentNode extends Node {
/** Node type constant */
nodeType: NodeType.COMMENT_NODE; // Value: 8
/** Comment tag identifier */
rawTagName: '!--';
/** Comment content */
rawText: string;
/** Get comment content (same as rawText) */
get text(): string;
/** Return formatted comment string */
toString(): string; // Returns <!--content-->
/** Create copy of comment node */
clone(): CommentNode;
}

Usage Examples:
import { parse, CommentNode, NodeType } from "node-html-parser";
// Parse HTML with comments (requires comment: true option)
const root = parse('<!-- This is a comment --><div>Content</div>', {
comment: true
});
const commentNode = root.childNodes[0] as CommentNode;
console.log(commentNode.nodeType === NodeType.COMMENT_NODE); // true
console.log(commentNode.rawText); // " This is a comment "
console.log(commentNode.text); // " This is a comment "
console.log(commentNode.toString()); // "<!-- This is a comment -->"
// Create comment programmatically
const newComment = new CommentNode(' New comment ');
root.appendChild(newComment);

Enumeration defining constants for different node types following DOM standards.
enum NodeType {
/** HTMLElement nodes */
ELEMENT_NODE = 1,
/** TextNode nodes */
TEXT_NODE = 3,
/** CommentNode nodes */
COMMENT_NODE = 8
}

Usage Examples:
import { parse, NodeType } from "node-html-parser";
const root = parse('<!--comment--><div>text</div>', { comment: true });
root.childNodes.forEach(node => {
switch (node.nodeType) {
case NodeType.ELEMENT_NODE:
console.log('Element:', node.tagName);
break;
case NodeType.TEXT_NODE:
console.log('Text:', node.text);
break;
case NodeType.COMMENT_NODE:
console.log('Comment:', node.rawText);
break;
}
});

When working with elements containing mixed content (text, elements, and comments), you can iterate through all child nodes:
import { parse, NodeType, HTMLElement, TextNode, CommentNode } from "node-html-parser";
const mixed = parse(`
<div>
Start text
<!-- A comment -->
<p>Paragraph</p>
End text
</div>
`, { comment: true });
mixed.childNodes.forEach(node => {
if (node.nodeType === NodeType.ELEMENT_NODE) {
const element = node as HTMLElement;
console.log(`Element: ${element.tagName}`);
} else if (node.nodeType === NodeType.TEXT_NODE) {
const textNode = node as TextNode;
if (!textNode.isWhitespace) {
console.log(`Text: "${textNode.text.trim()}"`);
}
} else if (node.nodeType === NodeType.COMMENT_NODE) {
const comment = node as CommentNode;
console.log(`Comment: ${comment.rawText}`);
}
});

All node types can be created programmatically:
/** Create new HTMLElement */
new HTMLElement(
tagName: string,
keyAttrs: { id?: string; class?: string },
rawAttrs?: string,
parentNode?: HTMLElement | null,
range?: [number, number]
);
/** Create new TextNode */
new TextNode(
rawText: string,
parentNode?: HTMLElement | null,
range?: [number, number]
);
/** Create new CommentNode */
new CommentNode(
rawText: string,
parentNode?: HTMLElement | null,
range?: [number, number],
rawTagName?: string
);

Usage Examples:
import { HTMLElement, TextNode, CommentNode } from "node-html-parser";
// Create elements programmatically
const div = new HTMLElement('div', { id: 'container', class: 'main' });
const text = new TextNode('Hello world');
const comment = new CommentNode(' TODO: Add more content ');
div.appendChild(text);
div.appendChild(comment);
console.log(div.toString());
// <div id="container" class="main">Hello world<!-- TODO: Add more content --></div>