Fast & forgiving HTML/XML parser with callback-based interface and DOM generation capabilities
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
WritableStream integration for Node.js streams, enabling pipeline processing and integration with other stream-based tools. The WritableStream class wraps the Parser with a standard Node.js Writable interface.
A Node.js Writable stream that processes HTML/XML data using the htmlparser2 Parser internally.
/**
* WritableStream makes the Parser interface available as a NodeJS stream
*/
class WritableStream extends Writable {
/**
* Create a new WritableStream instance
* @param cbs - Callback object implementing Handler interface (partial)
* @param options - Parser configuration options
*/
constructor(cbs: Partial<Handler>, options?: ParserOptions);
}

Usage Examples:
import { WritableStream } from "htmlparser2/WritableStream";
import fs from "fs";
// Basic stream processing
// A parser stream consumes exactly one document, so the handlers live in a
// small factory that builds a fresh stream for every input source.
const createParserStream = () =>
  new WritableStream({
    onopentag(name, attribs) {
      if (name === "link" && attribs.rel === "stylesheet") {
        console.log("Found CSS:", attribs.href);
      }
    },
    ontext(text) {
      if (text.trim()) {
        console.log("Text:", text.trim());
      }
    },
  });
const parserStream = createParserStream();
// Pipe from file stream
const htmlFile = fs.createReadStream("./document.html");
htmlFile.pipe(parserStream).on("finish", () => {
  console.log("Parsing complete");
});
// Pipe from HTTP response
import https from "https";
https.get("https://example.com", (response) => {
  // pipe() ends its destination once the source ends, and two sources feeding
  // one parser would interleave two documents. The stream already attached to
  // the file above therefore cannot take this response as well.
  response.pipe(createParserStream());
});
import { WritableStream } from "htmlparser2/WritableStream";
import { Transform } from "stream";
import fs from "fs";
// Custom transform stream to filter HTML content
/**
 * Transform stream that removes `<script>…</script>` elements from HTML text
 * and passes everything else through unchanged.
 *
 * Input arrives in arbitrary chunks, so a script element (or even the
 * `<script` tag name itself) may be split across two chunks. Text that could
 * still turn out to belong to a script element is held back until a later
 * chunk, or the end of the stream, settles the question. A script element that
 * is never closed is emitted unchanged when the stream ends.
 *
 * NOTE(review): chunks are decoded with `toString()` one at a time, so a
 * multi-byte character split across two Buffers is not reassembled — confirm
 * whether the source should call `setEncoding("utf8")` first.
 */
class HtmlFilter extends Transform {
  /** Matches a complete script element, including bodies that span lines. */
  private static readonly COMPLETE_SCRIPT = /<script\b[^>]*>[\s\S]*?<\/script\s*>/gi;
  /** Matches the start of a script opening tag. */
  private static readonly SCRIPT_OPEN = /<script\b/i;

  /** Text received so far that cannot be emitted yet. */
  private pending = "";

  constructor() {
    // Object mode is kept so downstream consumers keep receiving strings.
    super({ objectMode: true });
  }

  /**
   * Returns the index from which `text` must be held back: the start of an
   * unclosed script element, or a trailing fragment such as `<scr` that may
   * become `<script` once the next chunk arrives. Returns `text.length` when
   * everything is safe to emit.
   */
  private static holdFrom(text: string): number {
    const unclosedScript = text.search(HtmlFilter.SCRIPT_OPEN);
    if (unclosedScript !== -1) {
      return unclosedScript;
    }
    const lastTagStart = text.lastIndexOf("<");
    if (
      lastTagStart !== -1 &&
      "<script".startsWith(text.slice(lastTagStart).toLowerCase())
    ) {
      return lastTagStart;
    }
    return text.length;
  }

  /**
   * Appends the chunk to the buffered text, drops every complete script
   * element, and emits the part that can no longer belong to a script.
   *
   * @param chunk - Incoming data; converted with `toString()`.
   * @param _encoding - Unused.
   * @param callback - Receives the filtered text, if any is ready.
   */
  _transform(
    chunk: { toString(): string },
    _encoding: string,
    callback: (error?: Error | null, data?: unknown) => void
  ): void {
    const buffered = (this.pending + chunk.toString()).replace(
      HtmlFilter.COMPLETE_SCRIPT,
      ""
    );
    const cut = HtmlFilter.holdFrom(buffered);
    const ready = buffered.slice(0, cut);
    this.pending = buffered.slice(cut);
    if (ready) {
      callback(null, ready);
    } else {
      callback();
    }
  }

  /**
   * Emits whatever is still buffered when the input ends (for example an
   * unclosed script element or a trailing `<`).
   *
   * @param callback - Receives the remaining text, if any.
   */
  _flush(callback: (error?: Error | null, data?: unknown) => void): void {
    const rest = this.pending;
    this.pending = "";
    if (rest) {
      callback(null, rest);
    } else {
      callback();
    }
  }
}
// Chain streams together
// Parser stream that logs the target of every anchor carrying an href.
const extractLinks = new WritableStream({
  onopentag(tagName, attributes) {
    if (tagName !== "a" || !attributes.href) {
      return;
    }
    console.log("Link found:", attributes.href);
  },
});
// File -> script filter -> link extractor
const pageSource = fs.createReadStream("./page.html");
const scriptFreeHtml = pageSource.pipe(new HtmlFilter());
scriptFreeHtml.pipe(extractLinks).on("finish", () => {
  console.log("Link extraction complete");
});
import { WritableStream } from "htmlparser2/WritableStream";
import { DomHandler } from "domhandler";
import fs from "fs";
// Stream that builds DOM incrementally
// The callback runs once with either an error or the finished DOM.
const domHandler = new DomHandler((error, dom) => {
  if (error) {
    console.error("DOM construction failed:", error);
    return;
  }
  console.log("DOM constructed:", dom);
});
const domStream = new WritableStream(domHandler, {
  xmlMode: false,
  decodeEntities: true
});
// Feed a large HTML file chunk by chunk instead of reading it into one string
// first. The DOM handed to the callback above is still built fully in memory.
const largeHtmlFile = fs.createReadStream("./large-document.html");
largeHtmlFile.pipe(domStream);
import { WritableStream } from "htmlparser2/WritableStream";
import { createServer } from "http";
// HTTP server that processes incoming HTML
const server = createServer((req, res) => {
  // Anything other than POST /parse gets an explicit 404. Without a reply the
  // client would keep waiting on a response that never arrives.
  if (req.method !== "POST" || req.url !== "/parse") {
    res.writeHead(404, { "Content-Type": "text/plain" });
    res.end("Not Found");
    return;
  }
  // Items collected from the parser callbacks, in the order they are reported.
  type ParsedItem =
    | { type: "tag"; name: string; attribs: { [key: string]: string } }
    | { type: "text"; content: string };
  const results: ParsedItem[] = [];
  const htmlProcessor = new WritableStream({
    onopentag(name, attribs) {
      results.push({ type: "tag", name, attribs });
    },
    ontext(text) {
      // Whitespace-only text nodes are skipped.
      if (text.trim()) {
        results.push({ type: "text", content: text.trim() });
      }
    },
    // Parsing is finished: send everything that was collected as JSON.
    onend() {
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify(results));
    }
  });
  req.pipe(htmlProcessor);
});
The WritableStream accepts the same options as the Parser:
interface ParserOptions {
/** Enable XML parsing mode */
xmlMode?: boolean;
/** Decode HTML entities */
decodeEntities?: boolean;
/** Convert tag names to lowercase */
lowerCaseTags?: boolean;
/** Convert attribute names to lowercase */
lowerCaseAttributeNames?: boolean;
/** Recognize CDATA sections in HTML mode */
recognizeCDATA?: boolean;
/** Recognize self-closing tags in HTML mode */
recognizeSelfClosing?: boolean;
}

The WritableStream automatically handles Buffer inputs and string encoding:
import { WritableStream } from "htmlparser2/WritableStream";
// Parser stream that prints every text node it is handed.
const parser = new WritableStream({
  ontext: (text) => {
    console.log("Text:", text);
  },
});
// Both Buffer and string inputs work
const bufferChunk = Buffer.from("<div>Hello</div>");
parser.write(bufferChunk);
// end(chunk) writes the final chunk and then closes the stream.
parser.end(" from buffer!");
import { WritableStream } from "htmlparser2/WritableStream";
import fs from "fs";
// Parser stream with a handler for parser-level errors.
const parserStream = new WritableStream({
  onopentag: (tagName) => {
    console.log("Tag:", tagName);
  },
  onerror: (parseError) => {
    console.error("Parse error:", parseError);
  },
});
// Handle stream errors
parserStream.on("error", (streamError) => {
  console.error("Stream error:", streamError);
});
// on() returns the stream itself, so the listener can be attached inline.
const inputStream = fs
  .createReadStream("./document.html")
  .on("error", (readError) => {
    console.error("File read error:", readError);
  });
inputStream.pipe(parserStream);
import { WritableStream } from "htmlparser2/WritableStream";
import highland from "highland";
import fs from "fs";
// Every opening tag seen by the parser is recorded here.
const tags = [];
const htmlStream = new WritableStream({
  onopentag: (tagName, attributes) => {
    tags.push({ name: tagName, attribs: attributes });
  },
});
// Wrap the file stream with highland, then feed it to the parser.
const documentSource = highland(fs.createReadStream("./document.html"));
const piped = documentSource.pipe(htmlStream);
piped.on("finish", () => {
  console.log("Found tags:", tags.length);
});
import { WritableStream } from "htmlparser2/WritableStream";
import { fromEvent } from "rxjs";
import fs from "fs";
// Parser stream that logs the source of every <img> tag.
const parserStream = new WritableStream({
  onopentag(tagName, attributes) {
    if (tagName !== "img") {
      return;
    }
    console.log("Image:", attributes.src);
  },
});
// Expose the stream's "finish" event as an observable.
const finish$ = fromEvent(parserStream, "finish");
finish$.subscribe({
  next: () => {
    console.log("Stream processing complete");
  },
});
const galleryFile = fs.createReadStream("./gallery.html");
galleryFile.pipe(parserStream);