Fast HTML parsing and transformation API powered by lol-html for streaming HTML processing, DOM manipulation, and content rewriting with CSS selector-based targeting.
// HTMLRewriter is available globally
const rewriter = new HTMLRewriter();
Create and configure HTML transformation pipelines with CSS selector-based element targeting and handler registration.
/**
 * Fast HTML transformation API with streaming support.
 * Powered by lol-html for high-performance HTML processing.
 *
 * Handlers are registered with `on()` and `onDocument()`, both of which
 * return an HTMLRewriter so calls can be chained, and are applied to
 * content with `transform()`.
 */
class HTMLRewriter {
  /**
   * Create a new HTMLRewriter instance
   */
  constructor();
  /**
   * Add handlers for elements matching a CSS selector
   * @param selector - CSS selector (e.g. "div", "a[href]", ".class", "#id")
   * @param handlers - Object containing handler functions for elements, comments, and text
   * @returns HTMLRewriter instance for chaining
   */
  on(selector: string, handlers: HTMLRewriterElementContentHandlers): HTMLRewriter;
  /**
   * Add handlers for document-level events
   * @param handlers - Object containing handler functions for doctype, comments, text, and document end
   * @returns HTMLRewriter instance for chaining
   */
  onDocument(handlers: HTMLRewriterDocumentContentHandlers): HTMLRewriter;
  /**
   * Transform an HTML string
   * @param input - The HTML to transform
   * @returns The transformed HTML as a string
   */
  transform(input: string): string;
  /**
   * Transform HTML held in an ArrayBuffer
   * @param input - The HTML to transform
   * @returns The transformed HTML as an ArrayBuffer
   */
  transform(input: ArrayBuffer): ArrayBuffer;
  /**
   * Transform HTML from a Response or Blob
   * @param input - The HTML to transform
   * @returns A Response carrying the transformed HTML. A Blob input does not
   * come back as a Blob: Blob is not among the possible return types.
   */
  transform(input: Response | Blob): Response;
  /**
   * Transform HTML content from various input types.
   * General signature for callers whose input is typed as the full union;
   * prefer the narrower overloads above when the input type is known.
   * @param input - The HTML to transform (Response, Blob, ArrayBuffer, or string)
   * @returns string for string input, ArrayBuffer for ArrayBuffer input, Response otherwise
   */
  transform(input: Response | Blob | ArrayBuffer | string): Response | ArrayBuffer | string;
}
Usage Examples:
// Basic URL rewriting
const rewriter = new HTMLRewriter()
  .on('a[href]', {
    element(element) {
      const href = element.getAttribute('href');
      // Only site-relative paths ("/about") get the origin prepended.
      // Protocol-relative URLs ("//cdn.example.com/x") also start with "/"
      // but already name a host, so they are left untouched.
      if (href?.startsWith('/') && !href.startsWith('//')) {
        element.setAttribute('href', `https://example.com${href}`);
      }
    }
  });
// Transform HTML string
const html = '<a href="/about">About</a>';
const transformed = rewriter.transform(html);
// Result: '<a href="https://example.com/about">About</a>'
// Transform fetch response
const response = await fetch('https://example.com');
const transformedResponse = rewriter.transform(response);
Handlers for processing HTML elements, comments, and text nodes with comprehensive DOM manipulation capabilities.
/**
 * Callbacks that can be attached to a CSS selector.
 * Each callback is optional and may be synchronous or return a promise.
 */
interface HTMLRewriterElementContentHandlers {
  /** Called for HTML elements that match the selector */
  element?(element: Element): void | Promise<void>;
  /** Called for HTML comments inside matching elements */
  comments?(comment: Comment): void | Promise<void>;
  /** Called for text nodes inside matching elements */
  text?(text: Text): void | Promise<void>;
}
/**
 * Callbacks for document-level content.
 * Each callback is optional and may be synchronous or return a promise.
 */
interface HTMLRewriterDocumentContentHandlers {
  /** Called for DOCTYPE declarations */
  doctype?(doctype: Doctype): void | Promise<void>;
  /** Called for comments at document level */
  comments?(comment: Comment): void | Promise<void>;
  /** Called for text nodes at document level */
  text?(text: Text): void | Promise<void>;
  /** Called when the end of the document is reached */
  end?(end: DocumentEnd): void | Promise<void>;
}
Usage Examples:
// A text node can be delivered in several chunks, so the paragraph handler
// collects the chunks here until the last one of the node arrives.
let pendingParagraphText = '';
const rewriter = new HTMLRewriter()
  // Add meta tags to head
  .on('head', {
    element(element) {
      element.append('<meta name="viewport" content="width=device-width, initial-scale=1">', {
        html: true
      });
    }
  })
  // Remove script tags
  .on('script', {
    element(element) {
      element.remove();
    }
  })
  // Process text content
  .on('p', {
    text(text) {
      pendingParagraphText += text.text;
      if (!text.lastInTextNode) {
        // Drop the partial chunk; the complete text is written back once
        // the final chunk is seen.
        text.remove();
        return;
      }
      const content = pendingParagraphText;
      pendingParagraphText = '';
      // Capitalize first letter of paragraphs. Always written back, because
      // the earlier chunks of this node were removed above.
      text.replace(content.charAt(0).toUpperCase() + content.slice(1));
    }
  })
  // Handle comments
  .on('*', {
    comments(comment) {
      // Remove all HTML comments
      comment.remove();
    }
  });
Comprehensive DOM element manipulation including attributes, content insertion, and structural modifications.
/**
 * An element handed to an `element` handler.
 * Mutating methods return the element itself so calls can be chained,
 * except `onEndTag`, which returns nothing.
 */
interface Element {
  /** Lowercase tag name, e.g. "div" or "span" */
  tagName: string;
  /** Iterator over the element's attributes as [name, value] pairs */
  readonly attributes: IterableIterator<[string, string]>;
  /** True once this element has been removed */
  readonly removed: boolean;
  /** True when the element is explicitly self-closing */
  readonly selfClosing: boolean;
  /** True when the element is able to hold inner content */
  readonly canHaveContent: boolean;
  /** Namespace URI of the element */
  readonly namespaceURI: string;
  /**
   * Look up an attribute by name
   * @param name - Name of the attribute
   * @returns The attribute's value, or null when the attribute is absent
   */
  getAttribute(name: string): string | null;
  /**
   * Test whether an attribute is present
   * @param name - Name of the attribute
   * @returns True when the attribute exists
   */
  hasAttribute(name: string): boolean;
  /**
   * Assign a value to an attribute
   * @param name - Name of the attribute
   * @param value - Value to assign
   * @returns This element, for chaining
   */
  setAttribute(name: string, value: string): Element;
  /**
   * Delete an attribute
   * @param name - Name of the attribute
   * @returns This element, for chaining
   */
  removeAttribute(name: string): Element;
  /**
   * Insert content immediately before this element
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  before(content: Content, options?: ContentOptions): Element;
  /**
   * Insert content immediately after this element
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  after(content: Content, options?: ContentOptions): Element;
  /**
   * Insert content at the beginning of this element
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  prepend(content: Content, options?: ContentOptions): Element;
  /**
   * Insert content at the end of this element
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  append(content: Content, options?: ContentOptions): Element;
  /**
   * Swap this element for new content
   * @param content - The replacement
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  replace(content: Content, options?: ContentOptions): Element;
  /**
   * Delete this element together with its contents
   * @returns This element, for chaining
   */
  remove(): Element;
  /**
   * Delete this element while leaving its contents in place
   * @returns This element, for chaining
   */
  removeAndKeepContent(): Element;
  /**
   * Overwrite the inner content of this element
   * @param content - The new inner content
   * @param options - How the content is inserted
   * @returns This element, for chaining
   */
  setInnerContent(content: Content, options?: ContentOptions): Element;
  /**
   * Register a handler for this element's end tag
   * @param handler - Function called with the end tag
   */
  onEndTag(handler: (tag: EndTag) => void | Promise<void>): void;
}
/** Content accepted by the insertion and replacement methods; an alias for string */
type Content = string;
/** Options controlling how inserted content is interpreted */
interface ContentOptions {
  /** Set to true to have the content parsed as HTML @default false */
  html?: boolean;
}
Usage Examples:
const rewriter = new HTMLRewriter()
  .on('img', {
    element(element) {
      // Add lazy loading and responsive attributes
      element.setAttribute('loading', 'lazy');
      element.setAttribute('decoding', 'async');
      // Add responsive sizes if not present
      if (!element.hasAttribute('sizes')) {
        element.setAttribute('sizes', '(max-width: 768px) 100vw, 50vw');
      }
      // Wrap images in figure tags
      const alt = element.getAttribute('alt');
      if (alt) {
        // `html: true` is required here: the option defaults to false, in
        // which case the tags would be inserted as text instead of markup.
        // NOTE(review): `alt` is interpolated into markup; confirm whether
        // getAttribute returns already-escaped text and escape it if not.
        element.before('<figure>', { html: true });
        element.after(`<figcaption>${alt}</figcaption></figure>`, { html: true });
      }
    }
  })
  .on('div.container', {
    element(element) {
      // Add Bootstrap classes, keeping any classes already on the element
      const classNames = new Set(
        (element.getAttribute('class') ?? '').split(/\s+/).filter(Boolean)
      );
      for (const name of ['container', 'mx-auto', 'px-4']) {
        classNames.add(name);
      }
      element.setAttribute('class', [...classNames].join(' '));
      // Insert navigation at the start
      element.prepend(`
<nav class="navbar">
<a href="/">Home</a>
<a href="/about">About</a>
</nav>
`, { html: true });
    }
  });
Process and modify text content and HTML comments with precise control over text nodes and document structure.
/**
 * A chunk of text handed to a `text` handler.
 * Mutating methods return the chunk itself so calls can be chained.
 */
interface Text {
  /** Text content of this chunk */
  readonly text: string;
  /** True when this chunk is the final piece of text in its text node */
  readonly lastInTextNode: boolean;
  /** True once this chunk has been removed */
  readonly removed: boolean;
  /**
   * Insert content immediately before this text chunk
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This text chunk, for chaining
   */
  before(content: Content, options?: ContentOptions): Text;
  /**
   * Insert content immediately after this text chunk
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This text chunk, for chaining
   */
  after(content: Content, options?: ContentOptions): Text;
  /**
   * Swap this text chunk for new content
   * @param content - The replacement
   * @param options - How the content is inserted
   * @returns This text chunk, for chaining
   */
  replace(content: Content, options?: ContentOptions): Text;
  /**
   * Delete this text chunk
   * @returns This text chunk, for chaining
   */
  remove(): Text;
}
/**
 * An HTML comment handed to a `comments` handler.
 * Mutating methods return the comment itself so calls can be chained.
 */
interface Comment {
  /** Text of the comment (not declared readonly) */
  text: string;
  /** True once this comment has been removed */
  readonly removed: boolean;
  /**
   * Insert content immediately before this comment
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This comment, for chaining
   */
  before(content: Content, options?: ContentOptions): Comment;
  /**
   * Insert content immediately after this comment
   * @param content - What to insert
   * @param options - How the content is inserted
   * @returns This comment, for chaining
   */
  after(content: Content, options?: ContentOptions): Comment;
  /**
   * Swap this comment for new content
   * @param content - The replacement
   * @param options - How the content is inserted
   * @returns This comment, for chaining
   */
  replace(content: Content, options?: ContentOptions): Comment;
  /**
   * Delete this comment
   * @returns This comment, for chaining
   */
  remove(): Comment;
}
/** A DOCTYPE declaration handed to a `doctype` handler */
interface Doctype {
  /** Name of the doctype (e.g. "html" for <!DOCTYPE html>) */
  readonly name: string | null;
  /** Public identifier of the doctype */
  readonly publicId: string | null;
  /** System identifier of the doctype */
  readonly systemId: string | null;
  /** True once this doctype has been removed */
  readonly removed: boolean;
  /**
   * Delete this doctype
   * @returns This doctype, for chaining
   */
  remove(): Doctype;
}
/** The end of the document, handed to an `end` handler */
interface DocumentEnd {
  /**
   * Add content at the very end of the document
   * @param content - What to append
   * @param options - How the content is inserted
   * @returns This DocumentEnd, for chaining
   */
  append(content: Content, options?: ContentOptions): DocumentEnd;
}
Usage Examples:
// A text node can be delivered in several chunks. Chunks are collected here
// so that a placeholder split across two chunks is still matched.
let bufferedPlaceholderText = '';
const rewriter = new HTMLRewriter()
  // Text processing and replacement
  .on('p', {
    text(text) {
      bufferedPlaceholderText += text.text;
      if (!text.lastInTextNode) {
        // Drop the partial chunk; the complete text is written back once
        // the final chunk is seen.
        text.remove();
        return;
      }
      // Replace placeholder text
      const content = bufferedPlaceholderText
        .replace(/\$\{company\}/g, 'Acme Corp')
        .replace(/\$\{year\}/g, new Date().getFullYear().toString());
      bufferedPlaceholderText = '';
      // Always written back, because the earlier chunks of this node were
      // removed above.
      text.replace(content);
    }
  })
  // Remove development comments
  .on('*', {
    comments(comment) {
      if (comment.text.includes('TODO') || comment.text.includes('DEBUG')) {
        comment.remove();
      }
    }
  })
  // Document-level handlers
  .onDocument({
    doctype(doctype) {
      console.log(`Document type: ${doctype.name}`);
    },
    end(end) {
      // Add analytics script at document end
      end.append(`
<script>
// Analytics tracking code
console.log('Page loaded');
</script>
`, { html: true });
    }
  });
// Process streaming HTML
const response = await fetch('https://example.com');
const transformedResponse = rewriter.transform(response);