Static methods for rendering, parsing, and working with DOM nodes without requiring a Cheerio instance. These utility functions provide essential functionality for document processing and manipulation.
Methods for converting DOM structures to string representations.
/**
* Render a document or a set of elements as an HTML string.
*
* @param dom - Elements to render: an array-like of nodes, a single node, or a
*   string (optional, defaults to document root)
* @param options - Rendering options, for example `xmlMode`
* @returns HTML string representation
*/
function html(dom?: BasicAcceptedElems<AnyNode>, options?: CheerioOptions): string;
/**
* Render a document or a set of elements as an XML string.
*
* Unlike `html`, this function takes no options argument; output is always
* produced in XML mode.
* @param dom - Elements to render (optional, defaults to document root)
* @returns XML string representation
*/
function xml(dom?: BasicAcceptedElems<AnyNode>): string;
/**
* Get text content from elements
* @param elements - Elements to extract text from (optional)
* @returns Combined text content
*/
function text(elements?: ArrayLike<AnyNode>): string;

Usage Examples:
// `cheerio.load` is used below, so the namespace must be imported as well
import * as cheerio from "cheerio";
import { html, xml, text } from "cheerio";
// Render as HTML
const $ = cheerio.load('<div><p>Hello <strong>world</strong></p></div>');
// Render entire document
console.log(html());
// Output: "<div><p>Hello <strong>world</strong></p></div>"
// Render specific elements
console.log(html($('p')));
// Output: "<p>Hello <strong>world</strong></p>"
// Render with options
console.log(html($('div'), { xmlMode: false }));
// Render as XML
console.log(xml($('div')));
// Output: "<div><p>Hello <strong>world</strong></p></div>"
// Extract text content
console.log(text($('p')));
// Output: "Hello world"
// Text from all elements
console.log(text());
// Output: "Hello world" (all text content)

Methods for parsing HTML strings into DOM node arrays.
/**
* Parse HTML string into array of DOM nodes.
*
* Overloads are ordered from most to least specific so that the compiler picks
* the precise return type: `null` for an empty or missing input, `AnyNode[]`
* for a string, and the nullable union only when the argument itself is
* `string | null`.
* @param data - HTML markup to parse
* @param context - Context parameter (ignored, for jQuery compatibility)
* @param keepScripts - Whether to preserve script elements (default: false)
* @returns Array of parsed DOM nodes, or null when `data` is empty or null
*   (malformed markup is still parsed rather than rejected)
*/
function parseHTML(data?: '' | null): null;
function parseHTML(data: string, context?: unknown, keepScripts?: boolean): AnyNode[];
function parseHTML(data?: string | null): AnyNode[] | null;

Usage Examples:
// `text` is used at the end of this example, so import it alongside parseHTML
import { parseHTML, text } from "cheerio";
// Parse HTML string
const nodes = parseHTML('<div>Hello</div><p>World</p>');
// Returns: [DivElement, PElement]
// Parse with script handling
const withScripts = parseHTML('<div>Content</div><script>alert("hi")</script>', null, true);
// Keeps script element
const withoutScripts = parseHTML('<div>Content</div><script>alert("hi")</script>', null, false);
// Removes script element (default behavior)
// Handle edge cases
parseHTML('') // Returns null
parseHTML(null) // Returns null
parseHTML('<invalid') // Attempts to parse invalid HTML
// Use parsed nodes
const parsed = parseHTML('<li>Item 1</li><li>Item 2</li>');
if (parsed) {
  parsed.forEach(node => {
    console.log(text([node]));
  });
}

Utility functions for working with DOM nodes and structure.
/**
* Get the document root element.
*
* Takes no arguments; the returned wrapper can be handed to `html` to render
* the full document.
* @returns Cheerio instance wrapping the root document
*/
function root(): Cheerio<Document>;
/**
* Check if one element contains another.
*
* An element is not considered to contain itself: passing the same node for
* both arguments yields false.
* @param container - Potential parent element
* @param contained - Potential child element
* @returns True if container contains contained element
*/
function contains(container: AnyNode, contained: AnyNode): boolean;
/**
* Merge two array-like objects
* @param arr1 - First array to merge into
* @param arr2 - Second array to merge from
* @returns Modified first array with elements from second array
*/
function merge<T>(arr1: Writable<ArrayLike<T>>, arr2: ArrayLike<T>): ArrayLike<T> | undefined;

Usage Examples:
// Everything referenced below must be imported: the namespace for `load`,
// plus `html` and `parseHTML`, which the example also calls
import * as cheerio from "cheerio";
import { root, contains, merge, html, parseHTML } from "cheerio";
// Access document root
const $ = cheerio.load('<html><body><div>Content</div></body></html>');
const documentRoot = root();
console.log(html(documentRoot)); // Full document HTML
// Check element containment
const container = $('body')[0];
const child = $('div')[0];
console.log(contains(container, child)); // true
// parseHTML can return null, so guard before using the first parsed node
const unrelated = parseHTML('<span>Other</span>')?.[0];
if (unrelated) {
  console.log(contains(container, unrelated)); // false
}
// Note: element does not contain itself
console.log(contains(container, container)); // false
// Merge arrays
const arr1 = ['a', 'b'];
const arr2 = ['c', 'd'];
const merged = merge(arr1, arr2);
console.log(arr1); // ['a', 'b', 'c', 'd'] - arr1 is modified
console.log(merged === arr1); // true - returns arr1

Static version of the extract method for data extraction from documents.
/**
* Extract multiple values from document and store in object
* @param map - Extraction mapping object defining what to extract
* @returns Object with extracted values
*/
function extract<M extends ExtractMap>(map: M): ExtractedMap<M>;

Usage Examples:
// `cheerio.load` is used below, so the namespace must be imported as well
import * as cheerio from "cheerio";
import { extract } from "cheerio";
// Load document for extraction
const $ = cheerio.load(`
<article>
<h1>Article Title</h1>
<p class="author">By John Doe</p>
<div class="content">
<p>First paragraph</p>
<p>Second paragraph</p>
</div>
<span class="date">2023-12-01</span>
</article>
`);
// Extract multiple values
const articleData = extract({
  title: 'h1',
  author: '.author',
  content: '.content p',
  date: '.date',
  wordCount: {
    selector: '.content',
    // Words here are separated by newlines as well as spaces, so trim and
    // split on any whitespace run; splitting on ' ' alone miscounts.
    value: (el) => $(el).text().trim().split(/\s+/).length
  }
});
console.log(articleData);
// Output: {
// title: "Article Title",
// author: "By John Doe",
// content: ["First paragraph", "Second paragraph"],
// date: "2023-12-01",
// wordCount: 4
// }

Detailed rendering control with options:
// HTML rendering with specific options
const renderOptions: CheerioOptions = {
xmlMode: false, // HTML mode
decodeEntities: true, // Decode HTML entities
lowerCaseAttributeNames: false, // Preserve attribute case
// ... other htmlparser2 options
};
const customHtml = html($('div'), renderOptions);
// XML rendering (always uses XML mode)
const xmlOutput = xml($('data')); // Renders with XML syntax rules

Using static methods to analyze document structure:
/**
* Analyze document structure using the static methods of a loaded document.
*
* All whole-document measurements go through the `$` returned by
* `cheerio.load`, so they always describe the document parsed from
* `htmlString` rather than relying on an implicit global document.
* @param htmlString - HTML to analyze
* @returns Analysis object with document information
*/
function analyzeDocument(htmlString: string) {
  const $ = cheerio.load(htmlString);
  // Computed once and reused for both textLength and textSize.
  const documentText = $.text();
  return {
    // Basic counts
    elementCount: $('*').length,
    textLength: documentText.length,
    // Structure analysis
    maxDepth: getMaxDepth($('html')[0] || $('body')[0]),
    hasScripts: $('script').length > 0,
    hasStyles: $('style, link[rel="stylesheet"]').length > 0,
    // Content extraction
    title: $('title').text() || $('h1').first().text(),
    headings: $('h1, h2, h3, h4, h5, h6').map((i, el) => ({
      // The tag name is "h1".."h6"; the second character is the level.
      level: parseInt(el.tagName.charAt(1), 10),
      text: $.text([el])
    })).get(),
    // Form analysis
    forms: $('form').length,
    inputs: $('input, select, textarea').length,
    // Link analysis
    internalLinks: $('a[href^="/"], a[href^="#"]').length,
    externalLinks: $('a[href^="http"]').length,
    // Media content
    images: $('img').length,
    videos: $('video').length,
    // Rendered output sizes
    htmlSize: $.html().length,
    xmlSize: $.xml().length,
    textSize: documentText.length
  };
}
/**
* Compute the deepest nesting level reachable from a node.
*
* A node without a usable `children` list reports the depth it was reached at;
* otherwise the result is the largest depth reported by any child, each child
* being visited one level further down.
* @param element - Node to start from
* @param currentDepth - Depth assigned to `element` itself (defaults to 0)
* @returns The greatest depth found at or below `element`
*/
function getMaxDepth(element: AnyNode, currentDepth = 0): number {
  // Leaves (and missing nodes) contribute only their own depth.
  if (!element || !('children' in element) || !element.children) {
    return currentDepth;
  }
  const childLevel = currentDepth + 1;
  return [...element.children].reduce(
    (deepest, child) => Math.max(deepest, getMaxDepth(child, childLevel)),
    currentDepth
  );
}
// Usage
const analysis = analyzeDocument('<html><body><h1>Title</h1><p>Content</p></body></html>');
console.log(analysis);

Utilities for processing multiple documents:
/**
* Run one processing callback over several HTML documents.
*
* Each string is loaded into its own document before the callback sees it, and
* the results are returned in input order.
* @param htmlStrings - Array of HTML strings to process
* @param processor - Function to process each document
* @returns Array of processing results
*/
function batchProcess<T>(htmlStrings: string[], processor: (doc: CheerioAPI) => T): T[] {
  const processOne = (markup: string): T => {
    const loaded = cheerio.load(markup);
    return processor(loaded);
  };
  return htmlStrings.map((markup) => processOne(markup));
}
/**
* Extract same data from multiple documents.
*
* Extraction runs through the API object of each freshly loaded document
* (`$.extract`), so every result describes its own document rather than
* depending on an unbound, document-less `extract` call.
* @param htmlStrings - Array of HTML strings
* @param extractMap - Extraction configuration
* @returns Array of extracted data objects, one per input string
*/
function batchExtract<M extends ExtractMap>(
  htmlStrings: string[],
  extractMap: M
): ExtractedMap<M>[] {
  return batchProcess(htmlStrings, ($) => $.extract(extractMap));
}
// Usage
const documents = [
'<article><h1>Title 1</h1><p>Content 1</p></article>',
'<article><h1>Title 2</h1><p>Content 2</p></article>'
];
const extractedData = batchExtract(documents, {
title: 'h1',
content: 'p'
});
// Results in array of {title, content} objects

Methods for optimizing static operations:
/**
* Collect text from elements until a length budget is used up.
*
* Elements are read one at a time in order; reading stops as soon as the budget
* is reached, and the element that would overflow it is cut to fit.
* @param elements - Elements to extract text from
* @param maxLength - Maximum text length to extract
* @returns Truncated text content
*/
function efficientTextExtract(elements: ArrayLike<AnyNode>, maxLength = 1000): string {
  const pieces: string[] = [];
  let used = 0;
  let index = 0;
  while (index < elements.length && used < maxLength) {
    const chunk = text([elements[index]]);
    const budget = maxLength - used;
    if (chunk.length > budget) {
      // This element overflows the budget: keep only what fits and stop.
      pieces.push(chunk.substring(0, budget));
      break;
    }
    pieces.push(chunk);
    used += chunk.length;
    index += 1;
  }
  return pieces.join('');
}
/**
* Render elements as HTML with redundant whitespace squeezed out.
*
* Three passes run in order: whitespace between adjacent tags is dropped, every
* remaining whitespace run becomes a single space, and the ends are trimmed.
* The passes are purely textual and apply to the whole rendered string.
* @param elements - Elements to render
* @returns Minimized HTML string
*/
function minimalHtml(elements: BasicAcceptedElems<AnyNode>): string {
  const rendered = html(elements);
  const withoutTagGaps = rendered.replace(/>\s+</g, '><');
  const singleSpaced = withoutTagGaps.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
// Array-like object type for merge function
/** Copy of `T` with the `readonly` modifier removed from every property. */
type Writable<T> = { -readonly [P in keyof T]: T[P] };
// Extract map for data extraction
/**
* Maps each output key to what should be extracted for it: either a plain
* selector string or an `ExtractConfig` object.
*/
interface ExtractMap {
[key: string]: string | ExtractConfig;
}
/**
* Object form of a single extraction entry.
* - `selector`: selector string for the entry (required).
* - `value`: optional callback that receives the element and returns the
*   extracted value.
* - `attribute`: optional string; presumably names an attribute to read
*   instead of the text (TODO confirm against the implementation).
*/
interface ExtractConfig {
selector: string;
value?: (element: AnyNode) => any;
attribute?: string;
}
/**
* Result shape for a given `ExtractMap`: keys are preserved; entries given as
* a string yield `string | string[]`, entries given as an `ExtractConfig`
* yield `any`, and anything else resolves to `never`.
*/
type ExtractedMap<M extends ExtractMap> = {
[K in keyof M]: M[K] extends string
? string | string[]
: M[K] extends ExtractConfig
? any
: never;
};
// Basic element types
/** Accepted element input: an array-like of nodes, a single node, or a string. */
type BasicAcceptedElems<T extends AnyNode> = ArrayLike<T> | T | string;
// Rendering options
interface RenderOptions extends CheerioOptions {
decodeEntities?: boolean;
encodeEntities?: boolean;
selfClosingTags?: boolean;
}

Static methods handle errors gracefully:
// Safe parsing with error handling
/**
* Parse HTML without ever throwing.
*
* @param htmlString - HTML markup to parse
* @returns The parsed nodes, or an empty array when parsing yields nothing or
*   fails (a failure is reported through `console.warn`)
*/
function safeParseHTML(htmlString: string): AnyNode[] {
  try {
    return parseHTML(htmlString) ?? [];
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, or even an object, so
    // narrow before reading `.message`; otherwise this handler could itself throw.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('HTML parsing failed:', reason);
    return [];
  }
}
// Safe rendering with fallbacks
/**
* Render elements to HTML without ever throwing.
*
* @param elements - Elements to render; any falsy value renders as ''
* @returns The rendered HTML, '' for falsy input, or '[Render Error]' when
*   rendering fails (a failure is reported through `console.warn`)
*/
function safeRender(elements: any): string {
  if (!elements) return '';
  try {
    return html(elements);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, or even an object, so
    // narrow before reading `.message`; otherwise this handler could itself throw.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Rendering failed:', reason);
    return '[Render Error]';
  }
}
// Safe text extraction
/**
* Extract text from elements without ever throwing.
*
* @param elements - Elements to read text from; any falsy value yields ''
* @returns The extracted text, or '' for falsy input or when extraction fails
*   (a failure is reported through `console.warn`)
*/
function safeText(elements: any): string {
  if (!elements) return '';
  try {
    return text(elements);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, or even an object, so
    // narrow before reading `.message`; otherwise this handler could itself throw.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Text extraction failed:', reason);
    return '';
  }
}