Core functionality for loading HTML/XML documents from various sources including strings, buffers, URLs, and streams. Cheerio provides multiple entry points optimized for different use cases.
The primary function for creating a Cheerio instance from HTML/XML content.
/**
* Create a querying function, bound to a document created from the provided markup
* @param content - Markup to be loaded (string, DOM nodes, or Buffer)
* @param options - Options for the created instance
* @param isDocument - Whether to parse as full document (default: true)
* @returns The loaded document API
*/
function load(
content: string | AnyNode | AnyNode[] | Buffer,
options?: CheerioOptions | null,
isDocument?: boolean
): CheerioAPI;
Usage Examples:
import { load } from "cheerio";
// Load HTML string
const $ = load('<h1>Hello World</h1>');
// Load with options
const $xml = load('<root><item>data</item></root>', { xmlMode: true });
// Load as fragment (no html/body wrapper)
const $fragment = load('<li>Item</li>', null, false);
Sniffs the encoding of a buffer and creates a querying function from the decoded content.
/**
* Sniffs the encoding of a buffer, then creates a querying function
* @param buffer - The buffer to sniff the encoding of
* @param options - Options including encoding detection settings
* @returns The loaded document
*/
function loadBuffer(
buffer: Buffer,
options?: DecodeStreamOptions
): CheerioAPI;
interface DecodeStreamOptions extends CheerioOptions {
encoding?: SnifferOptions;
}
Usage Examples:
import { loadBuffer } from "cheerio";
import fs from "fs";
// Load from file buffer
const buffer = fs.readFileSync('index.html');
const $ = loadBuffer(buffer);
// Load with encoding options
const $withEncoding = loadBuffer(buffer, {
encoding: { defaultEncoding: 'utf8' }
});
Loads a document directly from a URL with automatic redirect handling and encoding detection.
/**
* Load a document from a URL
* @param url - The URL to load the document from
* @param options - Options including request settings and Cheerio options
* @returns Promise resolving to the loaded document
*/
function fromURL(
url: string | URL,
options?: CheerioRequestOptions
): Promise<CheerioAPI>;
interface CheerioRequestOptions extends DecodeStreamOptions {
requestOptions?: UndiciStreamOptions;
}
Usage Examples:
import { fromURL } from "cheerio";
// Load from URL
const $ = await fromURL('https://example.com');
// Load with custom headers
const $withHeaders = await fromURL('https://api.example.com/data', {
requestOptions: {
headers: { 'User-Agent': 'MyApp/1.0' }
}
});
// Load XML from URL
const $xml = await fromURL('https://example.com/feed.xml', {
xmlMode: true
});
Creates a writable stream that parses strings into a document, useful for processing large documents.
/**
* Creates a stream that parses a sequence of strings into a document
* @param options - The options to pass to Cheerio
* @param cb - The callback to call when the stream is finished
* @returns The writable stream
*/
function stringStream(
options: CheerioOptions,
cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable;
Usage Examples:
import { stringStream } from "cheerio";
import fs from "fs";
const writeStream = stringStream({}, (err, $) => {
if (err) {
console.error('Parse error:', err);
return;
}
console.log($('h1').text());
});
// Pipe HTML content to the stream
fs.createReadStream('large-document.html', { encoding: 'utf8' })
.pipe(writeStream);
Parses a stream of buffers into a document with automatic encoding detection.
/**
* Parses a stream of buffers into a document
* @param options - Options including encoding detection settings
* @param cb - Callback called when the stream is finished
* @returns The writable stream
*/
function decodeStream(
options: DecodeStreamOptions,
cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable;
Usage Examples:
import { decodeStream } from "cheerio";
import fs from "fs";
const stream = decodeStream({
encoding: { defaultEncoding: 'utf8' }
}, (err, $) => {
if (err) {
console.error('Decode error:', err);
return;
}
console.log($('title').text());
});
// Pipe binary HTML content
fs.createReadStream('document.html').pipe(stream);
Alternative entry point that uses only htmlparser2, reducing memory usage by not loading parse5.
/**
* Slim version that always uses htmlparser2 parser
* @param content - Markup to be loaded
* @param options - Options for the created instance
* @param isDocument - Always false here, as we use htmlparser2
* @returns The loaded document
*/
const load: (
content: string | AnyNode | AnyNode[] | Buffer,
options?: CheerioOptions | null,
isDocument?: boolean
) => CheerioAPI;
Usage Examples:
import { load } from "cheerio/slim";
// Lighter weight loading (htmlparser2 only)
const $ = load('<div>Content</div>');
// Same API as regular load
console.log($('div').text()); // "Content"
interface CheerioOptions {
/** Enable XML mode parsing with htmlparser2 */
xml?: HTMLParser2Options | boolean;
/** Enable XML mode (deprecated, use xml option) */
xmlMode?: boolean;
/** Base URI for resolving href and src attributes */
baseURI?: string | URL;
/** Treat the document as being in quirks mode, which makes class and ID selectors case-insensitive */
quirksMode?: boolean;
/** Custom pseudo-class definitions */
pseudos?: Record<string, string | Function>;
}
interface HTMLParser2Options {
/** Treat input as XML document */
xmlMode?: boolean;
/** Decode HTML entities */
decodeEntities?: boolean;
/** Convert attribute names to lowercase (set to false to preserve their original case) */
lowerCaseAttributeNames?: boolean;
/** Parser-specific options from htmlparser2 */
[key: string]: any;
}
All loading functions may throw or pass errors to callbacks:
Error Handling Examples:
// Synchronous loading
try {
const $ = load(htmlContent);
} catch (error) {
console.error('Parse error:', error.message);
}
// Asynchronous loading
try {
const $ = await fromURL('https://example.com');
} catch (error) {
console.error('Network or parse error:', error.message);
}
// Stream loading
stringStream({}, (err, $) => {
if (err) {
console.error('Stream error:', err.message);
return;
}
// Process successfully loaded document
});