or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

attributes.md forms.md index.md loading.md manipulation.md static-methods.md traversal.md utils.md
tile.json

docs/loading.md

Document Loading

Core functionality for loading HTML/XML documents from various sources including strings, buffers, URLs, and streams. Cheerio provides multiple entry points optimized for different use cases.

Capabilities

Load Function

The primary function for creating a Cheerio instance from HTML/XML content.

/**
 * Create a querying function, bound to a document created from the provided markup
 * @param content - Markup to be loaded (string, DOM nodes, or Buffer)
 * @param options - Options for the created instance
 * @param isDocument - Whether to parse as full document (default: true)
 * @returns The loaded document API
 */
function load(
  content: string | AnyNode | AnyNode[] | Buffer,
  options?: CheerioOptions | null,
  isDocument?: boolean
): CheerioAPI;

Usage Examples:

import { load } from "cheerio";

// Load HTML string
const $ = load('<h1>Hello World</h1>');

// Load with options
const $xml = load('<root><item>data</item></root>', { xmlMode: true });

// Load as fragment (no html/body wrapper)
const $fragment = load('<li>Item</li>', null, false);

Load Buffer

Sniffs the encoding of a buffer and creates a querying function from the decoded content.

/**
 * Sniffs the encoding of a buffer, then creates a querying function
 * @param buffer - The buffer to sniff the encoding of
 * @param options - Options including encoding detection settings
 * @returns The loaded document
 */
function loadBuffer(
  buffer: Buffer,
  options?: DecodeStreamOptions
): CheerioAPI;

interface DecodeStreamOptions extends CheerioOptions {
  encoding?: SnifferOptions;
}

Usage Examples:

import { loadBuffer } from "cheerio";
import fs from "fs";

// Load from file buffer
const buffer = fs.readFileSync('index.html');
const $ = loadBuffer(buffer);

// Load with encoding options
const $withEncoding = loadBuffer(buffer, {
  encoding: { defaultEncoding: 'utf8' }
});

From URL

Loads a document directly from a URL with automatic redirect handling and encoding detection.

/**
 * Load a document from a URL
 * @param url - The URL to load the document from
 * @param options - Options including request settings and Cheerio options
 * @returns Promise resolving to the loaded document
 */
function fromURL(
  url: string | URL,
  options?: CheerioRequestOptions
): Promise<CheerioAPI>;

interface CheerioRequestOptions extends DecodeStreamOptions {
  requestOptions?: UndiciStreamOptions;
}

Usage Examples:

import { fromURL } from "cheerio";

// Load from URL
const $ = await fromURL('https://example.com');

// Load with custom headers
const $withHeaders = await fromURL('https://api.example.com/data', {
  requestOptions: {
    headers: { 'User-Agent': 'MyApp/1.0' }
  }
});

// Load XML from URL
const $xml = await fromURL('https://example.com/feed.xml', {
  xmlMode: true
});

String Stream

Creates a writable stream that parses strings into a document, useful for processing large documents.

/**
 * Creates a stream that parses a sequence of strings into a document
 * @param options - The options to pass to Cheerio
 * @param cb - The callback to call when the stream is finished
 * @returns The writable stream
 */
function stringStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable;

Usage Examples:

import { stringStream } from "cheerio";
import fs from "fs";

const writeStream = stringStream({}, (err, $) => {
  if (err) {
    console.error('Parse error:', err);
    return;
  }
  
  console.log($('h1').text());
});

// Pipe HTML content to the stream
fs.createReadStream('large-document.html', { encoding: 'utf8' })
  .pipe(writeStream);

Decode Stream

Parses a stream of buffers into a document with automatic encoding detection.

/**
 * Parses a stream of buffers into a document
 * @param options - Options including encoding detection settings
 * @param cb - Callback called when the stream is finished
 * @returns The writable stream
 */
function decodeStream(
  options: DecodeStreamOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void
): Writable;

Usage Examples:

import { decodeStream } from "cheerio";
import fs from "fs";

const stream = decodeStream({
  encoding: { defaultEncoding: 'utf8' }
}, (err, $) => {
  if (err) {
    console.error('Decode error:', err);
    return;
  }
  
  console.log($('title').text());
});

// Pipe binary HTML content
fs.createReadStream('document.html').pipe(stream);

Slim Import

Alternative entry point that uses only htmlparser2, reducing memory usage by not loading parse5.

/**
 * Slim version that always uses htmlparser2 parser
 * @param content - Markup to be loaded
 * @param options - Options for the created instance
 * @param isDocument - Always false here, as we use htmlparser2
 * @returns The loaded document
 */
const load: (
  content: string | AnyNode | AnyNode[] | Buffer,
  options?: CheerioOptions | null,
  isDocument?: boolean
) => CheerioAPI;

Usage Examples:

import { load } from "cheerio/slim";

// Lighter weight loading (htmlparser2 only)
const $ = load('<div>Content</div>');

// Same API as regular load
console.log($('div').text()); // "Content"

Configuration Options

CheerioOptions

interface CheerioOptions {
  /** Enable XML mode parsing with htmlparser2 */
  xml?: HTMLParser2Options | boolean;
  
  /** Enable XML mode (deprecated, use xml option) */
  xmlMode?: boolean;
  
  /** Base URI for resolving href and src attributes */
  baseURI?: string | URL;
  
  /** Treat the document as being in quirks mode, which makes class and ID selectors case-insensitive */
  quirksMode?: boolean;
  
  /** Custom pseudo-class definitions */
  pseudos?: Record<string, string | Function>;
}

HTMLParser2Options

interface HTMLParser2Options {
  /** Treat input as XML document */
  xmlMode?: boolean;
  
  /** Decode HTML entities */
  decodeEntities?: boolean;
  
  /** Lowercase all attribute names; set to false to preserve their original case */
  lowerCaseAttributeNames?: boolean;
  
  /** Parser-specific options from htmlparser2 */
  [key: string]: any;
}

Error Handling

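Loading functions report errors in different ways: load and loadBuffer throw synchronously, fromURL rejects its returned promise, and stringStream and decodeStream pass the error as the first argument of their callback. Possible failures include: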

  • Parse errors: Malformed HTML/XML syntax
  • Network errors: Failed URL requests (fromURL only)
  • Encoding errors: Invalid character encoding in buffers
  • Configuration errors: Invalid options or parameters

Error Handling Examples:

// Synchronous loading
try {
  const $ = load(htmlContent);
} catch (error) {
  console.error('Parse error:', error.message);
}

// Asynchronous loading
try {
  const $ = await fromURL('https://example.com');
} catch (error) {
  console.error('Network or parse error:', error.message);
}

// Stream loading
stringStream({}, (err, $) => {
  if (err) {
    console.error('Stream error:', err.message);
    return;
  }
  // Process successfully loaded document
});