or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

attributes.md forms.md index.md loading.md manipulation.md static-methods.md traversal.md utils.md
tile.json

static-methods.mddocs/

Static Utilities

Static methods for rendering, parsing, and working with DOM nodes without requiring a Cheerio instance. These utility functions provide essential functionality for document processing and manipulation.

Capabilities

Document Rendering

Methods for converting DOM structures to string representations.

/**
 * Render document or elements as HTML.
 * Both parameters are optional in this signature.
 * @param dom - Elements to render (optional, defaults to document root)
 * @param options - Rendering options (optional)
 * @returns HTML string representation
 */
function html(dom?: BasicAcceptedElems<AnyNode>, options?: CheerioOptions): string;

/**
 * Render document or elements as XML.
 * This signature takes no options parameter.
 * @param dom - Elements to render (optional, defaults to document root)
 * @returns XML string representation
 */
function xml(dom?: BasicAcceptedElems<AnyNode>): string;

/**
 * Get text content from elements.
 * @param elements - Array-like collection of nodes to extract text from
 *   (optional); wrap a single node in an array, e.g. `text([node])`
 * @returns Combined text content
 */
function text(elements?: ArrayLike<AnyNode>): string;

Usage Examples:

// `cheerio.load` is used below, so the namespace must be imported as well.
import * as cheerio from "cheerio";
import { html, xml, text } from "cheerio";

// Load the document that the rendering calls below refer to
const $ = cheerio.load('<div><p>Hello <strong>world</strong></p></div>');

// NOTE(review): the zero-argument calls below do not reference `$`;
// confirm they operate on the document loaded above.

// Render entire document
console.log(html());
// Output: "<div><p>Hello <strong>world</strong></p></div>"

// Render specific elements
console.log(html($('p')));
// Output: "<p>Hello <strong>world</strong></p>"

// Render with options
console.log(html($('div'), { xmlMode: false }));

// Render as XML
console.log(xml($('div')));
// Output: "<div><p>Hello <strong>world</strong></p></div>"

// Extract text content
console.log(text($('p')));
// Output: "Hello world"

// Text from all elements
console.log(text());
// Output: "Hello world" (all text content)

HTML Parsing

Methods for parsing HTML strings into DOM node arrays.

/**
 * Parse HTML string into array of DOM nodes.
 *
 * Overloads are listed from most to least specific because TypeScript picks
 * the first signature that matches a call; with the broad
 * `string | null` signature first, the `'' | null` overload could never be
 * selected.
 *
 * @param data - HTML markup to parse
 * @param context - Context parameter (ignored, for jQuery compatibility)
 * @param keepScripts - Whether to preserve script elements (default: false)
 * @returns Array of parsed DOM nodes, or null when `data` is an empty string
 *   or null (malformed markup is still parsed on a best-effort basis)
 */
function parseHTML(data?: '' | null): null;
function parseHTML(data: string, context?: unknown, keepScripts?: boolean): AnyNode[];
function parseHTML(data?: string | null): AnyNode[] | null;

Usage Examples:

// `text` is used in the final loop, so it is imported alongside `parseHTML`.
import { parseHTML, text } from "cheerio";

// Parse HTML string
const nodes = parseHTML('<div>Hello</div><p>World</p>');
// Returns: [DivElement, PElement]

// Parse with script handling
const withScripts = parseHTML('<div>Content</div><script>alert("hi")</script>', null, true);
// Keeps script element

const withoutScripts = parseHTML('<div>Content</div><script>alert("hi")</script>', null, false);
// Removes script element (default behavior)

// Handle edge cases
parseHTML('');                       // Returns null
parseHTML(null);                     // Returns null
parseHTML('<invalid');               // Attempts to parse invalid HTML

// Use parsed nodes (guard first: the result may be null)
const parsed = parseHTML('<li>Item 1</li><li>Item 2</li>');
if (parsed) {
  parsed.forEach(node => {
    console.log(text([node]));
  });
}

DOM Utilities

Utility functions for working with DOM nodes and structure.

/**
 * Get the document root element.
 * Takes no arguments.
 * @returns Cheerio instance wrapping the root document
 */
function root(): Cheerio<Document>;

/**
 * Check if one element contains another.
 * Per the usage example in this section, an element is not reported as
 * containing itself.
 * @param container - Potential parent element
 * @param contained - Potential child element
 * @returns True if container contains contained element
 */
function contains(container: AnyNode, contained: AnyNode): boolean;

/**
 * Merge two array-like objects.
 * @param arr1 - First array to merge into (must be writable; it is modified in place)
 * @param arr2 - Second array to merge from
 * @returns Modified first array with elements from second array; the declared
 *   return type also permits `undefined` (NOTE(review): confirm for which inputs)
 */
function merge<T>(arr1: Writable<ArrayLike<T>>, arr2: ArrayLike<T>): ArrayLike<T> | undefined;

Usage Examples:

// Every name used below is imported here: the original snippet used
// `cheerio`, `html` and `parseHTML` without importing them.
import * as cheerio from "cheerio";
import { root, contains, merge, html, parseHTML } from "cheerio";

// Access document root
const $ = cheerio.load('<html><body><div>Content</div></body></html>');
const documentRoot = root();
console.log(html(documentRoot));    // Full document HTML

// Check element containment
const container = $('body')[0];
const child = $('div')[0];
console.log(contains(container, child)); // true

// parseHTML may return null, so guard before indexing into the result
const unrelated = parseHTML('<span>Other</span>')?.[0];
if (unrelated) {
  console.log(contains(container, unrelated)); // false
}

// Note: element does not contain itself
console.log(contains(container, container)); // false

// Merge arrays
const arr1 = ['a', 'b'];
const arr2 = ['c', 'd'];
const merged = merge(arr1, arr2);
console.log(arr1); // ['a', 'b', 'c', 'd'] - arr1 is modified
console.log(merged === arr1); // true - returns arr1

Data Extraction

Static version of the extract method for data extraction from documents.

/**
 * Extract multiple values from document and store in object.
 * The result type is derived from the map's own type `M`.
 * @param map - Extraction mapping object defining what to extract
 * @returns Object with extracted values, typed as `ExtractedMap<M>`
 */
function extract<M extends ExtractMap>(map: M): ExtractedMap<M>;

Usage Examples:

// `cheerio.load` is used below, so the namespace must be imported as well.
import * as cheerio from "cheerio";
import { extract } from "cheerio";

// Load document for extraction
const $ = cheerio.load(`
  <article>
    <h1>Article Title</h1>
    <p class="author">By John Doe</p>
    <div class="content">
      <p>First paragraph</p>
      <p>Second paragraph</p>
    </div>
    <span class="date">2023-12-01</span>
  </article>
`);

// Extract multiple values
// NOTE(review): confirm that a plain string selector such as '.content p'
// yields every match (as the output below shows) rather than only the first.
const articleData = extract({
  title: 'h1',
  author: '.author',
  content: '.content p',
  date: '.date',
  wordCount: {
    selector: '.content',
    // The element's text spans several indented lines, so trim it and split
    // on runs of whitespace; splitting on a single space would also count
    // the empty strings produced by the indentation.
    value: (el) => $(el).text().trim().split(/\s+/).length
  }
});

console.log(articleData);
// Output: {
//   title: "Article Title",
//   author: "By John Doe",
//   content: ["First paragraph", "Second paragraph"],
//   date: "2023-12-01",
//   wordCount: 4
// }

Advanced Static Utilities

Custom Rendering Options

Detailed rendering control with options:

// HTML rendering with specific options
// (this snippet uses `$`, `html` and `xml` without showing where they come
// from; it assumes a document was loaded and the functions imported earlier)
const renderOptions: CheerioOptions = {
  xmlMode: false,              // HTML mode
  decodeEntities: true,        // Decode HTML entities
  lowerCaseAttributeNames: false, // Preserve attribute case
  // ... other htmlparser2 options
};

// Render every <div> using the options object above
const customHtml = html($('div'), renderOptions);

// XML rendering (always uses XML mode)
const xmlOutput = xml($('data'));   // Renders with XML syntax rules

Document Structure Analysis

Using static methods to analyze document structure:

/**
 * Analyze document structure using static methods.
 *
 * All rendering goes through the `$` instance created from `htmlString`, so
 * every figure in the result describes that document. The free `text()`,
 * `html()` and `xml()` functions carry no reference to `$`.
 *
 * @param htmlString - HTML to analyze
 * @returns Analysis object with document information
 */
function analyzeDocument(htmlString: string) {
  const $ = cheerio.load(htmlString);

  // Render the text once; both textLength and textSize report its length.
  const documentText = $.text();

  return {
    // Basic counts
    elementCount: $('*').length,
    textLength: documentText.length,

    // Structure analysis
    maxDepth: getMaxDepth($('html')[0] || $('body')[0]),
    hasScripts: $('script').length > 0,
    hasStyles: $('style, link[rel="stylesheet"]').length > 0,

    // Content extraction
    title: $('title').text() || $('h1').first().text(),
    headings: $('h1, h2, h3, h4, h5, h6').map((_index, el) => ({
      // Tag names look like "h1".."h6"; the second character is the level.
      // An explicit radix keeps the parse unambiguous.
      level: Number.parseInt(el.tagName.charAt(1), 10),
      text: $(el).text()
    })).get(),

    // Form analysis
    forms: $('form').length,
    inputs: $('input, select, textarea').length,

    // Link analysis
    internalLinks: $('a[href^="/"], a[href^="#"]').length,
    externalLinks: $('a[href^="http"]').length,

    // Media content
    images: $('img').length,
    videos: $('video').length,

    // Rendered output sizes
    htmlSize: $.html().length,
    xmlSize: $.xml().length,
    textSize: documentText.length
  };
}

/**
 * Compute the deepest nesting level reachable from a node.
 * @param element - Node to start from; a missing node or one without a
 *   `children` property counts as a leaf
 * @param currentDepth - Depth assigned to `element` itself (defaults to 0)
 * @returns The largest depth found among `element` and its descendants
 */
function getMaxDepth(element: AnyNode, currentDepth = 0): number {
  // Leaves (and absent nodes) sit at the depth they were reached at.
  if (!element) return currentDepth;
  if (!('children' in element)) return currentDepth;

  const { children } = element;
  if (!children) return currentDepth;

  // Fold over the children, keeping the deepest level seen so far.
  return Array.from(children).reduce<number>(
    (deepest, child) => Math.max(deepest, getMaxDepth(child, currentDepth + 1)),
    currentDepth
  );
}

// Usage: analyze a small inline document and print the resulting summary object
const analysis = analyzeDocument('<html><body><h1>Title</h1><p>Content</p></body></html>');
console.log(analysis);

Batch Processing Utilities

Utilities for processing multiple documents:

/**
 * Run one processing function over several HTML documents.
 * Each string is loaded into its own document before `processor` is called.
 * @param htmlStrings - HTML sources, one per document
 * @param processor - Callback that receives the loaded document and returns a result
 * @returns The processor results, in the same order as `htmlStrings`
 */
function batchProcess<T>(
  htmlStrings: string[],
  processor: (doc: CheerioAPI) => T
): T[] {
  // Load a single source and hand the resulting document to the processor.
  const processOne = (markup: string): T => processor(cheerio.load(markup));

  return htmlStrings.map(markup => processOne(markup));
}

/**
 * Extract same data from multiple documents.
 *
 * The extraction runs through the `$` that `batchProcess` passes for each
 * document. Calling a free `extract` would ignore that argument, leaving the
 * extraction unbound to the document that was just loaded.
 *
 * @param htmlStrings - Array of HTML strings
 * @param extractMap - Extraction configuration applied to every document
 * @returns Array of extracted data objects, one per input string
 */
function batchExtract<M extends ExtractMap>(
  htmlStrings: string[],
  extractMap: M
): ExtractedMap<M>[] {
  return batchProcess(htmlStrings, $ => $.extract(extractMap));
}

// Usage: two small article documents that share the same structure
const documents = [
  '<article><h1>Title 1</h1><p>Content 1</p></article>',
  '<article><h1>Title 2</h1><p>Content 2</p></article>'
];

// Apply the same selector map to every document
const extractedData = batchExtract(documents, {
  title: 'h1',
  content: 'p'
});
// Results in array of {title, content} objects

Performance Utilities

Methods for optimizing static operations:

/**
 * Collect text from a list of nodes, stopping once a length cap is reached.
 * Nodes are read one at a time, so nodes past the cap are never rendered.
 * @param elements - Nodes whose text is concatenated in order
 * @param maxLength - Upper bound on the length of the returned string
 * @returns Concatenated text, cut off at `maxLength` characters
 */
function efficientTextExtract(elements: ArrayLike<AnyNode>, maxLength = 1000): string {
  const pieces: string[] = [];
  let consumed = 0;
  let index = 0;

  while (index < elements.length && consumed < maxLength) {
    const piece = text([elements[index]]);
    const budget = maxLength - consumed;

    // A piece that overflows the budget is cut to fit and ends the scan.
    if (piece.length > budget) {
      pieces.push(piece.substring(0, budget));
      break;
    }

    pieces.push(piece);
    consumed += piece.length;
    index += 1;
  }

  return pieces.join('');
}

/**
 * Render elements to HTML and squeeze out redundant whitespace.
 * The clean-up is applied to the whole rendered string, regardless of which
 * elements the whitespace appears in.
 * @param elements - Elements to render
 * @returns Whitespace-minimized HTML string
 */
function minimalHtml(elements: BasicAcceptedElems<AnyNode>): string {
  const rendered = html(elements);

  // Step 1: drop whitespace that sits only between a closing and an opening tag.
  const withoutTagGaps = rendered.replace(/>\s+</g, '><');

  // Step 2: collapse every remaining whitespace run into one space.
  const collapsed = withoutTagGaps.replace(/\s+/g, ' ');

  // Step 3: strip the leading and trailing space left over.
  return collapsed.trim();
}

Utility Types

// Mutable variant of T: removes `readonly` from every property.
// Used for the first argument of merge, which is the array being written to.
type Writable<T> = { -readonly [P in keyof T]: T[P] };

// Extract map for data extraction: each key names an output field and maps
// to either a selector string or an ExtractConfig object
interface ExtractMap {
  [key: string]: string | ExtractConfig;
}

// Detailed configuration for one extracted field
interface ExtractConfig {
  // Selector for the element(s) to read from (required)
  selector: string;
  // Optional callback that turns a matched node into the stored value
  value?: (element: AnyNode) => any;
  // Optional attribute name (presumably read instead of the text; confirm)
  attribute?: string;
}

// Result type of extract: same keys as the map M.
// - string entries become `string | string[]`
// - ExtractConfig entries become `any`
// - anything else becomes `never`
type ExtractedMap<M extends ExtractMap> = {
  [K in keyof M]: M[K] extends string 
    ? string | string[]
    : M[K] extends ExtractConfig
    ? any
    : never;
};

// Basic element types: an array-like collection of nodes, a single node, or a string
type BasicAcceptedElems<T extends AnyNode> = ArrayLike<T> | T | string;

// Rendering options: CheerioOptions plus three optional boolean flags.
// NOTE(review): no signature shown in this section takes RenderOptions
// (html accepts CheerioOptions); confirm where this interface is used.
interface RenderOptions extends CheerioOptions {
  decodeEntities?: boolean;
  encodeEntities?: boolean;
  selfClosingTags?: boolean;
}

Error Handling

Wrap the static methods in small helpers to handle errors gracefully:

/**
 * Safe parsing with error handling.
 * @param htmlString - HTML markup to parse
 * @returns The parsed nodes, or an empty array when parsing yields null or throws
 */
function safeParseHTML(htmlString: string): AnyNode[] {
  try {
    // parseHTML may return null (e.g. for empty input); normalize to [].
    return parseHTML(htmlString) ?? [];
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be
    // null), so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('HTML parsing failed:', reason);
    return [];
  }
}

/**
 * Safe rendering with fallbacks.
 * @param elements - Elements to render; any falsy value renders as ''
 * @returns The rendered HTML, '' for falsy input, or '[Render Error]' when rendering throws
 */
function safeRender(elements: any): string {
  try {
    if (!elements) return '';
    return html(elements);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be
    // null), so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Rendering failed:', reason);
    return '[Render Error]';
  }
}

/**
 * Safe text extraction.
 * @param elements - Elements to read text from; any falsy value yields ''
 * @returns The extracted text, or '' for falsy input or when extraction throws
 */
function safeText(elements: any): string {
  try {
    if (!elements) return '';
    return text(elements);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be
    // null), so narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Text extraction failed:', reason);
    return '';
  }
}