CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/npm-tesseract-js

Pure JavaScript multilingual OCR library that brings the powerful Tesseract OCR engine to both browser and Node.js environments through WebAssembly

Pending
Overview
Eval results
Files

docs/worker-api.md

Worker API

The Worker API is the primary interface for OCR operations in Tesseract.js. Workers provide full lifecycle management for OCR tasks including text recognition, parameter configuration, filesystem operations, and resource cleanup.

Capabilities

Worker Creation

Creates a new OCR worker with language support and configuration options.

/**
 * Creates a new OCR worker with specified languages and options.
 *
 * All parameters are optional.
 *
 * @param langs - Language code(s) for OCR recognition (default: 'eng'): a
 *   single code, an array of codes, or an array of `Lang` objects
 * @param oem - OCR Engine Mode (default: OEM.LSTM_ONLY)
 * @param options - Worker configuration options; any subset of `WorkerOptions`
 * @param config - Tesseract initialization configuration: either a string or
 *   any subset of `InitOptions`
 * @returns Promise resolving to Worker instance
 */
function createWorker(
  langs?: string | string[] | Lang[], 
  oem?: OEM, 
  options?: Partial<WorkerOptions>, 
  config?: string | Partial<InitOptions>
): Promise<Worker>;

/**
 * Language descriptor that `createWorker` and `reinitialize` accept in place
 * of a plain language-code string.
 */
interface Lang {
  /** Language code — presumably the same form as the string codes (e.g. 'eng'); confirm. */
  code: string;
  /** Language data payload; typed `unknown`, so its format is not specified here. */
  data: unknown;
}

Usage Examples:

import { createWorker, OEM } from 'tesseract.js';

// Basic worker with English
const worker = await createWorker('eng');

// Multi-language worker (array of language codes)
const multiWorker = await createWorker(['eng', 'fra', 'deu']);

// Worker with custom options
// `logger` is called with LoggerMessage objects; `errorHandler` with errors.
const customWorker = await createWorker('eng', OEM.LSTM_ONLY, {
  logger: m => console.log(m),
  errorHandler: err => console.error(err)
});

Text Recognition

Recognizes text from images with comprehensive output format options.

/**
 * Recognizes text from an image.
 *
 * @param image - Image input in various supported formats
 * @param options - Recognition options including region of interest; any
 *   subset of `RecognizeOptions`
 * @param output - Output format configuration; any subset of the boolean
 *   flags in `OutputFormats`
 * @param jobId - Optional job identifier for tracking
 * @returns Promise resolving to recognition results (`RecognizeResult`, whose
 *   `data` is a `Page`)
 */
recognize(
  image: ImageLike, 
  options?: Partial<RecognizeOptions>, 
  output?: Partial<OutputFormats>, 
  jobId?: string
): Promise<RecognizeResult>;

/**
 * Options for a single `recognize` call. `recognize` takes this as
 * `Partial<RecognizeOptions>`, so every field may be omitted there.
 */
interface RecognizeOptions {
  /** Region of interest within the image. */
  rectangle: Rectangle;
  /** Presumably the title used for PDF output — confirm. */
  pdfTitle: string;
  /** Presumably limits PDF output to the text layer — confirm. */
  pdfTextOnly: boolean;
  /** Presumably enables automatic rotation handling — confirm. */
  rotateAuto: boolean;
  /** Rotation expressed in radians; how it is applied is not specified here. */
  rotateRadians: number;
}

/**
 * Rectangular region, used as the `rectangle` (region of interest) field of
 * `RecognizeOptions`. Units are presumably image pixels — confirm.
 */
interface Rectangle {
  /** Position of the region's left edge. */
  left: number;
  /** Position of the region's top edge. */
  top: number;
  /** Width of the region. */
  width: number;
  /** Height of the region. */
  height: number;
}

/**
 * Output format configuration for `recognize`: one boolean flag per output
 * format. `recognize` takes this as `Partial<OutputFormats>`, so flags may be
 * omitted there.
 *
 * Most flag names match a field of `Page`; presumably each flag controls
 * whether that field is populated — confirm.
 */
interface OutputFormats {
  text: boolean;
  blocks: boolean;
  layoutBlocks: boolean;
  hocr: boolean;
  tsv: boolean;
  box: boolean;
  unlv: boolean;
  osd: boolean;
  pdf: boolean;
  imageColor: boolean;
  imageGrey: boolean;
  imageBinary: boolean;
  debug: boolean;
}

Usage Examples:

// Basic text recognition
// (destructures the `text` field from the result's `data` Page)
const { data: { text } } = await worker.recognize('image.png');

// Recognition with region of interest
const result = await worker.recognize('image.png', {
  rectangle: { left: 100, top: 50, width: 300, height: 200 }
});

// Multiple output formats
// (second argument: no recognition options set; third: output format flags)
const fullResult = await worker.recognize('image.png', {}, {
  text: true,
  hocr: true,
  pdf: true
});

Orientation and Script Detection

Detects text orientation and script from images.

/**
 * Detects orientation and script information from image.
 *
 * @param image - Image input in various supported formats
 * @param jobId - Optional job identifier for tracking
 * @returns Promise resolving to detection results (`DetectResult`); every
 *   field of its `data` is nullable
 */
detect(image: ImageLike, jobId?: string): Promise<DetectResult>;

/** Result of `detect`: a job identifier plus the detection data. */
interface DetectResult {
  /** Identifier of the job that produced this result. */
  jobId: string;
  /** Orientation and script information. */
  data: DetectData;
}

/**
 * Orientation and script information reported by `detect`.
 * Every field is nullable, so check for `null` before use.
 */
interface DetectData {
  /** Numeric script identifier (presumably Tesseract's own id — confirm). */
  tesseract_script_id: number | null;
  /** Detected script name, e.g. 'Latin'. */
  script: string | null;
  /** Confidence of the script detection; scale is not specified here. */
  script_confidence: number | null;
  /** Detected orientation in degrees, e.g. 90. */
  orientation_degrees: number | null;
  /** Confidence of the orientation detection; scale is not specified here. */
  orientation_confidence: number | null;
}

Usage Examples:

// Detect orientation and script; each field of `detection.data` may be null.
const detection = await worker.detect('rotated-image.png');
console.log(detection.data.orientation_degrees); // e.g., 90
console.log(detection.data.script); // e.g., 'Latin'

Parameter Configuration

Sets Tesseract-specific parameters for fine-tuning OCR behavior.

/**
 * Sets OCR parameters for the worker.
 *
 * @param params - Parameter key-value pairs; any subset of `WorkerParams`
 * @param jobId - Optional job identifier
 * @returns Promise resolving to configuration result (`ConfigResult`)
 */
setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>;

/**
 * Tesseract parameters accepted by `setParameters`. Besides the named fields,
 * the index signature admits any other string-keyed parameter.
 */
interface WorkerParams {
  /** Page segmentation mode, e.g. `PSM.SINGLE_LINE`. */
  tessedit_pageseg_mode: PSM;
  /** Characters that recognition is restricted to, e.g. '0123456789' for numbers only. */
  tessedit_char_whitelist: string;
  /** Presumably characters to exclude from recognition — confirm. */
  tessedit_char_blacklist: string;
  /** Set to '1' to preserve spaces between words (string-valued). */
  preserve_interword_spaces: string;
  /** User-defined DPI, given as a string. */
  user_defined_dpi: string;
  /** Any other parameter; untyped. */
  [propName: string]: any;
}

Usage Examples:

// Restrict to numbers only
await worker.setParameters({
  tessedit_char_whitelist: '0123456789'
});

// Set page segmentation mode
// (PSM is not part of the import line shown earlier on this page —
// presumably it is exported by 'tesseract.js' like OEM; confirm)
await worker.setParameters({
  tessedit_pageseg_mode: PSM.SINGLE_LINE
});

// Preserve spaces between words (the value is a string, not a number)
await worker.setParameters({
  preserve_interword_spaces: '1'
});

Worker Reinitialization

Reinitializes worker with new languages or engine settings.

/**
 * Reinitializes worker with new languages or configuration.
 *
 * `langs` accepts the same forms as `createWorker`: a single language code,
 * an array of language codes (e.g. `['eng', 'chi_sim']`), or an array of
 * `Lang` objects.
 *
 * @param langs - New language code(s)
 * @param oem - New OCR Engine Mode
 * @param config - New initialization configuration: either a string or any
 *   subset of `InitOptions`
 * @param jobId - Optional job identifier
 * @returns Promise resolving to configuration result
 */
reinitialize(
  langs?: string | string[] | Lang[], 
  oem?: OEM, 
  config?: string | Partial<InitOptions>, 
  jobId?: string
): Promise<ConfigResult>;

/**
 * Tesseract initialization configuration, accepted as `Partial<InitOptions>`
 * by the `config` argument of `createWorker` and `reinitialize`.
 *
 * All values are strings. The field names suggest per-dictionary ("dawg")
 * load toggles — presumably '0'/'1' values; confirm against Tesseract docs.
 */
interface InitOptions {
  load_system_dawg: string;
  load_freq_dawg: string;
  load_unambig_dawg: string;
  load_punc_dawg: string;
  load_number_dawg: string;
  load_bigram_dawg: string;
}

Usage Examples:

// Switch to a different language
await worker.reinitialize('fra');

// Switch to multiple languages (array of language codes)
await worker.reinitialize(['eng', 'chi_sim']);

// Change engine mode (second argument is the OCR Engine Mode)
await worker.reinitialize('eng', OEM.TESSERACT_ONLY);

Filesystem Operations

Direct access to the worker's internal filesystem for advanced use cases.

/**
 * Writes text to worker filesystem.
 *
 * @param path - File path
 * @param text - Text content to write
 * @param jobId - Optional job identifier
 * @returns Promise resolving to operation result (`ConfigResult`)
 */
writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;

/**
 * Reads text from worker filesystem.
 *
 * @param path - File path to read
 * @param jobId - Optional job identifier
 * @returns Promise resolving to a `ConfigResult`, not a bare string; the file
 *   content is presumably carried in its `data` field — confirm
 */
readText(path: string, jobId?: string): Promise<ConfigResult>;

/**
 * Removes file from worker filesystem.
 *
 * @param path - File path to remove
 * @param jobId - Optional job identifier
 * @returns Promise resolving to operation result (`ConfigResult`)
 */
removeFile(path: string, jobId?: string): Promise<ConfigResult>;

/**
 * Direct filesystem method access.
 *
 * @param method - Filesystem method name, given as a string
 * @param args - Method arguments, passed as an array
 * @param jobId - Optional job identifier
 * @returns Promise resolving to method result (wrapped in a `ConfigResult`)
 */
FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;

Worker Loading (Deprecated)

The load method is deprecated: workers now come pre-loaded, so calls to it are unnecessary and should be removed from existing code.

/**
 * @deprecated Workers now come pre-loaded, so calling this method is
 * unnecessary; remove calls to it from existing code.
 * @param jobId - Optional job identifier
 * @returns Promise resolving to configuration result (`ConfigResult`)
 */
load(jobId?: string): Promise<ConfigResult>;

Worker Termination

Terminates worker and releases resources.

/**
 * Terminates worker and releases resources.
 *
 * @param jobId - Optional job identifier
 * @returns Promise resolving (to a `ConfigResult`) when termination is complete
 */
terminate(jobId?: string): Promise<ConfigResult>;

Usage Examples:

// NOTE: the three snippets below are independent examples, not one script.

// Always terminate workers when done
await worker.terminate();

// In finally block to ensure cleanup
try {
  const result = await worker.recognize('image.png');
  // Process result
} finally {
  // Runs whether or not recognize() throws
  await worker.terminate();
}

// Using filesystem operations
// (presumably these need a worker that has not been terminated yet, so run
// them before terminate() — confirm)
await worker.writeText('/tmp/config.txt', 'custom config');
// readText resolves to a ConfigResult, not a bare string
const config = await worker.readText('/tmp/config.txt');
await worker.removeFile('/tmp/config.txt');

Result Types

/** Result of `recognize`: a job identifier plus the recognized page. */
interface RecognizeResult {
  /** Identifier of the job that produced this result. */
  jobId: string;
  /** Recognition output for the image. */
  data: Page;
}

/**
 * Generic result type returned by `setParameters`, `reinitialize`, the
 * filesystem methods, `load` and `terminate`.
 */
interface ConfigResult {
  /** Identifier of the job that produced this result. */
  jobId: string;
  /** Operation-specific payload; untyped. */
  data: any;
}

/**
 * Recognition output for one image, carried in `RecognizeResult.data`.
 *
 * Fields typed `| null` may be absent; presumably a field is null when the
 * matching `OutputFormats` flag was not enabled — confirm.
 */
interface Page {
  /** Top level of the layout hierarchy (blocks > paragraphs > lines > words > symbols). */
  blocks: Block[] | null;
  /** Confidence for the page; scale is not specified here. */
  confidence: number;
  oem: string;
  osd: string;
  psm: string;
  /** Recognized text. */
  text: string;
  version: string;
  hocr: string | null;
  tsv: string | null;
  box: string | null;
  unlv: string | null;
  sd: string | null;
  imageColor: string | null;
  imageGrey: string | null;
  imageBinary: string | null;
  rotateRadians: number | null;
  /** PDF output as an array of numbers (presumably byte values — confirm). */
  pdf: number[] | null;
  debug: string | null;
}

/** A block of a `Page`; contains paragraphs. */
interface Block {
  /** Paragraphs contained in this block. */
  paragraphs: Paragraph[];
  /** Text of this block. */
  text: string;
  /** Confidence for this block; scale is not specified here. */
  confidence: number;
  /** Bounding box of this block. */
  bbox: Bbox;
  /** Block type, as a string; possible values are not listed here. */
  blocktype: string;
  /** Presumably a back-reference to the page that owns this block — confirm. */
  page: Page;
}

/** A paragraph within a `Block`; contains lines. */
interface Paragraph {
  /** Lines contained in this paragraph. */
  lines: Line[];
  /** Text of this paragraph. */
  text: string;
  /** Confidence for this paragraph; scale is not specified here. */
  confidence: number;
  /** Bounding box of this paragraph. */
  bbox: Bbox;
  /** Presumably true when the paragraph reads left-to-right — confirm. */
  is_ltr: boolean;
}

/** A text line within a `Paragraph`; contains words. */
interface Line {
  /** Words contained in this line. */
  words: Word[];
  /** Text of this line. */
  text: string;
  /** Confidence for this line; scale is not specified here. */
  confidence: number;
  /** Baseline of this line. */
  baseline: Baseline;
  /** Row metrics (ascenders, descenders, row height) for this line. */
  rowAttributes: RowAttributes;
  /** Bounding box of this line. */
  bbox: Bbox;
}

/** A word within a `Line`; contains symbols and alternative choices. */
interface Word {
  /** Symbols contained in this word. */
  symbols: Symbol[];
  /** Candidate readings, each with its own text and confidence. */
  choices: Choice[];
  /** Text of this word. */
  text: string;
  /** Confidence for this word; scale is not specified here. */
  confidence: number;
  /** Bounding box of this word. */
  bbox: Bbox;
  /** Font name reported for this word. */
  font_name: string;
}

/**
 * A symbol within a `Word` — presumably a single recognized character;
 * confirm. Note that this name shadows the global `Symbol` wherever the
 * declaration is in scope.
 */
interface Symbol {
  /** Text of this symbol. */
  text: string;
  /** Confidence for this symbol; scale is not specified here. */
  confidence: number;
  /** Bounding box of this symbol. */
  bbox: Bbox;
  is_superscript: boolean;
  is_subscript: boolean;
  is_dropcap: boolean;
}

/** One entry of `Word.choices`: a candidate text with its confidence. */
interface Choice {
  /** Candidate text. */
  text: string;
  /** Confidence for this candidate; scale is not specified here. */
  confidence: number;
}

/**
 * Bounding box described by two points, (x0, y0) and (x1, y1) — presumably
 * the top-left and bottom-right corners in image pixels; confirm.
 */
interface Bbox {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
}

/**
 * Baseline of a `Line`, described by two points (x0, y0) and (x1, y1) —
 * presumably the baseline's end points; confirm.
 */
interface Baseline {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
  /** Presumably false when no baseline was found for the line — confirm. */
  has_baseline: boolean;
}

/** Row metrics attached to a `Line`; units are not specified here. */
interface RowAttributes {
  ascenders: number;
  descenders: number;
  row_height: number;
}

Configuration Types

/**
 * Worker configuration options. `createWorker` takes this as
 * `Partial<WorkerOptions>`, so every field may be omitted there.
 */
interface WorkerOptions {
  /** Presumably the location of the Tesseract core script — confirm. */
  corePath: string;
  /** Presumably the location language data is loaded from — confirm. */
  langPath: string;
  /** Presumably the location where language data is cached — confirm. */
  cachePath: string;
  dataPath: string;
  /** Presumably the location of the worker script — confirm. */
  workerPath: string;
  /** Cache strategy name; accepted values are not listed here. */
  cacheMethod: string;
  workerBlobURL: boolean;
  gzip: boolean;
  legacyLang: boolean;
  legacyCore: boolean;
  /** Callback invoked with `LoggerMessage` objects (status and progress). */
  logger: (arg: LoggerMessage) => void;
  /** Callback invoked with an error value; untyped. */
  errorHandler: (arg: any) => void;
}

/** Message object passed to the `logger` callback of `WorkerOptions`. */
interface LoggerMessage {
  jobId: string;
  /** Progress value; range is not specified here (presumably 0–1 — confirm). */
  progress: number;
  /** Status description. */
  status: string;
  userJobId: string;
  workerId: string;
}

Install with Tessl CLI

npx tessl i tessl/npm-tesseract-js

docs

configuration-constants.md

high-level-functions.md

index.md

scheduler-api.md

worker-api.md

tile.json