or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

configuration-constants.md
high-level-functions.md
index.md
scheduler-api.md
worker-api.md
tile.json

tessl/npm-tesseract-js

Pure JavaScript multilingual OCR library that brings the powerful Tesseract OCR engine to both browser and Node.js environments through WebAssembly

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
npmpkg:npm/tesseract.js@6.0.x

To install, run

npx @tessl/cli install tessl/npm-tesseract-js@6.0.0

docs/index.md

Tesseract.js

Tesseract.js is a pure JavaScript OCR (Optical Character Recognition) library that brings the powerful Tesseract OCR engine to both browser and Node.js environments through WebAssembly. It enables developers to extract text from images in almost any language with high accuracy, supporting various image formats and providing both real-time and batch processing capabilities.

Package Information

  • Package Name: tesseract.js
  • Package Type: npm
  • Language: JavaScript/TypeScript
  • Installation: npm install tesseract.js

Core Imports

import { createWorker, createScheduler, recognize, detect } from "tesseract.js";

For CommonJS:

const { createWorker, createScheduler, recognize, detect } = require("tesseract.js");

Constants import:

import { languages, OEM, PSM, setLogging } from "tesseract.js";

Basic Usage

import { createWorker } from 'tesseract.js';

// Minimal end-to-end example: create an 'eng' worker, recognize one remote
// image, and print the extracted text.
(async () => {
  const worker = await createWorker('eng');
  try {
    const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
    console.log(text);
  } finally {
    // Release the worker even when recognition rejects, so it is never leaked.
    await worker.terminate();
  }
})();

Architecture

Tesseract.js is built around several key components:

  • Worker API: Primary interface for OCR operations with full lifecycle management
  • Scheduler API: Queue-based system for managing multiple workers and parallel processing
  • High-level Functions: Convenience functions (recognize, detect) for one-shot operations
  • Constants: Language codes (languages), engine modes (OEM), and page segmentation modes (PSM)
  • WebAssembly Core: Tesseract OCR engine compiled to WebAssembly for browser/Node.js compatibility

Capabilities

Worker API

Primary interface for creating and managing OCR workers. Workers handle text recognition, parameter configuration, and resource management.

/**
 * Creates an OCR worker. Every argument is optional.
 *
 * @param langs - Language(s) to use: a single string, an array of strings, or an array of Lang objects.
 * @param oem - OCR engine mode (see the OEM enum).
 * @param options - Any subset of WorkerOptions.
 * @param config - Either a string or any subset of InitOptions.
 * @returns A promise that resolves to the created Worker.
 */
function createWorker(
  langs?: string | string[] | Lang[], 
  oem?: OEM, 
  options?: Partial<WorkerOptions>, 
  config?: string | Partial<InitOptions>
): Promise<Worker>;

/**
 * Handle to a single OCR worker. Every method accepts an optional trailing
 * `jobId` and returns a promise; all methods except `recognize` and `detect`
 * resolve to a ConfigResult.
 */
interface Worker {
  /** @deprecated Marked deprecated in this reference; no replacement is named here. */
  load(jobId?: string): Promise<ConfigResult>;
  // File helpers addressed by `path`.
  // NOTE(review): presumably these act on the worker's own filesystem — confirm against the Worker API docs.
  writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;
  readText(path: string, jobId?: string): Promise<ConfigResult>;
  removeFile(path: string, jobId?: string): Promise<ConfigResult>;
  /** Generic call taking a method name and an argument list. NOTE(review): presumably a raw filesystem escape hatch — confirm. */
  FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;
  /** Runs text recognition on `image`; `options` and `output` are optional subsets of RecognizeOptions and OutputFormats. */
  recognize(image: ImageLike, options?: Partial<RecognizeOptions>, output?: Partial<OutputFormats>, jobId?: string): Promise<RecognizeResult>;
  /** Runs detection on `image`; the result's `data` carries the script and orientation fields of DetectData. */
  detect(image: ImageLike, jobId?: string): Promise<DetectResult>;
  /** Applies any subset of WorkerParams to this worker. */
  setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>;
  /** Takes the same language / engine-mode / config arguments as createWorker, except that `langs` does not list `string[]`. */
  reinitialize(langs?: string | Lang[], oem?: OEM, config?: string | Partial<InitOptions>, jobId?: string): Promise<ConfigResult>;
  /** Shuts the worker down; the usage example calls this once recognition is finished. */
  terminate(jobId?: string): Promise<ConfigResult>;
}

See docs/worker-api.md for the full Worker API reference.

Scheduler API

Queue-based system for managing multiple workers and distributing OCR jobs across them for parallel processing.

/** Creates a new Scheduler. Takes no arguments and returns it synchronously (not a promise). */
function createScheduler(): Scheduler;

/**
 * Queue-based coordinator that distributes OCR jobs across the workers
 * registered with it.
 */
interface Scheduler {
  /** Registers a worker. NOTE(review): the returned string is presumably the worker's id — confirm. */
  addWorker(worker: Worker): string;
  /** Queues a 'recognize' job; the remaining arguments are exactly those of Worker.recognize. */
  addJob(action: 'recognize', ...args: Parameters<Worker['recognize']>): Promise<RecognizeResult>;
  /** Queues a 'detect' job; the remaining arguments are exactly those of Worker.detect. */
  addJob(action: 'detect', ...args: Parameters<Worker['detect']>): Promise<DetectResult>;
  /** NOTE(review): presumably terminates every registered worker — confirm; the resolved value is untyped (`any`). */
  terminate(): Promise<any>;
  /** NOTE(review): presumably the number of jobs currently waiting in the queue — confirm. */
  getQueueLen(): number;
  /** NOTE(review): presumably the number of workers currently registered — confirm. */
  getNumWorkers(): number;
}

See docs/scheduler-api.md for the full Scheduler API reference.

High-Level Functions

Convenience functions for one-shot OCR operations without manual worker management.

/**
 * One-shot text recognition that needs no manual worker management.
 *
 * @param image - Image to recognize.
 * @param langs - Optional language string.
 * @param options - Optional subset of WorkerOptions.
 * @returns A promise that resolves to a RecognizeResult.
 */
function recognize(image: ImageLike, langs?: string, options?: Partial<WorkerOptions>): Promise<RecognizeResult>;
/**
 * One-shot detection that needs no manual worker management.
 *
 * @param image - Image to analyze.
 * @param options - Optional subset of WorkerOptions.
 * @returns A promise that resolves to a DetectResult.
 */
function detect(image: ImageLike, options?: Partial<WorkerOptions>): Promise<DetectResult>;

See docs/high-level-functions.md for the full High-Level Functions reference.

Configuration and Constants

Language codes, engine modes, page segmentation modes, and logging configuration.

/**
 * Language code constants: upper-case keys mapped to lower-case code strings
 * (for example 'eng', the value passed to createWorker in the usage example).
 * Only the first three entries are spelled out in this excerpt.
 */
const languages: {
  ENG: 'eng';
  FRA: 'fra';
  DEU: 'deu';
  // ... 100+ more language codes
};

/**
 * OCR engine modes (numeric values 0-3). This is the type of the `oem`
 * argument of createWorker and Worker.reinitialize.
 */
enum OEM {
  TESSERACT_ONLY = 0,
  LSTM_ONLY = 1,
  TESSERACT_LSTM_COMBINED = 2,
  DEFAULT = 3
}

/**
 * Page segmentation modes. Unlike OEM, the values are numeric *strings*.
 * This is the type of WorkerParams.tessedit_pageseg_mode. Only the first four
 * members are spelled out in this excerpt.
 */
enum PSM {
  OSD_ONLY = '0',
  AUTO_OSD = '1',
  AUTO_ONLY = '2',
  AUTO = '3',
  // ... more segmentation modes
}

/**
 * Configures the library's logging from a single boolean flag.
 * NOTE(review): presumably `true` enables and `false` disables log output — confirm.
 */
function setLogging(logging: boolean): void;

See docs/configuration-constants.md for the full Configuration and Constants reference.

Core Types

/**
 * Every input form accepted as the `image` argument of recognize/detect.
 * The usage example passes a URL string. The union mixes browser types
 * (HTML elements, canvas contexts, File, Blob, OffscreenCanvas) with
 * Node's Buffer.
 */
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
  | CanvasRenderingContext2D | File | Blob | Buffer | OffscreenCanvas;

/**
 * Object form of a language, accepted in the `langs` argument of createWorker
 * and Worker.reinitialize as an alternative to a plain string.
 */
interface Lang {
  code: string;
  // Opaque payload; typed `unknown`, so its shape is not specified here.
  data: unknown;
}

/**
 * Resolved value of a recognize call: the job id plus the Page output.
 * The usage example reads `data.text` from it.
 */
interface RecognizeResult {
  jobId: string;
  data: Page;
}

/** Resolved value of a detect call: the job id plus the DetectData payload. */
interface DetectResult {
  jobId: string;
  data: DetectData;
}

/**
 * Payload of a DetectResult: script and orientation fields, each paired with
 * a confidence value. Every field may be null.
 * NOTE(review): presumably null means the value could not be determined — confirm.
 */
interface DetectData {
  tesseract_script_id: number | null;
  script: string | null;
  script_confidence: number | null;
  orientation_degrees: number | null;
  orientation_confidence: number | null;
}

/**
 * Resolved value of every Worker method other than recognize and detect.
 * `data` is untyped (`any`); its shape is not specified here.
 */
interface ConfigResult {
  jobId: string;
  data: any;
}

/**
 * Recognition output for one image (the `data` of a RecognizeResult) and the
 * root of the Block → Paragraph → Line → Word → Symbol hierarchy.
 *
 * Many nullable fields share a name with an OutputFormats flag (blocks, hocr,
 * tsv, box, unlv, pdf, imageColor, imageGrey, imageBinary, debug).
 * NOTE(review): presumably each is non-null only when the matching flag was
 * requested — confirm.
 */
interface Page {
  blocks: Block[] | null;
  confidence: number;
  oem: string;
  osd: string;
  psm: string;
  // The recognized text; this is what the usage example logs.
  text: string;
  version: string;
  hocr: string | null;
  tsv: string | null;
  box: string | null;
  unlv: string | null;
  sd: string | null;
  imageColor: string | null;
  imageGrey: string | null;
  imageBinary: string | null;
  rotateRadians: number | null;
  pdf: number[] | null;
  debug: string | null;
}

/** Element of Page.blocks: groups paragraphs and carries its own text, confidence and bounding box. */
interface Block {
  paragraphs: Paragraph[];
  text: string;
  confidence: number;
  bbox: Bbox;
  blocktype: string;
  // Back-reference typed as the owning Page type (makes the Page/Block types mutually recursive).
  page: Page;
}

/** Element of Block.paragraphs: groups lines and carries its own text, confidence and bounding box. */
interface Paragraph {
  lines: Line[];
  text: string;
  confidence: number;
  bbox: Bbox;
  // NOTE(review): presumably true for left-to-right text — confirm.
  is_ltr: boolean;
}

/**
 * Element of Paragraph.lines: groups words and carries its own text,
 * confidence and bounding box, plus baseline and row attributes.
 */
interface Line {
  words: Word[];
  text: string;
  confidence: number;
  baseline: Baseline;
  rowAttributes: RowAttributes;
  bbox: Bbox;
}

/** Element of Line.words: groups symbols and carries its own text, confidence and bounding box. */
interface Word {
  symbols: Symbol[];
  // NOTE(review): presumably alternative readings of this word, each with its own confidence — confirm.
  choices: Choice[];
  text: string;
  confidence: number;
  bbox: Bbox;
  font_name: string;
}

/**
 * Element of Word.symbols: the leaf of the Page hierarchy, with text,
 * confidence, bounding box and three boolean flags.
 *
 * NOTE(review): this name collides with the built-in global `Symbol` type.
 * Inside a module it shadows the global; in a global script it would merge
 * with it instead.
 */
interface Symbol {
  text: string;
  confidence: number;
  bbox: Bbox;
  is_superscript: boolean;
  is_subscript: boolean;
  is_dropcap: boolean;
}

/** Element of Word.choices: a text string paired with a confidence value. */
interface Choice {
  text: string;
  confidence: number;
}

/**
 * Bounding box type used by the `bbox` field of Block, Paragraph, Line, Word
 * and Symbol.
 * NOTE(review): presumably (x0, y0) and (x1, y1) are opposite corners in image
 * pixel coordinates — confirm.
 */
interface Bbox {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
}

/**
 * Type of Line.baseline: the same four coordinates as Bbox plus a
 * `has_baseline` flag.
 * NOTE(review): presumably the coordinates are the baseline's two end points,
 * meaningful only when `has_baseline` is true — confirm.
 */
interface Baseline {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
  has_baseline: boolean;
}

/** Type of Line.rowAttributes: three numeric metrics for a text row. */
interface RowAttributes {
  ascenders: number;
  descenders: number;
  row_height: number;
}

/**
 * Worker configuration. Always consumed as `Partial<WorkerOptions>` by
 * createWorker and by the top-level recognize/detect functions, so callers
 * supply only the fields they need.
 */
interface WorkerOptions {
  corePath: string;
  langPath: string;
  cachePath: string;
  dataPath: string;
  workerPath: string;
  cacheMethod: string;
  workerBlobURL: boolean;
  gzip: boolean;
  legacyLang: boolean;
  legacyCore: boolean;
  // Callback that receives LoggerMessage objects.
  logger: (arg: LoggerMessage) => void;
  // Callback that receives an untyped error value.
  errorHandler: (arg: any) => void;
}

/**
 * Parameters accepted (as `Partial<WorkerParams>`) by Worker.setParameters.
 * Apart from the PSM-typed page segmentation mode, the named parameters are
 * all strings.
 */
interface WorkerParams {
  tessedit_pageseg_mode: PSM;
  tessedit_char_whitelist: string;
  tessedit_char_blacklist: string;
  preserve_interword_spaces: string;
  user_defined_dpi: string;
  // Index signature: any additional parameter name is allowed, with an unchecked (`any`) value.
  [propName: string]: any;
}

/** Argument passed to the WorkerOptions.logger callback: identifiers plus a status string and numeric progress. */
interface LoggerMessage {
  jobId: string;
  // NOTE(review): the numeric range (e.g. 0-1 vs 0-100) is not stated here — confirm.
  progress: number;
  status: string;
  userJobId: string;
  workerId: string;
}

/** Per-call options accepted (as `Partial<RecognizeOptions>`) by the `options` argument of Worker.recognize. */
interface RecognizeOptions {
  // NOTE(review): presumably limits recognition to this region of the image — confirm.
  rectangle: Rectangle;
  pdfTitle: string;
  pdfTextOnly: boolean;
  rotateAuto: boolean;
  rotateRadians: number;
}

/**
 * Type of RecognizeOptions.rectangle: a region given by its left/top origin
 * and its width/height (in contrast to Bbox, which uses two points).
 */
interface Rectangle {
  left: number;
  top: number;
  width: number;
  height: number;
}

/**
 * Boolean flags accepted (as `Partial<OutputFormats>`) by the `output`
 * argument of Worker.recognize. Most flag names match a field of Page.
 * NOTE(review): presumably each flag toggles whether the matching Page field
 * is produced — confirm.
 */
interface OutputFormats {
  text: boolean;
  blocks: boolean;
  layoutBlocks: boolean;
  hocr: boolean;
  tsv: boolean;
  box: boolean;
  unlv: boolean;
  osd: boolean;
  pdf: boolean;
  imageColor: boolean;
  imageGrey: boolean;
  imageBinary: boolean;
  debug: boolean;
}

/**
 * Initialization settings accepted (as `Partial<InitOptions>`) by the `config`
 * argument of createWorker and Worker.reinitialize. Every value is a string.
 * NOTE(review): presumably '0'/'1' switches for loading each dictionary
 * (dawg) — confirm.
 */
interface InitOptions {
  load_system_dawg: string;
  load_freq_dawg: string;
  load_unambig_dawg: string;
  load_punc_dawg: string;
  load_number_dawg: string;
  load_bigram_dawg: string;
}