CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-tesseract-js

Pure JavaScript multilingual OCR library that brings the powerful Tesseract OCR engine to both browser and Node.js environments through WebAssembly

Pending
Overview
Eval results
Files

scheduler-api.mddocs/

Scheduler API

The Scheduler API provides a queue-based system for managing multiple workers and distributing OCR jobs across them for parallel processing. It's ideal for handling large batches of images or maintaining a pool of workers for high-throughput applications.

Capabilities

Scheduler Creation

Creates a new scheduler instance for managing worker pools and job queues.

/**
 * Creates a new scheduler: a queue-based manager that holds a pool of workers
 * and distributes OCR jobs across them. Workers are added afterwards with
 * `addWorker`.
 * @returns Scheduler instance
 */
function createScheduler(): Scheduler;

Usage Examples:

import { createScheduler, createWorker } from 'tesseract.js';

// Create the scheduler; workers are added to it afterwards with addWorker
const scheduler = createScheduler();

Worker Management

Add workers to the scheduler pool for job distribution.

/**
 * Adds a worker to the scheduler pool so queued jobs can be distributed to it
 * @param worker - Worker instance to add (e.g. one returned by `createWorker`)
 * @returns Worker ID string for tracking
 */
addWorker(worker: Worker): string;

/**
 * Gets the current number of workers in the pool
 * @returns Number of workers that have been added to this scheduler
 */
getNumWorkers(): number;

Usage Examples:

// Create and add workers to scheduler
// (both workers are created with the same 'eng' language argument)
const worker1 = await createWorker('eng');
const worker2 = await createWorker('eng');

// addWorker returns the ID string of the worker that was added
const workerId1 = scheduler.addWorker(worker1);
const workerId2 = scheduler.addWorker(worker2);

console.log(scheduler.getNumWorkers()); // 2

Job Queue Management

Queue jobs for execution across available workers.

/**
 * Adds a recognition or detection job to the queue
 * @param action - Job action type ('recognize' or 'detect')
 * @param args - Arguments for the specified action; these are the parameters
 *   of the matching Worker method (`Worker['recognize']` or `Worker['detect']`)
 * @returns Promise resolving to the job result (`RecognizeResult` for
 *   'recognize', `DetectResult` for 'detect')
 */
addJob(action: 'recognize', ...args: Parameters<Worker['recognize']>): Promise<RecognizeResult>;
addJob(action: 'detect', ...args: Parameters<Worker['detect']>): Promise<DetectResult>;

/**
 * Gets the current job queue length
 * @returns Number of jobs still pending in the queue
 */
getQueueLen(): number;

Usage Examples:

// Queue recognition jobs; Promise.all resolves once every queued job has resolved
const results = await Promise.all([
  scheduler.addJob('recognize', 'image1.png'),
  scheduler.addJob('recognize', 'image2.png'),
  scheduler.addJob('recognize', 'image3.png')
]);

// Queue detection jobs
// NOTE(review): 'detect' presumably requires workers created with legacy (OSD)
// support enabled — confirm against the tesseract.js Worker.detect docs
const detectionResult = await scheduler.addJob('detect', 'rotated-image.png');

// Check queue status
console.log(scheduler.getQueueLen()); // Number of pending jobs

Advanced Job Queuing

Queue jobs with the full set of `Worker.recognize` parameters (recognition options, output formats, and a custom job ID) for more complex scenarios.

// Recognition with options: the arguments after the action are passed through
// as Worker.recognize parameters — here an object carrying a rectangle,
// followed by an object enabling the text and hocr fields
const result = await scheduler.addJob('recognize', 'image.png', {
  rectangle: { left: 0, top: 0, width: 100, height: 100 }
}, {
  text: true,
  hocr: true
});

// Recognition with custom job ID (the last argument); the two empty objects
// fill the parameters that come before it
const resultWithId = await scheduler.addJob('recognize', 'image.png', {}, {}, 'custom-job-123');

Scheduler Termination

Terminates all workers and clears the job queue.

/**
 * Terminates all workers and clears the job queue.
 * Call this once the scheduler is no longer needed, e.g. from a `finally` block.
 * @returns Promise resolving when all workers are terminated
 */
terminate(): Promise<any>;

Usage Examples:

// Clean shutdown of scheduler and all workers
await scheduler.terminate();

// Use in finally block for guaranteed cleanup: terminate() then runs whether
// the queued jobs resolve or reject
try {
  // Process images
  const results = await Promise.all([
    scheduler.addJob('recognize', 'image1.png'),
    scheduler.addJob('recognize', 'image2.png')
  ]);
} finally {
  await scheduler.terminate();
}

Complete Usage Patterns

Basic Parallel Processing

import { createScheduler, createWorker } from 'tesseract.js';

(async () => {
  // One scheduler in front of a pool of two English workers
  const scheduler = createScheduler();
  const firstWorker = await createWorker('eng');
  const secondWorker = await createWorker('eng');
  scheduler.addWorker(firstWorker);
  scheduler.addWorker(secondWorker);

  // Queue one recognition job per image, then wait for the whole batch
  const pendingJobs = [];
  for (const imagePath of ['img1.png', 'img2.png', 'img3.png', 'img4.png']) {
    pendingJobs.push(scheduler.addJob('recognize', imagePath));
  }
  const results = await Promise.all(pendingJobs);

  // Collect the recognized text of each result, in input order
  const texts = [];
  for (const { data } of results) {
    texts.push(data.text);
  }
  console.log(texts);

  // Shut down the workers now that every job has finished
  await scheduler.terminate();
})();

Multi-Language Processing

(async () => {
  const scheduler = createScheduler();

  // The scheduler API has no way to send a job to one particular worker: a
  // queued job is picked up by a worker from the pool. Every worker is
  // therefore loaded with all three languages, so whichever worker receives
  // a document can read it.
  const languages = ['eng', 'fra', 'deu'];
  const workers = await Promise.all(
    languages.map(() => createWorker(languages))
  );
  workers.forEach(worker => scheduler.addWorker(worker));

  // Process images with different languages
  const results = await Promise.all([
    scheduler.addJob('recognize', 'english-doc.png'),
    scheduler.addJob('recognize', 'french-doc.png'),
    scheduler.addJob('recognize', 'german-doc.png')
  ]);

  await scheduler.terminate();
})();

Batch Processing with Progress Tracking

(async () => {
  const scheduler = createScheduler();

  // Add workers with progress logging.
  // The logger's `progress` value is a fraction between 0 and 1, so it is
  // scaled to a whole-number percentage before being printed next to `%`.
  const worker1 = await createWorker('eng', 1, {
    logger: m => console.log(`Worker 1: ${m.status} - ${Math.round(m.progress * 100)}%`)
  });
  const worker2 = await createWorker('eng', 1, {
    logger: m => console.log(`Worker 2: ${m.status} - ${Math.round(m.progress * 100)}%`)
  });

  scheduler.addWorker(worker1);
  scheduler.addWorker(worker2);

  try {
    // Process large batch
    const imageFiles = Array.from({length: 20}, (_, i) => `batch-${i}.png`);

    console.log(`Processing ${imageFiles.length} images with ${scheduler.getNumWorkers()} workers`);

    // Queue every image up front and keep the job promises so the whole
    // batch can be awaited together
    const results = [];
    for (const imageFile of imageFiles) {
      console.log(`Queue length: ${scheduler.getQueueLen()}`);
      results.push(scheduler.addJob('recognize', imageFile));
    }

    const allResults = await Promise.all(results);
    console.log(`Completed processing ${allResults.length} images`);
  } finally {
    // Release the workers even if one of the jobs rejects
    await scheduler.terminate();
  }
})();

Error Handling with Scheduler

(async () => {
  const scheduler = createScheduler();

  // Single worker whose errors are reported through the errorHandler option
  const worker = await createWorker('eng', 1, {
    errorHandler: (error) => console.error('Worker error:', error)
  });
  scheduler.addWorker(worker);

  try {
    // allSettled waits for every job, so one rejection does not hide the
    // outcome of the other jobs
    const imagePaths = ['valid-image.png', 'invalid-image.png', 'another-image.png'];
    const outcomes = await Promise.allSettled(
      imagePaths.map(imagePath => scheduler.addJob('recognize', imagePath))
    );

    // Report each job by its position in the input list
    for (const [index, outcome] of outcomes.entries()) {
      if (outcome.status !== 'fulfilled') {
        console.error(`Image ${index} failed:`, outcome.reason);
        continue;
      }
      console.log(`Image ${index}: ${outcome.value.data.text}`);
    }
  } finally {
    // Always shut the workers down, whether or not the jobs succeeded
    await scheduler.terminate();
  }
})();

Interface Definition

/**
 * Queue-based manager for a pool of workers, returned by `createScheduler`.
 */
interface Scheduler {
  /** Adds a worker to the pool and returns its worker ID string. */
  addWorker(worker: Worker): string;
  /** Queues a recognition job; `args` are the parameters of `Worker['recognize']`. */
  addJob(action: 'recognize', ...args: Parameters<Worker['recognize']>): Promise<RecognizeResult>;
  /** Queues a detection job; `args` are the parameters of `Worker['detect']`. */
  addJob(action: 'detect', ...args: Parameters<Worker['detect']>): Promise<DetectResult>;
  /** Terminates all workers and clears the job queue. */
  terminate(): Promise<any>;
  /** Returns the number of jobs currently queued. */
  getQueueLen(): number;
  /** Returns the number of workers currently in the pool. */
  getNumWorkers(): number;
}

Install with Tessl CLI

npx tessl i tessl/npm-tesseract-js

docs

configuration-constants.md

high-level-functions.md

index.md

scheduler-api.md

worker-api.md

tile.json