# Tesseract.js

Tesseract.js is a pure JavaScript OCR (Optical Character Recognition) library that brings the powerful Tesseract OCR engine to both browser and Node.js environments through WebAssembly. It enables developers to extract text written in almost any language from images with high accuracy, supports a variety of image formats, and provides both real-time and batch processing capabilities.

## Package Information

- **Package Name**: tesseract.js
- **Package Type**: npm
- **Language**: JavaScript/TypeScript
- **Installation**: `npm install tesseract.js`

## Core Imports

For ES modules:

```javascript
import { createWorker, createScheduler, recognize, detect } from "tesseract.js";
```

For CommonJS:

```javascript
const { createWorker, createScheduler, recognize, detect } = require("tesseract.js");
```

To import the constants and the logging helper:

```javascript
import { languages, OEM, PSM, setLogging } from "tesseract.js";
```

## Basic Usage

Create a worker, recognize the text in an image, and terminate the worker when you are done:

```javascript
import { createWorker } from 'tesseract.js';

(async () => {
  const worker = await createWorker('eng');
  const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
  console.log(text);
  await worker.terminate();
})();
```

## Architecture

Tesseract.js is built around several key components:

- **Worker API**: Primary interface for OCR operations with full lifecycle management
- **Scheduler API**: Queue-based system for managing multiple workers and parallel processing
- **High-Level Functions**: Convenience functions (`recognize`, `detect`) for one-shot operations
- **Constants**: Language codes (`languages`), engine modes (`OEM`), and page segmentation modes (`PSM`)
- **WebAssembly Core**: Tesseract OCR engine compiled to WebAssembly for browser/Node.js compatibility

## Capabilities

### Worker API

Primary interface for creating and managing OCR workers. Workers handle text recognition, parameter configuration, and resource management.

```typescript { .api }
function createWorker(
  langs?: string | string[] | Lang[],
  oem?: OEM,
  options?: Partial<WorkerOptions>,
  config?: string | Partial<InitOptions>
): Promise<Worker>;

interface Worker {
  load(jobId?: string): Promise<ConfigResult>; // @deprecated
  writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;
  readText(path: string, jobId?: string): Promise<ConfigResult>;
  removeFile(path: string, jobId?: string): Promise<ConfigResult>;
  FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;
  recognize(image: ImageLike, options?: Partial<RecognizeOptions>, output?: Partial<OutputFormats>, jobId?: string): Promise<RecognizeResult>;
  detect(image: ImageLike, jobId?: string): Promise<DetectResult>;
  setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>;
  reinitialize(langs?: string | Lang[], oem?: OEM, config?: string | Partial<InitOptions>, jobId?: string): Promise<ConfigResult>;
  terminate(jobId?: string): Promise<ConfigResult>;
}
```

[Worker API](./worker-api.md)

### Scheduler API

Queue-based system for managing multiple workers and distributing OCR jobs across them for parallel processing.

```typescript { .api }
function createScheduler(): Scheduler;

interface Scheduler {
  addWorker(worker: Worker): string;
  addJob(action: 'recognize', ...args: Parameters<Worker['recognize']>): Promise<RecognizeResult>;
  addJob(action: 'detect', ...args: Parameters<Worker['detect']>): Promise<DetectResult>;
  terminate(): Promise<any>;
  getQueueLen(): number;
  getNumWorkers(): number;
}
```

[Scheduler API](./scheduler-api.md)

### High-Level Functions

Convenience functions for one-shot OCR operations without manual worker management.

```typescript { .api }
function recognize(image: ImageLike, langs?: string, options?: Partial<WorkerOptions>): Promise<RecognizeResult>;
function detect(image: ImageLike, options?: Partial<WorkerOptions>): Promise<DetectResult>;
```

[High-Level Functions](./high-level-functions.md)

### Configuration and Constants

Language codes, engine modes, page segmentation modes, and logging configuration.

```typescript { .api }
const languages: {
  ENG: 'eng';
  FRA: 'fra';
  DEU: 'deu';
  // ... 100+ more language codes
};

enum OEM {
  TESSERACT_ONLY = 0,
  LSTM_ONLY = 1,
  TESSERACT_LSTM_COMBINED = 2,
  DEFAULT = 3
}

enum PSM {
  OSD_ONLY = '0',
  AUTO_OSD = '1',
  AUTO_ONLY = '2',
  AUTO = '3',
  // ... more segmentation modes
}

function setLogging(logging: boolean): void;
```

[Configuration and Constants](./configuration-constants.md)

## Core Types

```typescript { .api }
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
  | CanvasRenderingContext2D | File | Blob | Buffer | OffscreenCanvas;

interface Lang {
  code: string;
  data: unknown;
}

interface RecognizeResult {
  jobId: string;
  data: Page;
}

interface DetectResult {
  jobId: string;
  data: DetectData;
}

interface DetectData {
  tesseract_script_id: number | null;
  script: string | null;
  script_confidence: number | null;
  orientation_degrees: number | null;
  orientation_confidence: number | null;
}

interface ConfigResult {
  jobId: string;
  data: any;
}

interface Page {
  blocks: Block[] | null;
  confidence: number;
  oem: string;
  osd: string;
  psm: string;
  text: string;
  version: string;
  hocr: string | null;
  tsv: string | null;
  box: string | null;
  unlv: string | null;
  sd: string | null;
  imageColor: string | null;
  imageGrey: string | null;
  imageBinary: string | null;
  rotateRadians: number | null;
  pdf: number[] | null;
  debug: string | null;
}

interface Block {
  paragraphs: Paragraph[];
  text: string;
  confidence: number;
  bbox: Bbox;
  blocktype: string;
  page: Page;
}

interface Paragraph {
  lines: Line[];
  text: string;
  confidence: number;
  bbox: Bbox;
  is_ltr: boolean;
}

interface Line {
  words: Word[];
  text: string;
  confidence: number;
  baseline: Baseline;
  rowAttributes: RowAttributes;
  bbox: Bbox;
}

interface Word {
  symbols: Symbol[];
  choices: Choice[];
  text: string;
  confidence: number;
  bbox: Bbox;
  font_name: string;
}

interface Symbol {
  text: string;
  confidence: number;
  bbox: Bbox;
  is_superscript: boolean;
  is_subscript: boolean;
  is_dropcap: boolean;
}

interface Choice {
  text: string;
  confidence: number;
}

interface Bbox {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
}

interface Baseline {
  x0: number;
  y0: number;
  x1: number;
  y1: number;
  has_baseline: boolean;
}

interface RowAttributes {
  ascenders: number;
  descenders: number;
  row_height: number;
}

interface WorkerOptions {
  corePath: string;
  langPath: string;
  cachePath: string;
  dataPath: string;
  workerPath: string;
  cacheMethod: string;
  workerBlobURL: boolean;
  gzip: boolean;
  legacyLang: boolean;
  legacyCore: boolean;
  logger: (arg: LoggerMessage) => void;
  errorHandler: (arg: any) => void;
}

interface WorkerParams {
  tessedit_pageseg_mode: PSM;
  tessedit_char_whitelist: string;
  tessedit_char_blacklist: string;
  preserve_interword_spaces: string;
  user_defined_dpi: string;
  [propName: string]: any;
}

interface LoggerMessage {
  jobId: string;
  progress: number;
  status: string;
  userJobId: string;
  workerId: string;
}

interface RecognizeOptions {
  rectangle: Rectangle;
  pdfTitle: string;
  pdfTextOnly: boolean;
  rotateAuto: boolean;
  rotateRadians: number;
}

interface Rectangle {
  left: number;
  top: number;
  width: number;
  height: number;
}

interface OutputFormats {
  text: boolean;
  blocks: boolean;
  layoutBlocks: boolean;
  hocr: boolean;
  tsv: boolean;
  box: boolean;
  unlv: boolean;
  osd: boolean;
  pdf: boolean;
  imageColor: boolean;
  imageGrey: boolean;
  imageBinary: boolean;
  debug: boolean;
}

interface InitOptions {
  load_system_dawg: string;
  load_freq_dawg: string;
  load_unambig_dawg: string;
  load_punc_dawg: string;
  load_number_dawg: string;
  load_bigram_dawg: string;
}
```