0
# Text Recognition
1
2
OCR capabilities, document analysis, and text extraction with specialized structures for character recognition and document processing.
3
4
## Capabilities
5
6
### Recognition Engine
7
8
Core text recognition functionality with training, classification, and confidence scoring.
9
10
```java { .api }
11
/**
12
* Character recognition engine
13
*/
14
class L_RECOG extends Pointer {
15
PIX pixdb_ave(); // average templates
16
PIX pixdb_range(); // template ranges
17
PIXA pixa_tr(); // training examples
18
PIXAA pixaa_tr(); // organized training data
19
PTA pta_tr(); // training centroids
20
NUMA nasum_tr(); // training sums
21
int threshold(); // classification threshold
22
int maxyshift(); // maximum y shift
23
}
24
25
/**
26
* Create recognition engine
27
* @param scalew - Template width scale
28
* @param scaleh - Template height scale
29
* @param linew - Line width for template rendering
30
* @param threshold - Classification threshold
31
* @param maxyshift - Maximum vertical shift allowed
32
* @return L_RECOG engine or null on failure
33
*/
34
L_RECOG recogCreate(int scalew, int scaleh, int linew, int threshold, int maxyshift);
35
36
/**
37
* Create from existing recognizer
38
* @param recs - Source recognizer
39
* @param scalew - New width scale
40
* @param scaleh - New height scale
41
* @param linew - Line width
42
* @param threshold - Classification threshold
43
* @param maxyshift - Maximum y shift
44
* @return New L_RECOG or null on failure
45
*/
46
L_RECOG recogCreateFromRecog(L_RECOG recs, int scalew, int scaleh, int linew, int threshold, int maxyshift);
47
48
/**
49
* Train recognizer with labeled example
50
* @param recog - Recognition engine
51
* @param pixs - Training image
52
* @param box - Character bounding box (can be null for full image)
53
* @param text - Character label
54
* @param debug - Debug level (0 = none)
55
* @return 0 on success, 1 on failure
56
*/
57
int recogTrainLabeled(L_RECOG recog, PIX pixs, BOX box, String text, int debug);
58
59
/**
60
* Finalize training (build templates)
61
* @param recog - Recognition engine
62
* @param debug - Debug level
63
* @return 0 on success, 1 on failure
64
*/
65
int recogFinishTraining(L_RECOG recog, int debug);
66
67
/**
68
* Classify character
69
* @param recog - Recognition engine
70
* @param pixs - Character image
71
* @param box - Character bounding box (can be null)
72
* @param pcharstr - Returns recognized character
73
* @param pscore - Returns confidence score
74
* @param debug - Debug level
75
* @return 0 on success, 1 on failure
76
*/
77
int recogClassifyPixel(L_RECOG recog, PIX pixs, BOX box, BytePointer pcharstr, FloatPointer pscore, int debug);
78
```
79
80
**Usage Examples:**
81
82
```java
83
import org.bytedeco.leptonica.*;
84
import static org.bytedeco.leptonica.global.leptonica.*;
85
86
// Create OCR engine for digits
87
L_RECOG digitRecog = recogCreate(32, 32, 4, 128, 2);
88
89
// Train with labeled examples
90
PIX digit0 = pixRead("digit_0_sample.png");
91
recogTrainLabeled(digitRecog, digit0, null, "0", 0);
92
93
PIX digit1 = pixRead("digit_1_sample.png");
94
recogTrainLabeled(digitRecog, digit1, null, "1", 0);
95
96
// ... train with more examples ...
97
98
// Finalize training
99
recogFinishTraining(digitRecog, 0);
100
101
// Classify unknown character
102
PIX unknown = pixRead("unknown_digit.png");
103
BytePointer result = new BytePointer(10);
104
FloatPointer confidence = new FloatPointer(1);
105
106
int status = recogClassifyPixel(digitRecog, unknown, null, result, confidence, 0);
107
if (status == 0) {
108
System.out.println("Recognized: " + result.getString() +
109
" (confidence: " + confidence.get() + ")");
110
}
111
```
112
113
### Document Dewarping
114
115
Correct document distortion and perspective issues for improved OCR accuracy.
116
117
```java { .api }
118
/**
119
* Single page dewarp correction
120
*/
121
class L_DEWARP extends Pointer {
122
PIX pixs(); // source image
123
PIXA sampv(); // vertical samples
124
PIXA samph(); // horizontal samples
125
PTA ptav(); // vertical control points
126
PTA ptah(); // horizontal control points
127
int w(); // image width
128
int h(); // image height
129
int nx(); // horizontal sampling points
130
int ny(); // vertical sampling points
131
}
132
133
/**
134
* Multi-page dewarp processing
135
*/
136
class L_DEWARPA extends Pointer {
137
int nalloc(); // allocated array size
138
int maxpage(); // maximum page number
139
int sampling(); // sampling factor
140
int redfactor(); // reduction factor
141
int minlines(); // minimum lines for modeling
142
int maxdist(); // maximum distance for interpolation
143
}
144
145
/**
146
* Create dewarp structure for single page
147
* @param pixs - Source document image
148
* @param pageno - Page number identifier
149
* @return L_DEWARP structure or null on failure
150
*/
151
L_DEWARP dewarpCreate(PIX pixs, int pageno);
152
153
/**
154
* Create multi-page dewarp structure
155
* @param nmax - Maximum number of pages
156
* @param sampling - Sampling density
157
* @param redfactor - Size reduction factor
158
* @param minlines - Minimum text lines required
159
* @param maxdist - Maximum interpolation distance
160
* @return L_DEWARPA structure or null on failure
161
*/
162
L_DEWARPA dewarpaCreate(int nmax, int sampling, int redfactor, int minlines, int maxdist);
163
164
/**
165
* Build dewarp model for page
166
* @param dew - Dewarp structure
167
* @param debugfile - Debug output file (can be null)
168
* @return 0 on success, 1 on failure
169
*/
170
int dewarpBuildModel(L_DEWARP dew, String debugfile);
171
172
/**
173
* Apply dewarp correction
174
* @param dew - Dewarp structure with built model
175
* @param pixs - Source image to correct
176
* @param debugfile - Debug output file (can be null)
177
* @return Corrected PIX or null on failure
178
*/
179
PIX dewarpApply(L_DEWARP dew, PIX pixs, String debugfile);
180
181
/**
182
* Add page to multi-page dewarper
183
* @param dewa - Multi-page dewarp structure
184
* @param dew - Single-page dewarp structure to insert; the page number
185
*   stored in dew selects its slot in dewa
186
*   (after insertion dewa owns dew, so do not destroy it separately)
187
* @return 0 on success, 1 on failure
188
*/
189
int dewarpaInsertDewarp(L_DEWARPA dewa, L_DEWARP dew);
190
```
191
192
**Usage Examples:**
193
194
```java
195
// Single page dewarping
196
PIX document = pixRead("scanned_page.jpg");
197
L_DEWARP dewarp = dewarpCreate(document, 1);
198
199
// Build correction model
200
int result = dewarpBuildModel(dewarp, null);
201
if (result == 0) {
202
// Apply correction
203
PIX corrected = dewarpApply(dewarp, document, null);
204
pixWrite("corrected_page.jpg", corrected, IFF_JFIF_JPEG);
205
}
206
207
// Multi-page document processing
208
L_DEWARPA multiPage = dewarpaCreate(100, 7, 1, 6, 30);
209
210
// Process each page
211
for (int i = 1; i <= pageCount; i++) {
212
PIX page = pixRead("page_" + i + ".jpg");
213
L_DEWARP pageDewarp = dewarpCreate(page, i);
214
215
if (dewarpBuildModel(pageDewarp, null) == 0) {
216
dewarpaInsertDewarp(multiPage, pageDewarp);
217
218
PIX corrected = dewarpApply(pageDewarp, page, null);
219
pixWrite("corrected_page_" + i + ".jpg", corrected, IFF_JFIF_JPEG);
220
}
221
}
222
```
223
224
### JBig2 Classification
225
226
Specialized encoding and classification for document compression and analysis.
227
228
```java { .api }
229
/**
230
* JBig2 symbol classifier
231
*/
232
class JBCLASSER extends Pointer {
233
SARRAY safiles(); // input file names
234
int method(); // classification method
235
int components(); // number of components
236
int maxwidth(); // maximum symbol width
237
int maxheight(); // maximum symbol height
238
int npages(); // number of pages processed
239
int baseindex(); // base index for symbols
240
}
241
242
/**
243
* JBig2 encoding data
244
*/
245
class JBDATA extends Pointer {
246
PIX pix(); // reconstructed image
247
int w(); // image width
248
int h(); // image height
249
int nclass(); // number of symbol classes
250
PIXA pixat(); // template symbols
251
PTAA ptaul(); // upper-left coordinates
252
}
253
254
/**
255
* Create JBig2 classifier
256
* @param method - Classification method
257
* @param components - Component type to classify (JB_CONN_COMPS, JB_CHARACTERS, or JB_WORDS)
258
* @return JBCLASSER or null on failure
259
*/
260
JBCLASSER jbClasserCreate(int method, int components);
261
262
/**
263
* Add page to classifier
264
* @param classer - JBig2 classifier
265
* @param pixs - Page image
266
* @param filename - Source filename
267
* @return 0 on success, 1 on failure
268
*/
269
int jbClasserAddPage(JBCLASSER classer, PIX pixs, String filename);
270
271
/**
272
* Generate JBig2 encoding data
273
* @param classer - Trained classifier
274
* @param pageno - Page number to encode
275
* @return JBDATA encoding or null on failure
276
*/
277
JBDATA jbClasserGetJbData(JBCLASSER classer, int pageno);
278
```
279
280
**Usage Examples:**
281
282
```java
283
// Create JBig2 classifier for document compression
284
JBCLASSER classifier = jbClasserCreate(JB_CLASSIFICATION, JB_CONN_COMPS);
285
286
// Add document pages
287
for (int i = 0; i < pageCount; i++) {
288
PIX page = pixRead("page_" + i + ".tiff");
289
jbClasserAddPage(classifier, page, "page_" + i + ".tiff");
290
}
291
292
// Generate compressed representation
293
JBDATA compressed = jbClasserGetJbData(classifier, 0);
294
PIX reconstructed = compressed.pix();
295
```
296
297
### Bitmap Fonts
298
299
Bitmap font rendering for text overlay and document generation.
300
301
```java { .api }
302
/**
303
* Bitmap font structure
304
*/
305
class L_BMF extends Pointer {
306
PIXA pixa(); // character bitmaps (one PIX per printable character)
307
int size(); // font size
308
BytePointer directory(); // font directory
309
}
310
311
/**
312
* Create bitmap font
313
* @param dir - Font directory path
314
* @param fontsize - Font size
315
* @return L_BMF font or null on failure
316
*/
317
L_BMF bmfCreate(String dir, int fontsize);
318
319
/**
320
* Render text using bitmap font
321
* @param bmf - Bitmap font
322
* @param textstr - Text to render
323
* @return PIX with rendered text or null on failure
324
*/
325
PIX bmfGetPix(L_BMF bmf, String textstr);
326
327
/**
328
* Get text width in pixels
329
* @param bmf - Bitmap font
330
* @param textstr - Text string
331
* @param pw - Returns width in pixels
332
* @return 0 on success, 1 on failure
333
*/
334
int bmfGetWidth(L_BMF bmf, String textstr, IntPointer pw);
335
```
336
337
**Usage Examples:**
338
339
```java
340
// Create bitmap font
341
L_BMF font = bmfCreate("/usr/share/fonts/leptonica", 12);
342
343
// Render text
344
PIX textImage = bmfGetPix(font, "Hello, World!");
345
346
// Get text dimensions
347
IntPointer width = new IntPointer(1);
348
bmfGetWidth(font, "Sample Text", width);
349
System.out.println("Text width: " + width.get() + " pixels");
350
351
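// Overlay text on an existing image, in place at (100, 50); baseImage must have the same depth as textImage
352
pixRasterop(baseImage, 100, 50, pixGetWidth(textImage), pixGetHeight(textImage), PIX_PAINT, textImage, 0, 0);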
353
```
354
355
## Text Processing Pipeline
356
357
### Complete OCR Workflow
358
359
```java
360
// 1. Document preprocessing
361
PIX document = pixRead("document.jpg");
362
PIX gray = pixConvertRGBToGray(document, 0.299f, 0.587f, 0.114f);
363
PIX binary = pixOtsuAdaptiveThreshold(gray, 32, 32, 0, 0, 0.1f, null);
364
365
// 2. Dewarp correction
366
L_DEWARP dewarp = dewarpCreate(binary, 1);
367
if (dewarpBuildModel(dewarp, null) == 0) {
368
binary = dewarpApply(dewarp, binary, null);
369
}
370
371
// 3. Character segmentation (hypothetical)
372
BOXA characters = segmentCharacters(binary);
373
374
// 4. Character recognition
375
L_RECOG ocr = loadTrainedOCR(); // hypothetical
376
StringBuilder result = new StringBuilder();
377
378
int charCount = boxaGetCount(characters);
379
for (int i = 0; i < charCount; i++) {
380
BOX charBox = boxaGetBox(characters, i, L_CLONE);
381
PIX charImage = pixClipRectangle(binary, charBox, null);
382
383
BytePointer character = new BytePointer(10);
384
FloatPointer confidence = new FloatPointer(1);
385
386
if (recogClassifyPixel(ocr, charImage, null, character, confidence, 0) == 0) {
387
if (confidence.get() > 0.7f) { // confidence threshold
388
result.append(character.getString());
389
}
390
}
391
}
392
393
System.out.println("Recognized text: " + result.toString());
394
```
395
396
## Constants
397
398
```java { .api }
399
// JBig2 classification methods
400
static final int JB_CLASSIFICATION = 0;
401
static final int JB_CORRELATION = 1;
402
403
// Font sizes
404
static final int L_BM_FONT_4 = 4;
405
static final int L_BM_FONT_6 = 6;
406
static final int L_BM_FONT_8 = 8;
407
static final int L_BM_FONT_10 = 10;
408
static final int L_BM_FONT_12 = 12;
409
static final int L_BM_FONT_14 = 14;
410
static final int L_BM_FONT_16 = 16;
411
static final int L_BM_FONT_20 = 20;
412
```