Tessl Tile for maven/org.bytedeco/leptonica-platform@1.85.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

collections.md connected-components.md core-images.md geometry.md image-io.md image-processing.md index.md morphology.md text-recognition.md utilities.md

docs/text-recognition.md

0
# Text Recognition
1

2
OCR capabilities, document analysis, and text extraction with specialized structures for character recognition and document processing.
3

4
## Capabilities
5

6
### Recognition Engine
7

8
Core text recognition functionality with training, classification, and confidence scoring.
9

10
```java { .api }
11
/**
12
 * Character recognition engine
13
 */
14
class L_RECOG extends Pointer {
15
    PIX pixdb_ave(); // average templates
16
    PIX pixdb_range(); // template ranges
17
    PIXA pixa_tr(); // training examples
18
    PIXAA pixaa_tr(); // organized training data
19
    PTA pta_tr(); // training centroids
20
    NUMA nasum_tr(); // training sums
21
    int threshold(); // classification threshold
22
    int maxyshift(); // maximum y shift
23
}
24

25
/**
26
 * Create recognition engine
27
 * @param scalew - Template width scale
28
 * @param scaleh - Template height scale
29
 * @param linew - Line width for template rendering
30
 * @param threshold - Classification threshold
31
 * @param maxyshift - Maximum vertical shift allowed
32
 * @return L_RECOG engine or null on failure
33
 */
34
L_RECOG recogCreate(int scalew, int scaleh, int linew, int threshold, int maxyshift);
35

36
/**
37
 * Create from existing recognizer
38
 * @param recs - Source recognizer
39
 * @param scalew - New width scale
40
 * @param scaleh - New height scale  
41
 * @param linew - Line width
42
 * @param threshold - Classification threshold
43
 * @param maxyshift - Maximum y shift
44
 * @return New L_RECOG or null on failure
45
 */
46
L_RECOG recogCreateFromRecog(L_RECOG recs, int scalew, int scaleh, int linew, int threshold, int maxyshift);
47

48
/**
49
 * Train recognizer with labeled example
50
 * @param recog - Recognition engine
51
 * @param pixs - Training image
52
 * @param box - Character bounding box (can be null for full image)
53
 * @param text - Character label
54
 * @param debug - Debug level (0 = none)
55
 * @return 0 on success, 1 on failure
56
 */
57
int recogTrainLabeled(L_RECOG recog, PIX pixs, BOX box, String text, int debug);
58

59
/**
60
 * Finalize training (build templates)
61
 * @param recog - Recognition engine
62
 * @param debug - Debug level
63
 * @return 0 on success, 1 on failure
64
 */
65
int recogFinishTraining(L_RECOG recog, int debug);
66

67
/**
68
 * Classify character
69
 * @param recog - Recognition engine
70
 * @param pixs - Character image
71
 * @param box - Character bounding box (can be null)
72
 * @param pcharstr - Returns recognized character
73
 * @param pscore - Returns confidence score
74
 * @param debug - Debug level
75
 * @return 0 on success, 1 on failure
76
 */
77
int recogClassifyPixel(L_RECOG recog, PIX pixs, BOX box, BytePointer pcharstr, FloatPointer pscore, int debug);
78
```
79

80
**Usage Examples:**
81

82
```java
83
import org.bytedeco.leptonica.*;
84
import static org.bytedeco.leptonica.global.leptonica.*;
85

86
// Create OCR engine for digits
87
L_RECOG digitRecog = recogCreate(32, 32, 4, 128, 2);
88

89
// Train with labeled examples
90
PIX digit0 = pixRead("digit_0_sample.png");
91
recogTrainLabeled(digitRecog, digit0, null, "0", 0);
92

93
PIX digit1 = pixRead("digit_1_sample.png");
94
recogTrainLabeled(digitRecog, digit1, null, "1", 0);
95

96
// ... train with more examples ...
97

98
// Finalize training
99
recogFinishTraining(digitRecog, 0);
100

101
// Classify unknown character
102
PIX unknown = pixRead("unknown_digit.png");
103
BytePointer result = new BytePointer(10);
104
FloatPointer confidence = new FloatPointer(1);
105

106
int status = recogClassifyPixel(digitRecog, unknown, null, result, confidence, 0);
107
if (status == 0) {
108
    System.out.println("Recognized: " + result.getString() + 
109
                      " (confidence: " + confidence.get() + ")");
110
}
111
```
112

113
### Document Dewarping
114

115
Correct document distortion and perspective issues for improved OCR accuracy.
116

117
```java { .api }
118
/**
119
 * Single page dewarp correction
120
 */
121
class L_DEWARP extends Pointer {
122
    PIX pixs(); // source image
123
    PIXA sampv(); // vertical samples
124
    PIXA samph(); // horizontal samples
125
    PTA ptav(); // vertical control points
126
    PTA ptah(); // horizontal control points
127
    int w(); // image width
128
    int h(); // image height
129
    int nx(); // horizontal sampling points
130
    int ny(); // vertical sampling points
131
}
132

133
/**
134
 * Multi-page dewarp processing
135
 */
136
class L_DEWARPA extends Pointer {
137
    int nalloc(); // allocated array size
138
    int maxpage(); // maximum page number
139
    int sampling(); // sampling factor
140
    int redfactor(); // reduction factor
141
    int minlines(); // minimum lines for modeling
142
    int maxdist(); // maximum distance for interpolation
143
}
144

145
/**
146
 * Create dewarp structure for single page
147
 * @param pixs - Source document image
148
 * @param pageno - Page number identifier
149
 * @return L_DEWARP structure or null on failure
150
 */
151
L_DEWARP dewarpCreate(PIX pixs, int pageno);
152

153
/**
154
 * Create multi-page dewarp structure
155
 * @param nmax - Maximum number of pages
156
 * @param sampling - Sampling density
157
 * @param redfactor - Size reduction factor
158
 * @param minlines - Minimum text lines required
159
 * @param maxdist - Maximum interpolation distance
160
 * @return L_DEWARPA structure or null on failure
161
 */
162
L_DEWARPA dewarpaCreate(int nmax, int sampling, int redfactor, int minlines, int maxdist);
163

164
/**
165
 * Build dewarp model for page
166
 * @param dew - Dewarp structure
167
 * @param debugfile - Debug output file (can be null)
168
 * @return 0 on success, 1 on failure
169
 */
170
int dewarpBuildModel(L_DEWARP dew, String debugfile);
171

172
/**
173
 * Apply dewarp correction
174
 * @param dew - Dewarp structure with built model
175
 * @param pixs - Source image to correct
176
 * @param debugfile - Debug output file (can be null)
177
 * @return Corrected PIX or null on failure
178
 */
179
PIX dewarpApply(L_DEWARP dew, PIX pixs, String debugfile);
180

181
/**
182
 * Add page to multi-page dewarper
183
 * @param dewa - Multi-page dewarp structure
184
 * @param dew - Single-page dewarp structure to insert
187
 * @return 0 on success, 1 on failure
188
 */
189
int dewarpaInsertDewarp(L_DEWARPA dewa, L_DEWARP dew);
190
```
191

192
**Usage Examples:**
193

194
```java
195
// Single page dewarping
196
PIX document = pixRead("scanned_page.jpg");
197
L_DEWARP dewarp = dewarpCreate(document, 1);
198

199
// Build correction model
200
int result = dewarpBuildModel(dewarp, null);
201
if (result == 0) {
202
    // Apply correction
203
    PIX corrected = dewarpApply(dewarp, document, null);
204
    pixWrite("corrected_page.jpg", corrected, IFF_JPEG);
205
}
206

207
// Multi-page document processing
208
L_DEWARPA multiPage = dewarpaCreate(100, 7, 1, 6, 30);
209

210
// Process each page
211
for (int i = 1; i <= pageCount; i++) {
212
    PIX page = pixRead("page_" + i + ".jpg");
213
    L_DEWARP pageDewarp = dewarpCreate(page, i);
214
    
215
    if (dewarpBuildModel(pageDewarp, null) == 0) {
216
        dewarpaInsertDewarp(multiPage, pageDewarp);
217
        
218
        PIX corrected = dewarpApply(pageDewarp, page, null);
219
        pixWrite("corrected_page_" + i + ".jpg", corrected, IFF_JPEG);
220
    }
221
}
222
```
223

224
### JBig2 Classification
225

226
Specialized encoding and classification for document compression and analysis.
227

228
```java { .api }
229
/**
230
 * JBig2 symbol classifier
231
 */
232
class JBCLASSER extends Pointer {
233
    SARRAY safiles(); // input file names
234
    int method(); // classification method
235
    int components(); // number of components
236
    int maxwidth(); // maximum symbol width
237
    int maxheight(); // maximum symbol height
238
    int npages(); // number of pages processed
239
    int baseindex(); // base index for symbols
240
}
241

242
/**
243
 * JBig2 encoding data
244
 */
245
class JBDATA extends Pointer {
246
    PIX pix(); // reconstructed image
247
    int w(); // image width
248
    int h(); // image height
249
    int nclass(); // number of symbol classes
250
    PIXA pixat(); // template symbols
251
    PTAA ptaul(); // upper-left coordinates
252
}
253

254
/**
255
 * Create JBig2 classifier
256
 * @param method - Classification method
257
 * @param components - Number of components to use
258
 * @return JBCLASSER or null on failure
259
 */
260
JBCLASSER jbClasserCreate(int method, int components);
261

262
/**
263
 * Add page to classifier
264
 * @param classer - JBig2 classifier
265
 * @param pixs - Page image
266
 * @param filename - Source filename
267
 * @return 0 on success, 1 on failure
268
 */
269
int jbClasserAddPage(JBCLASSER classer, PIX pixs, String filename);
270

271
/**
272
 * Generate JBig2 encoding data
273
 * @param classer - Trained classifier
274
 * @param pageno - Page number to encode
275
 * @return JBDATA encoding or null on failure
276
 */
277
JBDATA jbClasserGetJbData(JBCLASSER classer, int pageno);
278
```
279

280
**Usage Examples:**
281

282
```java
283
// Create JBig2 classifier for document compression
284
JBCLASSER classifier = jbClasserCreate(JB_CLASSIFICATION, 8);
285

286
// Add document pages
287
for (int i = 0; i < pageCount; i++) {
288
    PIX page = pixRead("page_" + i + ".tiff");
289
    jbClasserAddPage(classifier, page, "page_" + i + ".tiff");
290
}
291

292
// Generate compressed representation
293
JBDATA compressed = jbClasserGetJbData(classifier, 0);
294
PIX reconstructed = compressed.pix();
295
```
296

297
### Bitmap Fonts
298

299
Bitmap font rendering for text overlay and document generation.
300

301
```java { .api }
302
/**
303
 * Bitmap font structure
304
 */
305
class L_BMF extends Pointer {
306
    PIXA pixa(); // character bitmaps
307
    int size(); // font size
308
    BytePointer directory(); // font directory
309
}
310

311
/**
312
 * Create bitmap font
313
 * @param dir - Font directory path
314
 * @param fontsize - Font size
315
 * @return L_BMF font or null on failure
316
 */
317
L_BMF bmfCreate(String dir, int fontsize);
318

319
/**
320
 * Render text using bitmap font
321
 * @param bmf - Bitmap font
322
 * @param textstr - Text to render
323
 * @return PIX with rendered text or null on failure
324
 */
325
PIX bmfGetPix(L_BMF bmf, String textstr);
326

327
/**
328
 * Get text width in pixels
329
 * @param bmf - Bitmap font
330
 * @param textstr - Text string
331
 * @param pw - Returns width in pixels
332
 * @return 0 on success, 1 on failure
333
 */
334
int bmfGetWidth(L_BMF bmf, String textstr, IntPointer pw);
335
```
336

337
**Usage Examples:**
338

339
```java
340
// Create bitmap font
341
L_BMF font = bmfCreate("/usr/share/fonts/leptonica", 12);
342

343
// Render text
344
PIX textImage = bmfGetPix(font, "Hello, World!");
345

346
// Get text dimensions
347
IntPointer width = new IntPointer(1);
348
bmfGetWidth(font, "Sample Text", width);
349
System.out.println("Text width: " + width.get() + " pixels");
350

351
// Overlay text on image
352
PIX overlayed = pixPaintBoxa(baseImage, textImage, 100, 50, 0x000000);
353
```
354

355
## Text Processing Pipeline
356

357
### Complete OCR Workflow
358

359
```java
360
// 1. Document preprocessing
361
PIX document = pixRead("document.jpg");
362
PIX gray = pixConvertRGBToGray(document, 0.299f, 0.587f, 0.114f);
363
PIX binary = pixOtsuAdaptiveThreshold(gray, 32, 32, 0, 0, 0.1f, null);
364

365
// 2. Dewarp correction
366
L_DEWARP dewarp = dewarpCreate(binary, 1);
367
if (dewarpBuildModel(dewarp, null) == 0) {
368
    binary = dewarpApply(dewarp, binary, null);
369
}
370

371
// 3. Character segmentation (hypothetical)
372
BOXA characters = segmentCharacters(binary);
373

374
// 4. Character recognition
375
L_RECOG ocr = loadTrainedOCR(); // hypothetical
376
StringBuilder result = new StringBuilder();
377

378
int charCount = boxaGetCount(characters);
379
for (int i = 0; i < charCount; i++) {
380
    BOX charBox = boxaGetBox(characters, i, L_CLONE);
381
    PIX charImage = pixClipRectangle(binary, charBox, null);
382
    
383
    BytePointer character = new BytePointer(10);
384
    FloatPointer confidence = new FloatPointer(1);
385
    
386
    if (recogClassifyPixel(ocr, charImage, null, character, confidence, 0) == 0) {
387
        if (confidence.get() > 0.7f) { // confidence threshold
388
            result.append(character.getString());
389
        }
390
    }
391
}
392

393
System.out.println("Recognized text: " + result.toString());
394
```
395

396
## Constants
397

398
```java { .api }
399
// JBig2 classification methods
400
static final int JB_CLASSIFICATION = 0;
401
static final int JB_CORRELATION = 1;
402

403
// Font sizes
404
static final int L_BM_FONT_4 = 4;
405
static final int L_BM_FONT_6 = 6;
406
static final int L_BM_FONT_8 = 8;
407
static final int L_BM_FONT_10 = 10;
408
static final int L_BM_FONT_12 = 12;
409
static final int L_BM_FONT_14 = 14;
410
static final int L_BM_FONT_16 = 16;
411
static final int L_BM_FONT_20 = 20;
412
```

Version

Tile

Files

docs/text-recognition.md

docs/text-recognition.md