Tessl Tile for maven/org.bytedeco/tesseract-platform@5.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

configuration.md core-ocr-engine.md index.md language-support.md layout-analysis.md output-renderers.md

docs/layout-analysis.md

0
# Layout Analysis
1

2
Advanced page structure analysis including text block detection, reading order determination, and geometric layout information. Supports complex document layouts with tables, columns, and mixed content types for comprehensive document understanding.
3

4
## Capabilities
5

6
### Page Segmentation
7

8
Automatic analysis of page structure to identify and classify different regions and content types.
9

10
```java { .api }
11
public class TessBaseAPI {
12
    // Layout analysis entry point
13
    public PageIterator AnalyseLayout();
14
    
15
    // Component extraction methods
16
    public BOXA GetRegions(PIXA[] pixa);
17
    public BOXA GetTextlines(PIXA[] pixa, int[][] blockids);
18
    public BOXA GetWords(PIXA[] pixa);
19
    public BOXA GetConnectedComponents(PIXA[] cc);
20
    public BOXA GetComponentImages(int level, boolean text_only, PIXA[] pixa, int[][] blockids);
21
    
22
    // Page segmentation mode control
23
    public void SetPageSegMode(int mode);
24
    public int GetPageSegMode();
25
}
26
```
27

28
**Component Extraction Levels:**
29
- **Regions**: Major page areas (text blocks, images, tables)
30
- **Text Lines**: Individual lines of text within regions
31
- **Words**: Word-level segmentation with spacing
32
- **Connected Components**: Individual connected ink shapes (typically single characters, or fragments of characters such as dots and accents)
33

34
#### Usage Example
35

36
```java
37
TessBaseAPI api = new TessBaseAPI();
38
api.Init(null, "eng");
39
api.SetImage(image);
40

41
// Perform layout analysis without OCR
42
PageIterator pageIt = api.AnalyseLayout();
43

44
if (pageIt != null) {
45
    pageIt.Begin();
46
    int blockNum = 1;
47
    
48
    // Analyze each text block
49
    do {
50
        int blockType = pageIt.BlockType();
51
        System.out.println("Block " + blockNum + " type: " + 
52
                         getBlockTypeName(blockType));
53
        
54
        // Get block dimensions
55
        int[] left = new int[1], top = new int[1], 
56
              right = new int[1], bottom = new int[1];
57
        if (pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
58
            int width = right[0] - left[0];
59
            int height = bottom[0] - top[0];
60
            System.out.printf("  Size: %dx%d at (%d,%d)\n", 
61
                             width, height, left[0], top[0]);
62
        }
63
        
64
        blockNum++;
65
    } while (pageIt.Next(RIL_BLOCK));
66
}
67
```
68

69
### Page Segmentation Modes
70

71
Configure how Tesseract analyzes page layout and text structure.
72

73
```java { .api }
74
// Page segmentation mode constants
75
public static final int PSM_OSD_ONLY = 0;              // Orientation and script detection only
76
public static final int PSM_AUTO_OSD = 1;              // Auto page seg with OSD
77
public static final int PSM_AUTO_ONLY = 2;             // Auto page seg without OSD
78
public static final int PSM_AUTO = 3;                  // Fully automatic page segmentation
79
public static final int PSM_SINGLE_COLUMN = 4;         // Single column of text
80
public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5; // Single vertical text block
81
public static final int PSM_SINGLE_BLOCK = 6;          // Single uniform block (default)
82
public static final int PSM_SINGLE_LINE = 7;           // Single text line
83
public static final int PSM_SINGLE_WORD = 8;           // Single word
84
public static final int PSM_CIRCLE_WORD = 9;           // Single word in circle
85
public static final int PSM_SINGLE_CHAR = 10;          // Single character
86
public static final int PSM_SPARSE_TEXT = 11;          // Sparse text (find text anywhere)
87
public static final int PSM_SPARSE_TEXT_OSD = 12;      // Sparse text with OSD
88
public static final int PSM_RAW_LINE = 13;             // Raw line (bypass word detection)
89

90
// Helper functions
91
public static boolean PSM_OSD_ENABLED(int mode);
92
public static boolean PSM_ORIENTATION_ENABLED(int mode);
93
```
94

95
#### Usage Example
96

97
```java
98
TessBaseAPI api = new TessBaseAPI();
99
api.Init(null, "eng");
100

101
// Configure for different document types
102
if (isNewspaper) {
103
    api.SetPageSegMode(PSM_AUTO);  // Multi-column layout
104
} else if (isSingleColumn) {
105
    api.SetPageSegMode(PSM_SINGLE_COLUMN);
106
} else if (isTableCell) {
107
    api.SetPageSegMode(PSM_SINGLE_BLOCK);
108
} else if (isLicensePlate) {
109
    api.SetPageSegMode(PSM_SINGLE_LINE);
110
}
111

112
api.SetImage(image);
113
String text = api.GetUTF8Text();
114
```
115

116
### Block Type Classification
117

118
Automatic identification and classification of different content types within the page.
119

120
```java { .api }
121
// Block type constants
122
public static final int PT_UNKNOWN = 0;          // Unknown block type
123
public static final int PT_FLOWING_TEXT = 1;     // Regular paragraph text
124
public static final int PT_HEADING_TEXT = 2;     // Heading or title text
125
public static final int PT_PULLOUT_TEXT = 3;     // Pull-quote or sidebar text
126
public static final int PT_EQUATION = 4;         // Mathematical equation
127
public static final int PT_INLINE_EQUATION = 5;  // Inline mathematical expression
128
public static final int PT_TABLE = 6;            // Table structure
129
public static final int PT_VERTICAL_TEXT = 7;    // Vertical text orientation
130
public static final int PT_CAPTION_TEXT = 8;     // Image or table caption
131
public static final int PT_FLOWING_IMAGE = 9;    // Flowing image
132
public static final int PT_HEADING_IMAGE = 10;   // Heading image
133
public static final int PT_PULLOUT_IMAGE = 11;   // Pull-out image
134
public static final int PT_HORZ_LINE = 12;       // Horizontal line
135
public static final int PT_VERT_LINE = 13;       // Vertical line  
136
public static final int PT_NOISE = 14;           // Noise or artifacts
137

138
// Block type utility functions
139
public static boolean PTIsTextType(int type);
140
public static boolean PTIsImageType(int type);
141
public static boolean PTIsLineType(int type);
142
```
143

144
#### Usage Example
145

146
```java
147
PageIterator pageIt = api.AnalyseLayout();
148
pageIt.Begin();
149

150
do {
151
    int blockType = pageIt.BlockType();
152
    
153
    if (PTIsTextType(blockType)) {
154
        System.out.println("Text block found");
155
        
156
        switch (blockType) {
157
            case PT_HEADING_TEXT:
158
                System.out.println("  -> Heading text");
159
                break;
160
            case PT_FLOWING_TEXT:
161
                System.out.println("  -> Body text");
162
                break;
163
            case PT_CAPTION_TEXT:
164
                System.out.println("  -> Caption text");
165
                break;
166
        }
167
    } else if (PTIsImageType(blockType)) {
168
        System.out.println("Image block found");
169
    } else if (blockType == PT_TABLE) {
170
        System.out.println("Table structure detected");
171
    }
172
    
173
} while (pageIt.Next(RIL_BLOCK));
174
```
175

176
### Orientation and Script Detection
177

178
Determine page orientation, text direction, and script types for proper text processing.
179

180
```java { .api }
181
public class PageIterator {
182
    // Orientation information
183
    public void Orientation(int[] orientation, int[] writing_direction,
184
                           int[] textline_order, float[] deskew_angle);
185
}
186

187
// Orientation constants
188
public static final int ORIENTATION_PAGE_UP = 0;     // Normal orientation
189
public static final int ORIENTATION_PAGE_RIGHT = 1;  // 90° clockwise
190
public static final int ORIENTATION_PAGE_DOWN = 2;   // 180° rotation
191
public static final int ORIENTATION_PAGE_LEFT = 3;   // 90° counter-clockwise
192

193
// Writing direction constants  
194
public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = 0;
195
public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = 1;
196
public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = 2;
197

198
// Text line order constants
199
public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = 0;
200
public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = 1;
201
public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = 2;
202
```
203

204
#### Usage Example
205

206
```java
207
PageIterator pageIt = api.AnalyseLayout();
208
pageIt.Begin();
209

210
// Get page-level orientation information
211
int[] orientation = new int[1];
212
int[] writing_dir = new int[1]; 
213
int[] textline_order = new int[1];
214
float[] deskew_angle = new float[1];
215

216
pageIt.Orientation(orientation, writing_dir, textline_order, deskew_angle);
217

218
System.out.println("Page orientation: " + orientation[0]);
219
System.out.println("Writing direction: " + writing_dir[0]);
220
System.out.println("Text line order: " + textline_order[0]);
221
System.out.printf("Deskew angle: %.2f degrees\n", deskew_angle[0]);
222

223
// Rotate image if needed
224
if (orientation[0] == ORIENTATION_PAGE_RIGHT) {
225
    System.out.println("Page needs 90° counter-clockwise rotation");
226
} else if (orientation[0] == ORIENTATION_PAGE_DOWN) {
227
    System.out.println("Page needs 180° rotation");
228
}
229
```
230

231
### Geometric Layout Information
232

233
Extract detailed geometric information including baselines, polygons, and precise positioning.
234

235
```java { .api }
236
public class PageIterator {
237
    // Baseline information
238
    public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);
239
    
240
    // Block outline polygon
241
    public PTA BlockPolygon();
242
    
243
    // Image extraction with padding
244
    public PIX GetImage(int level, int padding, PIX original_img, 
245
                       int[] left, int[] top);
246
    
247
    // Binary image extraction
248
    public PIX GetBinaryImage(int level);
249
}
250
```
251

252
#### Usage Example
253

254
```java
255
PageIterator pageIt = api.AnalyseLayout();
256
pageIt.Begin();
257

258
// Extract geometric information for text lines
259
do {
260
    if (pageIt.IsAtBeginningOf(RIL_TEXTLINE)) {
261
        // Get text line baseline
262
        int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
263
        if (pageIt.Baseline(RIL_TEXTLINE, x1, y1, x2, y2)) {
264
            System.out.printf("Baseline: (%d,%d) to (%d,%d)\n", 
265
                             x1[0], y1[0], x2[0], y2[0]);
266
            
267
            // Calculate text angle
268
            double angle = Math.atan2(y2[0] - y1[0], x2[0] - x1[0]) * 180 / Math.PI;
269
            System.out.printf("Text angle: %.1f degrees\n", angle);
270
        }
271
        
272
        // Extract text line image
273
        PIX lineImage = pageIt.GetBinaryImage(RIL_TEXTLINE);
274
        if (lineImage != null) {
275
            pixWrite("/tmp/line_" + pageIt.imagenum() + ".png", lineImage, IFF_PNG);
276
            pixDestroy(lineImage);
277
        }
278
    }
279
    
280
} while (pageIt.Next(RIL_TEXTLINE));
281
```
282

283
### Paragraph Analysis
284

285
Detailed paragraph-level analysis including justification, list detection, and formatting.
286

287
```java { .api }
288
public class PageIterator {
289
    // Paragraph information
290
    public void ParagraphInfo(int[] justification, boolean[] is_list_item,
291
                             boolean[] is_crown, int[] first_line_indent);
292
}
293

294
// Paragraph justification constants
295
public static final int JUSTIFICATION_UNKNOWN = 0;
296
public static final int JUSTIFICATION_LEFT = 1;
297
public static final int JUSTIFICATION_CENTER = 2;  
298
public static final int JUSTIFICATION_RIGHT = 3;
299
```
300

301
#### Usage Example
302

303
```java
304
PageIterator pageIt = api.AnalyseLayout();
305
pageIt.Begin();
306

307
// Analyze paragraph formatting
308
do {
309
    if (pageIt.IsAtBeginningOf(RIL_PARA)) {
310
        int[] justification = new int[1];
311
        boolean[] is_list = new boolean[1];
312
        boolean[] is_crown = new boolean[1];
313
        int[] indent = new int[1];
314
        
315
        pageIt.ParagraphInfo(justification, is_list, is_crown, indent);
316
        
317
        System.out.println("Paragraph properties:");
318
        switch (justification[0]) {
319
            case JUSTIFICATION_LEFT:
320
                System.out.println("  Justification: Left");
321
                break;
322
            case JUSTIFICATION_CENTER:
323
                System.out.println("  Justification: Center");
324
                break;
325
            case JUSTIFICATION_RIGHT:
326
                System.out.println("  Justification: Right");
327
                break;
328
            default:
329
                System.out.println("  Justification: Unknown");
330
        }
331
        
332
        if (is_list[0]) {
333
            System.out.println("  -> List item detected");
334
        }
335
        
336
        if (is_crown[0]) {
337
            System.out.println("  -> Crown paragraph (hanging indent)");
338
        }
339
        
340
        System.out.println("  First line indent: " + indent[0] + "px");
341
    }
342
    
343
} while (pageIt.Next(RIL_PARA));
344
```
345

346
### Component Image Extraction
347

348
Extract individual components as separate images for detailed analysis or processing.
349

350
```java { .api }
351
public class TessBaseAPI {
352
    // Extract component images at different levels
353
    public BOXA GetComponentImages(int level, boolean text_only, 
354
                                  PIXA[] pixa, int[][] blockids);
355
}
356
```
357

358
#### Usage Example
359

360
```java
361
// Extract all word images from the page
362
PIXA[] wordImages = new PIXA[1];
363
int[][] blockIds = new int[1][];
364

365
BOXA wordBoxes = api.GetComponentImages(RIL_WORD, true, wordImages, blockIds);
366

367
if (wordBoxes != null && wordImages[0] != null) {
368
    int numWords = boxaGetCount(wordBoxes);
369
    int numImages = pixaGetCount(wordImages[0]);
370
    
371
    System.out.println("Extracted " + numWords + " word regions");
372
    System.out.println("Generated " + numImages + " word images");
373
    
374
    // Save individual word images
375
    for (int i = 0; i < numImages; i++) {
376
        PIX wordPix = pixaGetPix(wordImages[0], i, L_CLONE);
377
        String filename = String.format("/tmp/word_%03d.png", i);
378
        pixWrite(filename, wordPix, IFF_PNG);
379
        pixDestroy(wordPix);
380
    }
381
    
382
    // Cleanup
383
    boxaDestroy(wordBoxes);
384
    pixaDestroy(wordImages[0]);
385
}
386
```
387

388
### Reading Order Analysis
389

390
Determine the logical reading order of the words within a text line, including lines that mix left-to-right and right-to-left text.
391

392
```java { .api }
393
public class ResultIterator {
394
    // Calculate reading order for text lines
395
    public static void CalculateTextlineOrder(boolean paragraph_is_ltr,
396
                                            int[] word_dirs,
397
                                            int[] reading_order);
398
}
399
```
400

401
#### Usage Example
402

403
```java
404
ResultIterator resultIt = api.GetIterator();
405
resultIt.Begin();
406

407
// Collect word directions for reading order calculation
408
List<Integer> wordDirections = new ArrayList<>();
409
do {
410
    int direction = resultIt.WordDirection();
411
    wordDirections.add(direction);
412
} while (resultIt.Next(RIL_WORD));
413

414
// Calculate reading order
415
boolean isLtr = resultIt.ParagraphIsLtr();
416
int[] wordDirs = wordDirections.stream().mapToInt(i -> i).toArray();
417
int[] readingOrder = new int[wordDirs.length];
418

419
ResultIterator.CalculateTextlineOrder(isLtr, wordDirs, readingOrder);
420

421
// Process words in reading order
422
for (int i = 0; i < readingOrder.length; i++) {
423
    int wordIndex = readingOrder[i];
424
    System.out.println("Reading order " + i + ": word " + wordIndex);
425
}
426
```
427

428
## Advanced Layout Features
429

430
### Table Detection and Analysis
431

432
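While Tesseract can detect table blocks (`PT_TABLE`), detailed table structure analysis requires additional processing. Note that `SetRectangle` and `GetUTF8Text` reset the engine's layout results, so in production code collect the table bounding boxes first and run recognition on each region only after the block iteration has finished. The basic flow looks like this: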
433

434
```java
435
PageIterator pageIt = api.AnalyseLayout();
436
pageIt.Begin();
437

438
do {
439
    if (pageIt.BlockType() == PT_TABLE) {
440
        System.out.println("Table detected");
441
        
442
        // Get table bounding box
443
        int[] left = new int[1], top = new int[1], 
444
              right = new int[1], bottom = new int[1];
445
        pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom);
446
        
447
        // Extract table region for specialized processing
448
        api.SetRectangle(left[0], top[0], 
449
                        right[0] - left[0], 
450
                        bottom[0] - top[0]);
451
        
452
        // Process table with different PSM mode
453
        api.SetPageSegMode(PSM_SPARSE_TEXT);
454
        String tableText = api.GetUTF8Text();
455
        
456
        System.out.println("Table content:\n" + tableText);
457
    }
458
} while (pageIt.Next(RIL_BLOCK));
459
```
460

461
## Types
462

463
### Layout Constants
464

465
```java { .api }
466
// Iterator level constants
467
public static final int RIL_BLOCK = 0;
468
public static final int RIL_PARA = 1;
469
public static final int RIL_TEXTLINE = 2;
470
public static final int RIL_WORD = 3;
471
public static final int RIL_SYMBOL = 4;
472

473
// Page segmentation modes
474
public static final int PSM_AUTO = 3;                  // Default auto segmentation
475
public static final int PSM_SINGLE_COLUMN = 4;         // Single column layout
476
public static final int PSM_SINGLE_BLOCK = 6;          // Single text block
477
public static final int PSM_SINGLE_LINE = 7;           // Single line
478
public static final int PSM_SPARSE_TEXT = 11;          // Find text anywhere
479

480
// Block type constants  
481
public static final int PT_FLOWING_TEXT = 1;
482
public static final int PT_HEADING_TEXT = 2;
483
public static final int PT_TABLE = 6;
484
public static final int PT_VERTICAL_TEXT = 7;
485
public static final int PT_CAPTION_TEXT = 8;
486
```
487

488
### Leptonica Integration Types
489

490
```java { .api }
491
// Leptonica data structures (from org.bytedeco.leptonica)
492
public class PIX;     // Image structure
493
public class PIXA;    // Array of PIX images  
494
public class BOXA;    // Array of bounding boxes
495
public class BOX;     // Single bounding box
496
public class PTA;     // Array of points (polygon)
497
```

Version

Tile

Files

docs/layout-analysis.md

docs/layout-analysis.md