or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md, core-ocr-engine.md, index.md, language-support.md, layout-analysis.md, output-renderers.md, result-navigation.md

docs/layout-analysis.md

0

# Layout Analysis

1

2

Advanced page structure analysis including text block detection, reading order determination, and geometric layout information. Supports complex document layouts with tables, columns, and mixed content types for comprehensive document understanding.

3

4

## Capabilities

5

6

### Page Segmentation

7

8

Automatic analysis of page structure to identify and classify different regions and content types.

9

10

```java { .api }

11

public class TessBaseAPI {

12

// Layout analysis entry point

13

public PageIterator AnalyseLayout();

14

15

// Component extraction methods

16

public BOXA GetRegions(PIXA[] pixa);

17

public BOXA GetTextlines(PIXA[] pixa, int[][] blockids);

18

public BOXA GetWords(PIXA[] pixa);

19

public BOXA GetConnectedComponents(PIXA[] cc);

20

public BOXA GetComponentImages(int level, boolean text_only, PIXA[] pixa, int[][] blockids);

21

22

// Page segmentation mode control

23

public void SetPageSegMode(int mode);

24

public int GetPageSegMode();

25

}

26

```

27

28

**Component Extraction Levels:**

29

- **Regions**: Major page areas (text blocks, images, tables)

30

- **Text Lines**: Individual lines of text within regions

31

- **Words**: Word-level segmentation with spacing

32

- **Connected Components**: Individual connected pixel groups (typically single characters or parts of characters)

33

34

#### Usage Example

35

36

```java

37

TessBaseAPI api = new TessBaseAPI();

38

api.Init(null, "eng");

39

api.SetImage(image);

40

41

// Perform layout analysis without OCR

42

PageIterator pageIt = api.AnalyseLayout();

43

44

if (pageIt != null) {

45

pageIt.Begin();

46

int blockNum = 1;

47

48

// Analyze each text block

49

do {

50

int blockType = pageIt.BlockType();

51

System.out.println("Block " + blockNum + " type: " +

52

getBlockTypeName(blockType));

53

54

// Get block dimensions

55

int[] left = new int[1], top = new int[1],

56

right = new int[1], bottom = new int[1];

57

if (pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {

58

int width = right[0] - left[0];

59

int height = bottom[0] - top[0];

60

System.out.printf(" Size: %dx%d at (%d,%d)\n",

61

width, height, left[0], top[0]);

62

}

63

64

blockNum++;

65

} while (pageIt.Next(RIL_BLOCK));

66

}

67

```

68

69

### Page Segmentation Modes

70

71

Configure how Tesseract analyzes page layout and text structure.

72

73

```java { .api }

74

// Page segmentation mode constants

75

public static final int PSM_OSD_ONLY = 0; // Orientation and script detection only

76

public static final int PSM_AUTO_OSD = 1; // Auto page seg with OSD

77

public static final int PSM_AUTO_ONLY = 2; // Auto page seg, but no OSD or OCR

78

public static final int PSM_AUTO = 3; // Fully automatic page segmentation

79

public static final int PSM_SINGLE_COLUMN = 4; // Single column of text

80

public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5; // Single vertical text block

81

public static final int PSM_SINGLE_BLOCK = 6; // Single uniform block (default)

82

public static final int PSM_SINGLE_LINE = 7; // Single text line

83

public static final int PSM_SINGLE_WORD = 8; // Single word

84

public static final int PSM_CIRCLE_WORD = 9; // Single word in circle

85

public static final int PSM_SINGLE_CHAR = 10; // Single character

86

public static final int PSM_SPARSE_TEXT = 11; // Sparse text (find text anywhere)

87

public static final int PSM_SPARSE_TEXT_OSD = 12; // Sparse text with OSD

88

public static final int PSM_RAW_LINE = 13; // Single text line, bypassing Tesseract-specific hacks

89

90

// Helper functions

91

public static boolean PSM_OSD_ENABLED(int mode);

92

public static boolean PSM_ORIENTATION_ENABLED(int mode);

93

```

94

95

#### Usage Example

96

97

```java

98

TessBaseAPI api = new TessBaseAPI();

99

api.Init(null, "eng");

100

101

// Configure for different document types

102

if (isNewspaper) {

103

api.SetPageSegMode(PSM_AUTO); // Multi-column layout

104

} else if (isSingleColumn) {

105

api.SetPageSegMode(PSM_SINGLE_COLUMN);

106

} else if (isTableCell) {

107

api.SetPageSegMode(PSM_SINGLE_BLOCK);

108

} else if (isLicensePlate) {

109

api.SetPageSegMode(PSM_SINGLE_LINE);

110

}

111

112

api.SetImage(image);

113

String text = api.GetUTF8Text();

114

```

115

116

### Block Type Classification

117

118

Automatic identification and classification of different content types within the page.

119

120

```java { .api }

121

// Block type constants

122

public static final int PT_UNKNOWN = 0; // Unknown block type

123

public static final int PT_FLOWING_TEXT = 1; // Regular paragraph text

124

public static final int PT_HEADING_TEXT = 2; // Heading or title text

125

public static final int PT_PULLOUT_TEXT = 3; // Pull-quote or sidebar text

126

public static final int PT_EQUATION = 4; // Mathematical equation

127

public static final int PT_INLINE_EQUATION = 5; // Inline mathematical expression

128

public static final int PT_TABLE = 6; // Table structure

129

public static final int PT_VERTICAL_TEXT = 7; // Vertical text orientation

130

public static final int PT_CAPTION_TEXT = 8; // Image or table caption

131

public static final int PT_FLOWING_IMAGE = 9; // Flowing image

132

public static final int PT_HEADING_IMAGE = 10; // Heading image

133

public static final int PT_PULLOUT_IMAGE = 11; // Pull-out image

134

public static final int PT_HORZ_LINE = 12; // Horizontal line

135

public static final int PT_VERT_LINE = 13; // Vertical line

136

public static final int PT_NOISE = 14; // Noise or artifacts

137

138

// Block type utility functions

139

public static boolean PTIsTextType(int type);

140

public static boolean PTIsImageType(int type);

141

public static boolean PTIsLineType(int type);

142

```

143

144

#### Usage Example

145

146

```java

147

PageIterator pageIt = api.AnalyseLayout();

148

pageIt.Begin();

149

150

do {

151

int blockType = pageIt.BlockType();

152

153

if (PTIsTextType(blockType)) {

154

System.out.println("Text block found");

155

156

switch (blockType) {

157

case PT_HEADING_TEXT:

158

System.out.println(" -> Heading text");

159

break;

160

case PT_FLOWING_TEXT:

161

System.out.println(" -> Body text");

162

break;

163

case PT_CAPTION_TEXT:

164

System.out.println(" -> Caption text");

165

break;

166

}

167

} else if (PTIsImageType(blockType)) {

168

System.out.println("Image block found");

169

} else if (blockType == PT_TABLE) {

170

System.out.println("Table structure detected");

171

}

172

173

} while (pageIt.Next(RIL_BLOCK));

174

```

175

176

### Orientation and Script Detection

177

178

Determine page orientation, text direction, and script types for proper text processing.

179

180

```java { .api }

181

public class PageIterator {

182

// Orientation information

183

public void Orientation(int[] orientation, int[] writing_direction,

184

int[] textline_order, float[] deskew_angle);

185

}

186

187

// Orientation constants

188

public static final int ORIENTATION_PAGE_UP = 0; // Normal orientation

189

public static final int ORIENTATION_PAGE_RIGHT = 1; // 90° clockwise

190

public static final int ORIENTATION_PAGE_DOWN = 2; // 180° rotation

191

public static final int ORIENTATION_PAGE_LEFT = 3; // 90° counter-clockwise

192

193

// Writing direction constants

194

public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = 0;

195

public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = 1;

196

public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = 2;

197

198

// Text line order constants

199

public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = 0;

200

public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = 1;

201

public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = 2;

202

```

203

204

#### Usage Example

205

206

```java

207

PageIterator pageIt = api.AnalyseLayout();

208

pageIt.Begin();

209

210

// Get page-level orientation information

211

int[] orientation = new int[1];

212

int[] writing_dir = new int[1];

213

int[] textline_order = new int[1];

214

float[] deskew_angle = new float[1];

215

216

pageIt.Orientation(orientation, writing_dir, textline_order, deskew_angle);

217

218

System.out.println("Page orientation: " + orientation[0]);

219

System.out.println("Writing direction: " + writing_dir[0]);

220

System.out.println("Text line order: " + textline_order[0]);

221

System.out.printf("Deskew angle: %.2f degrees\n", deskew_angle[0]);

222

223

// Rotate image if needed

224

if (orientation[0] == ORIENTATION_PAGE_RIGHT) {

225

System.out.println("Page needs 90° counter-clockwise rotation");

226

} else if (orientation[0] == ORIENTATION_PAGE_DOWN) {

227

System.out.println("Page needs 180° rotation");

228

}

229

```

230

231

### Geometric Layout Information

232

233

Extract detailed geometric information including baselines, polygons, and precise positioning.

234

235

```java { .api }

236

public class PageIterator {

237

// Baseline information

238

public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);

239

240

// Block outline polygon

241

public PTA BlockPolygon();

242

243

// Image extraction with padding

244

public PIX GetImage(int level, int padding, PIX original_img,

245

int[] left, int[] top);

246

247

// Binary image extraction

248

public PIX GetBinaryImage(int level);

249

}

250

```

251

252

#### Usage Example

253

254

```java

255

PageIterator pageIt = api.AnalyseLayout();

256

pageIt.Begin();

257

258

// Extract geometric information for text lines

259

do {

260

if (pageIt.IsAtBeginningOf(RIL_TEXTLINE)) {

261

// Get text line baseline

262

int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];

263

if (pageIt.Baseline(RIL_TEXTLINE, x1, y1, x2, y2)) {

264

System.out.printf("Baseline: (%d,%d) to (%d,%d)\n",

265

x1[0], y1[0], x2[0], y2[0]);

266

267

// Calculate text angle

268

double angle = Math.atan2(y2[0] - y1[0], x2[0] - x1[0]) * 180 / Math.PI;

269

System.out.printf("Text angle: %.1f degrees\n", angle);

270

}

271

272

// Extract text line image

273

PIX lineImage = pageIt.GetBinaryImage(RIL_TEXTLINE);

274

if (lineImage != null) {

275

pixWrite("/tmp/line_" + pageIt.imagenum() + ".png", lineImage, IFF_PNG);

276

pixDestroy(lineImage);

277

}

278

}

279

280

} while (pageIt.Next(RIL_TEXTLINE));

281

```

282

283

### Paragraph Analysis

284

285

Detailed paragraph-level analysis including justification, list detection, and formatting.

286

287

```java { .api }

288

public class PageIterator {

289

// Paragraph information

290

public void ParagraphInfo(int[] justification, boolean[] is_list_item,

291

boolean[] is_crown, int[] first_line_indent);

292

}

293

294

// Paragraph justification constants

295

public static final int JUSTIFICATION_UNKNOWN = 0;

296

public static final int JUSTIFICATION_LEFT = 1;

297

public static final int JUSTIFICATION_CENTER = 2;

298

public static final int JUSTIFICATION_RIGHT = 3;

299

```

300

301

#### Usage Example

302

303

```java

304

PageIterator pageIt = api.AnalyseLayout();

305

pageIt.Begin();

306

307

// Analyze paragraph formatting

308

do {

309

if (pageIt.IsAtBeginningOf(RIL_PARA)) {

310

int[] justification = new int[1];

311

boolean[] is_list = new boolean[1];

312

boolean[] is_crown = new boolean[1];

313

int[] indent = new int[1];

314

315

pageIt.ParagraphInfo(justification, is_list, is_crown, indent);

316

317

System.out.println("Paragraph properties:");

318

switch (justification[0]) {

319

case JUSTIFICATION_LEFT:

320

System.out.println(" Justification: Left");

321

break;

322

case JUSTIFICATION_CENTER:

323

System.out.println(" Justification: Center");

324

break;

325

case JUSTIFICATION_RIGHT:

326

System.out.println(" Justification: Right");

327

break;

328

default:

329

System.out.println(" Justification: Unknown");

330

}

331

332

if (is_list[0]) {

333

System.out.println(" -> List item detected");

334

}

335

336

if (is_crown[0]) {

337

System.out.println(" -> Crown paragraph (hanging indent)");

338

}

339

340

System.out.println(" First line indent: " + indent[0] + "px");

341

}

342

343

} while (pageIt.Next(RIL_PARA));

344

```

345

346

### Component Image Extraction

347

348

Extract individual components as separate images for detailed analysis or processing.

349

350

```java { .api }

351

public class TessBaseAPI {

352

// Extract component images at different levels

353

public BOXA GetComponentImages(int level, boolean text_only,

354

PIXA[] pixa, int[][] blockids);

355

}

356

```

357

358

#### Usage Example

359

360

```java

361

// Extract all word images from the page

362

PIXA[] wordImages = new PIXA[1];

363

int[][] blockIds = new int[1][];

364

365

BOXA wordBoxes = api.GetComponentImages(RIL_WORD, true, wordImages, blockIds);

366

367

if (wordBoxes != null && wordImages[0] != null) {

368

int numWords = boxaGetCount(wordBoxes);

369

int numImages = pixaGetCount(wordImages[0]);

370

371

System.out.println("Extracted " + numWords + " word regions");

372

System.out.println("Generated " + numImages + " word images");

373

374

// Save individual word images

375

for (int i = 0; i < numImages; i++) {

376

PIX wordPix = pixaGetPix(wordImages[0], i, L_CLONE);

377

String filename = String.format("/tmp/word_%03d.png", i);

378

pixWrite(filename, wordPix, IFF_PNG);

379

pixDestroy(wordPix);

380

}

381

382

// Cleanup

383

boxaDestroy(wordBoxes);

384

pixaDestroy(wordImages[0]);

385

}

386

```

387

388

### Reading Order Analysis

389

390

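Determine the logical reading order of the words within a text line from the paragraph direction and each word's direction, which matters for text that mixes left-to-right and right-to-left scripts.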

391

392

```java { .api }

393

public class ResultIterator {

394

// Calculate the reading order of the words within a text line

395

public static void CalculateTextlineOrder(boolean paragraph_is_ltr,

396

int[] word_dirs,

397

int[] reading_order);

398

}

399

```

400

401

#### Usage Example

402

403

```java

404

ResultIterator resultIt = api.GetIterator();

405

resultIt.Begin();

406

407

// Collect word directions for reading order calculation

408

List<Integer> wordDirections = new ArrayList<>();

409

do {

410

int direction = resultIt.WordDirection();

411

wordDirections.add(direction);

412

} while (resultIt.Next(RIL_WORD));

413

414

// Calculate reading order

415

boolean isLtr = resultIt.ParagraphIsLtr();

416

int[] wordDirs = wordDirections.stream().mapToInt(i -> i).toArray();

417

int[] readingOrder = new int[wordDirs.length];

418

419

ResultIterator.CalculateTextlineOrder(isLtr, wordDirs, readingOrder);

420

421

// Process words in reading order

422

for (int i = 0; i < readingOrder.length; i++) {

423

int wordIndex = readingOrder[i];

424

System.out.println("Reading order " + i + ": word " + wordIndex);

425

}

426

```

427

428

## Advanced Layout Features

429

430

### Table Detection and Analysis

431

432

While Tesseract can detect table blocks (PT_TABLE), detailed table structure analysis requires additional processing:

433

434

```java

435

PageIterator pageIt = api.AnalyseLayout();

436

pageIt.Begin();

437

438

do {

439

if (pageIt.BlockType() == PT_TABLE) {

440

System.out.println("Table detected");

441

442

// Get table bounding box

443

int[] left = new int[1], top = new int[1],

444

right = new int[1], bottom = new int[1];

445

pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom);

446

447

// Extract table region for specialized processing

448

api.SetRectangle(left[0], top[0],

449

right[0] - left[0],

450

bottom[0] - top[0]);

451

452

// Process table with different PSM mode

453

api.SetPageSegMode(PSM_SPARSE_TEXT);

454

String tableText = api.GetUTF8Text();

455

456

System.out.println("Table content:\n" + tableText);

457

}

458

} while (pageIt.Next(RIL_BLOCK));

459

```

460

461

## Types

462

463

### Layout Constants

464

465

```java { .api }

466

// Iterator level constants

467

public static final int RIL_BLOCK = 0;

468

public static final int RIL_PARA = 1;

469

public static final int RIL_TEXTLINE = 2;

470

public static final int RIL_WORD = 3;

471

public static final int RIL_SYMBOL = 4;

472

473

// Page segmentation modes

474

public static final int PSM_AUTO = 3; // Fully automatic page segmentation

475

public static final int PSM_SINGLE_COLUMN = 4; // Single column layout

476

public static final int PSM_SINGLE_BLOCK = 6; // Single text block

477

public static final int PSM_SINGLE_LINE = 7; // Single line

478

public static final int PSM_SPARSE_TEXT = 11; // Find text anywhere

479

480

// Block type constants

481

public static final int PT_FLOWING_TEXT = 1;

482

public static final int PT_HEADING_TEXT = 2;

483

public static final int PT_TABLE = 6;

484

public static final int PT_VERTICAL_TEXT = 7;

485

public static final int PT_CAPTION_TEXT = 8;

486

```

487

488

### Leptonica Integration Types

489

490

```java { .api }

491

// Leptonica data structures (from org.bytedeco.leptonica)

492

public class PIX; // Image structure

493

public class PIXA; // Array of PIX images

494

public class BOXA; // Array of bounding boxes

495

public class BOX; // Single bounding box

496

public class PTA; // Array of points (polygon)

497

```