JavaCPP platform aggregator for Tesseract OCR native libraries providing cross-platform OCR capabilities in Java applications
—
Advanced page structure analysis including text block detection, reading order determination, and geometric layout information. Supports complex document layouts with tables, columns, and mixed content types for comprehensive document understanding.
Automatic analysis of page structure to identify and classify different regions and content types.
public class TessBaseAPI {
// Layout analysis entry point
public PageIterator AnalyseLayout();
// Component extraction methods
public BOXA GetRegions(PIXA[] pixa);
public BOXA GetTextlines(PIXA[] pixa, int[][] blockids);
public BOXA GetWords(PIXA[] pixa);
public BOXA GetConnectedComponents(PIXA[] cc);
public BOXA GetComponentImages(int level, boolean text_only, PIXA[] pixa, int[][] blockids);
// Page segmentation mode control
public void SetPageSegMode(int mode);
public int GetPageSegMode();
}
Component Extraction Levels:
// Layout analysis without OCR: visit every block on the page and print its
// type and bounding box.
TessBaseAPI api = new TessBaseAPI();
// Init() reports failure through a non-zero return value (missing language
// data, bad data path); continuing after a failure would only produce
// confusing errors further down.
if (api.Init(null, "eng") != 0) {
    throw new IllegalStateException("Could not initialize Tesseract");
}
api.SetImage(image);
// Perform layout analysis without OCR
PageIterator pageIt = api.AnalyseLayout();
// AnalyseLayout() yields null on error or for an empty page.
if (pageIt != null) {
    pageIt.Begin();
    int blockNum = 1;
    // Analyze each text block
    do {
        int blockType = pageIt.BlockType();
        System.out.println("Block " + blockNum + " type: " +
                getBlockTypeName(blockType));
        // Get block dimensions; each value comes back through a one-element array.
        int[] left = new int[1], top = new int[1],
                right = new int[1], bottom = new int[1];
        if (pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
            int width = right[0] - left[0];
            int height = bottom[0] - top[0];
            System.out.printf(" Size: %dx%d at (%d,%d)\n",
                    width, height, left[0], top[0]);
        }
        blockNum++;
    } while (pageIt.Next(RIL_BLOCK));
}
Configure how Tesseract analyzes page layout and text structure.
// Page segmentation mode constants
public static final int PSM_OSD_ONLY = 0; // Orientation and script detection only
public static final int PSM_AUTO_OSD = 1; // Auto page seg with OSD
public static final int PSM_AUTO_ONLY = 2; // Auto page seg without OSD
public static final int PSM_AUTO = 3; // Fully automatic page segmentation
public static final int PSM_SINGLE_COLUMN = 4; // Single column of text
public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5; // Single vertical text block
public static final int PSM_SINGLE_BLOCK = 6; // Single uniform block (default)
public static final int PSM_SINGLE_LINE = 7; // Single text line
public static final int PSM_SINGLE_WORD = 8; // Single word
public static final int PSM_CIRCLE_WORD = 9; // Single word in circle
public static final int PSM_SINGLE_CHAR = 10; // Single character
public static final int PSM_SPARSE_TEXT = 11; // Sparse text (find text anywhere)
public static final int PSM_SPARSE_TEXT_OSD = 12; // Sparse text with OSD
public static final int PSM_RAW_LINE = 13; // Raw line (bypass word detection)
// Helper functions
public static boolean PSM_OSD_ENABLED(int mode);
public static boolean PSM_ORIENTATION_ENABLED(int mode);
TessBaseAPI api = new TessBaseAPI();
// Init() reports failure through a non-zero return value.
if (api.Init(null, "eng") != 0) {
    throw new IllegalStateException("Could not initialize Tesseract");
}
// Configure for different document types
if (isNewspaper) {
    api.SetPageSegMode(PSM_AUTO); // Multi-column layout
} else if (isSingleColumn) {
    api.SetPageSegMode(PSM_SINGLE_COLUMN);
} else if (isTableCell) {
    api.SetPageSegMode(PSM_SINGLE_BLOCK);
} else if (isLicensePlate) {
    api.SetPageSegMode(PSM_SINGLE_LINE);
}
api.SetImage(image);
// GetUTF8Text() returns a natively allocated buffer wrapped in a BytePointer,
// not a java.lang.String: copy it into a String, then free the native memory.
org.bytedeco.javacpp.BytePointer utf8 = api.GetUTF8Text();
String text = null;
if (utf8 != null) {
    text = utf8.getString();
    utf8.deallocate();
}
Automatic identification and classification of different content types within the page.
// Block type constants
public static final int PT_UNKNOWN = 0; // Unknown block type
public static final int PT_FLOWING_TEXT = 1; // Regular paragraph text
public static final int PT_HEADING_TEXT = 2; // Heading or title text
public static final int PT_PULLOUT_TEXT = 3; // Pull-quote or sidebar text
public static final int PT_EQUATION = 4; // Mathematical equation
public static final int PT_INLINE_EQUATION = 5; // Inline mathematical expression
public static final int PT_TABLE = 6; // Table structure
public static final int PT_VERTICAL_TEXT = 7; // Vertical text orientation
public static final int PT_CAPTION_TEXT = 8; // Image or table caption
public static final int PT_FLOWING_IMAGE = 9; // Flowing image
public static final int PT_HEADING_IMAGE = 10; // Heading image
public static final int PT_PULLOUT_IMAGE = 11; // Pull-out image
public static final int PT_HORZ_LINE = 12; // Horizontal line
public static final int PT_VERT_LINE = 13; // Vertical line
public static final int PT_NOISE = 14; // Noise or artifacts
// Block type utility functions
public static boolean PTIsTextType(int type);
public static boolean PTIsImageType(int type);
public static boolean PTIsLineType(int type);
PageIterator pageIt = api.AnalyseLayout();
// AnalyseLayout() yields null on error or for an empty page.
if (pageIt != null) {
    pageIt.Begin();
    do {
        int blockType = pageIt.BlockType();
        // Tables must be tested first: PTIsTextType() also reports true for
        // PT_TABLE, so a table check placed after it would never be reached.
        if (blockType == PT_TABLE) {
            System.out.println("Table structure detected");
        } else if (PTIsTextType(blockType)) {
            System.out.println("Text block found");
            switch (blockType) {
                case PT_HEADING_TEXT:
                    System.out.println(" -> Heading text");
                    break;
                case PT_FLOWING_TEXT:
                    System.out.println(" -> Body text");
                    break;
                case PT_CAPTION_TEXT:
                    System.out.println(" -> Caption text");
                    break;
                default:
                    // Other text types (pull-out, vertical, inline equation)
                    // get no extra detail line.
                    break;
            }
        } else if (PTIsImageType(blockType)) {
            System.out.println("Image block found");
        }
    } while (pageIt.Next(RIL_BLOCK));
}
Determine page orientation, text direction, and script types for proper text processing.
public class PageIterator {
// Orientation information
public void Orientation(int[] orientation, int[] writing_direction,
int[] textline_order, float[] deskew_angle);
}
// Orientation constants
public static final int ORIENTATION_PAGE_UP = 0; // Normal orientation
public static final int ORIENTATION_PAGE_RIGHT = 1; // 90° clockwise
public static final int ORIENTATION_PAGE_DOWN = 2; // 180° rotation
public static final int ORIENTATION_PAGE_LEFT = 3; // 90° counter-clockwise
// Writing direction constants
public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = 0;
public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = 1;
public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = 2;
// Text line order constants
public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = 0;
public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = 1;
public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = 2;
PageIterator pageIt = api.AnalyseLayout();
// AnalyseLayout() yields null on error or for an empty page.
if (pageIt != null) {
    pageIt.Begin();
    // Get page-level orientation information; every value is returned through
    // a one-element output array.
    int[] orientation = new int[1];
    int[] writing_dir = new int[1];
    int[] textline_order = new int[1];
    float[] deskew_angle = new float[1];
    pageIt.Orientation(orientation, writing_dir, textline_order, deskew_angle);
    System.out.println("Page orientation: " + orientation[0]);
    System.out.println("Writing direction: " + writing_dir[0]);
    System.out.println("Text line order: " + textline_order[0]);
    System.out.printf("Deskew angle: %.2f degrees\n", deskew_angle[0]);
    // Rotate image if needed. All three non-upright orientations are covered;
    // ORIENTATION_PAGE_UP needs no correction.
    switch (orientation[0]) {
        case ORIENTATION_PAGE_RIGHT:
            System.out.println("Page needs 90° counter-clockwise rotation");
            break;
        case ORIENTATION_PAGE_DOWN:
            System.out.println("Page needs 180° rotation");
            break;
        case ORIENTATION_PAGE_LEFT:
            System.out.println("Page needs 90° clockwise rotation");
            break;
        default:
            break;
    }
}
Extract detailed geometric information including baselines, polygons, and precise positioning.
public class PageIterator {
// Baseline information
public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);
// Block outline polygon
public PTA BlockPolygon();
// Image extraction with padding
public PIX GetImage(int level, int padding, PIX original_img,
int[] left, int[] top);
// Binary image extraction
public PIX GetBinaryImage(int level);
}
PageIterator pageIt = api.AnalyseLayout();
// AnalyseLayout() yields null on error or for an empty page.
if (pageIt != null) {
    pageIt.Begin();
    // Running index that gives every exported line image its own file name.
    int lineIndex = 0;
    // Extract geometric information for text lines
    do {
        if (pageIt.IsAtBeginningOf(RIL_TEXTLINE)) {
            // Get text line baseline
            int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
            if (pageIt.Baseline(RIL_TEXTLINE, x1, y1, x2, y2)) {
                System.out.printf("Baseline: (%d,%d) to (%d,%d)\n",
                        x1[0], y1[0], x2[0], y2[0]);
                // Calculate text angle from the slope of the baseline
                double angle = Math.atan2(y2[0] - y1[0], x2[0] - x1[0]) * 180 / Math.PI;
                System.out.printf("Text angle: %.1f degrees\n", angle);
            }
            // Extract text line image
            PIX lineImage = pageIt.GetBinaryImage(RIL_TEXTLINE);
            if (lineImage != null) {
                pixWrite("/tmp/line_" + lineIndex + ".png", lineImage, IFF_PNG);
                // The caller owns the returned image and must release it.
                pixDestroy(lineImage);
            }
            lineIndex++;
        }
    } while (pageIt.Next(RIL_TEXTLINE));
}
Detailed paragraph-level analysis including justification, list detection, and formatting.
public class PageIterator {
// Paragraph information
public void ParagraphInfo(int[] justification, boolean[] is_list_item,
boolean[] is_crown, int[] first_line_indent);
}
// Paragraph justification constants
public static final int JUSTIFICATION_UNKNOWN = 0;
public static final int JUSTIFICATION_LEFT = 1;
public static final int JUSTIFICATION_CENTER = 2;
public static final int JUSTIFICATION_RIGHT = 3;
PageIterator pageIt = api.AnalyseLayout();
pageIt.Begin();
// Walk the page paragraph by paragraph and print each paragraph's formatting.
do {
    // Skip positions that are not the start of a paragraph.
    if (!pageIt.IsAtBeginningOf(RIL_PARA)) {
        continue;
    }
    // One-element arrays serve as output parameters for ParagraphInfo().
    int[] justify = new int[1];
    boolean[] listItem = new boolean[1];
    boolean[] crown = new boolean[1];
    int[] firstLineIndent = new int[1];
    pageIt.ParagraphInfo(justify, listItem, crown, firstLineIndent);

    System.out.println("Paragraph properties:");
    int kind = justify[0];
    if (kind == JUSTIFICATION_LEFT) {
        System.out.println(" Justification: Left");
    } else if (kind == JUSTIFICATION_CENTER) {
        System.out.println(" Justification: Center");
    } else if (kind == JUSTIFICATION_RIGHT) {
        System.out.println(" Justification: Right");
    } else {
        System.out.println(" Justification: Unknown");
    }
    if (listItem[0]) {
        System.out.println(" -> List item detected");
    }
    if (crown[0]) {
        System.out.println(" -> Crown paragraph (hanging indent)");
    }
    System.out.println(" First line indent: " + firstLineIndent[0] + "px");
} while (pageIt.Next(RIL_PARA));
Extract individual components as separate images for detailed analysis or processing.
public class TessBaseAPI {
// Extract component images at different levels
public BOXA GetComponentImages(int level, boolean text_only,
PIXA[] pixa, int[][] blockids);
}
// Extract all word images from the page
PIXA[] wordImages = new PIXA[1];
int[][] blockIds = new int[1][];
BOXA wordBoxes = api.GetComponentImages(RIL_WORD, true, wordImages, blockIds);
try {
    if (wordBoxes != null && wordImages[0] != null) {
        int numWords = boxaGetCount(wordBoxes);
        int numImages = pixaGetCount(wordImages[0]);
        System.out.println("Extracted " + numWords + " word regions");
        System.out.println("Generated " + numImages + " word images");
        // Save individual word images
        for (int i = 0; i < numImages; i++) {
            PIX wordPix = pixaGetPix(wordImages[0], i, L_CLONE);
            if (wordPix == null) {
                // Nothing to write for this slot; move on to the next image.
                continue;
            }
            String filename = String.format("/tmp/word_%03d.png", i);
            pixWrite(filename, wordPix, IFF_PNG);
            // L_CLONE handed out an extra reference that has to be dropped again.
            pixDestroy(wordPix);
        }
    }
} finally {
    // Cleanup: release each native object on its own, so that a call which
    // produced only one of the two results does not leak the other.
    if (wordBoxes != null) {
        boxaDestroy(wordBoxes);
    }
    if (wordImages[0] != null) {
        pixaDestroy(wordImages[0]);
    }
}
Determine the logical reading order for complex layouts with multiple columns or regions.
public class ResultIterator {
// Calculate reading order for text lines
public static void CalculateTextlineOrder(boolean paragraph_is_ltr,
int[] word_dirs,
int[] reading_order);
}
ResultIterator resultIt = api.GetIterator();
// GetIterator() yields null when no analysis/recognition results are available.
if (resultIt != null) {
    resultIt.Begin();
    // Read the paragraph direction while the iterator still points at the
    // first word; once the loop below finishes, the iterator has moved past
    // the last word and no longer describes the words that were collected.
    boolean isLtr = resultIt.ParagraphIsLtr();
    // Collect word directions for reading order calculation
    List<Integer> wordDirections = new ArrayList<>();
    do {
        wordDirections.add(resultIt.WordDirection());
    } while (resultIt.Next(RIL_WORD));
    // Calculate reading order
    int[] wordDirs = wordDirections.stream().mapToInt(Integer::intValue).toArray();
    int[] readingOrder = new int[wordDirs.length];
    ResultIterator.CalculateTextlineOrder(isLtr, wordDirs, readingOrder);
    // Process words in reading order
    for (int i = 0; i < readingOrder.length; i++) {
        int wordIndex = readingOrder[i];
        System.out.println("Reading order " + i + ": word " + wordIndex);
    }
}
While Tesseract can detect table blocks (PT_TABLE), detailed table structure analysis requires additional processing:
PageIterator pageIt = api.AnalyseLayout();
// Pass 1: only record where the tables are. SetRectangle() discards the
// current analysis results, which would invalidate pageIt if it were called
// while the iteration is still in progress.
List<int[]> tableBounds = new ArrayList<>();
if (pageIt != null) {
    pageIt.Begin();
    do {
        if (pageIt.BlockType() == PT_TABLE) {
            // Get table bounding box
            int[] left = new int[1], top = new int[1],
                    right = new int[1], bottom = new int[1];
            if (pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
                // Stored as {left, top, width, height}, the form SetRectangle() expects.
                tableBounds.add(new int[] {
                        left[0], top[0], right[0] - left[0], bottom[0] - top[0]});
            }
        }
    } while (pageIt.Next(RIL_BLOCK));
}
// Pass 2: the iterator is no longer needed, so the API state can be changed safely.
if (!tableBounds.isEmpty()) {
    // Process table with different PSM mode
    api.SetPageSegMode(PSM_SPARSE_TEXT);
}
for (int[] box : tableBounds) {
    System.out.println("Table detected");
    // Extract table region for specialized processing
    api.SetRectangle(box[0], box[1], box[2], box[3]);
    // GetUTF8Text() returns a natively allocated buffer wrapped in a
    // BytePointer: copy it into a String, then free the native memory.
    org.bytedeco.javacpp.BytePointer utf8 = api.GetUTF8Text();
    if (utf8 != null) {
        String tableText = utf8.getString();
        utf8.deallocate();
        System.out.println("Table content:\n" + tableText);
    }
}
// Iterator level constants
public static final int RIL_BLOCK = 0;
public static final int RIL_PARA = 1;
public static final int RIL_TEXTLINE = 2;
public static final int RIL_WORD = 3;
public static final int RIL_SYMBOL = 4;
// Page segmentation modes
public static final int PSM_AUTO = 3; // Fully automatic page segmentation
public static final int PSM_SINGLE_COLUMN = 4; // Single column layout
public static final int PSM_SINGLE_BLOCK = 6; // Single text block
public static final int PSM_SINGLE_LINE = 7; // Single line
public static final int PSM_SPARSE_TEXT = 11; // Find text anywhere
// Block type constants
public static final int PT_FLOWING_TEXT = 1;
public static final int PT_HEADING_TEXT = 2;
public static final int PT_TABLE = 6;
public static final int PT_VERTICAL_TEXT = 7;
public static final int PT_CAPTION_TEXT = 8;
// Leptonica data structures (from org.bytedeco.leptonica)
public class PIX; // Image structure
public class PIXA; // Array of PIX images
public class BOXA; // Array of bounding boxes
public class BOX; // Single bounding box
public class PTA; // Array of points (polygon)
Install with Tessl CLI
npx tessl i tessl/maven-org-bytedeco--tesseract-platform