JavaCPP platform aggregator for Tesseract OCR native libraries providing cross-platform OCR capabilities in Java applications
—
Hierarchical iterators for navigating OCR results from page structure down to individual characters. The iterator hierarchy provides access to bounding boxes, confidence scores, text formatting, and detailed layout information at multiple levels of granularity.
The iterator classes form a hierarchy that provides increasingly detailed access to recognition results:
// Base iterator for page structure (no OCR text access)
// Overview listing: only signatures are shown, no method bodies.
public class PageIterator {
// Navigation
public void Begin(); // Move to start of page
public boolean Next(int level); // Move to next element at level
public void RestartParagraph(); // Move to paragraph start
public void RestartRow(); // Move to row start
// Position queries (the usage snippets pass RIL_* iterator levels as level)
public boolean IsAtBeginningOf(int level); // Check if at beginning
public boolean IsAtFinalElement(int level, int element); // NOTE(review): presumably tests for the last `element` inside the current `level` - confirm against the Tesseract docs
public boolean Empty(int level); // Check if level is empty
}
// Adds OCR text access (extends PageIterator)
public class LTRResultIterator extends PageIterator {
// Text at the given level; note the declared return type is BytePointer, not String
public native @Cast("char*") BytePointer GetUTF8Text(int level);
// Confidence for the element at the given level (the usage snippets print it as a percentage)
public float Confidence(int level);
// Text separators
public void SetLineSeparator(String new_line);
public void SetParagraphSeparator(String new_para);
}
// Adds bidirectional text support (extends LTRResultIterator)
public class ResultIterator extends LTRResultIterator {
// Text direction of the current paragraph
public boolean ParagraphIsLtr();
// Advanced LSTM outputs
public StringFloatPairVectorVector GetRawLSTMTimesteps();
public StringFloatPairVectorVector GetBestLSTMSymbolChoices();
}
Iterator Levels:
- RIL_BLOCK (0): Text blocks, images, tables
- RIL_PARA (1): Paragraphs within blocks
- RIL_TEXTLINE (2): Lines within paragraphs
- RIL_WORD (3): Words within lines
- RIL_SYMBOL (4): Characters within words

Navigate through the page hierarchy and access structural information.
// Navigation subset of PageIterator (signatures only, no method bodies)
public class PageIterator {
// Movement
public void Begin(); // Move to start of page
public boolean Next(int level); // Move to next element at level
public void RestartParagraph(); // Move to paragraph start
public void RestartRow(); // Move to row start
// Position testing
public boolean IsAtBeginningOf(int level); // Check if at beginning
public boolean IsAtFinalElement(int level, int element); // NOTE(review): presumably tests for the last `element` inside the current `level` - confirm against the Tesseract docs
public boolean Empty(int level); // Check if level is empty
}
TessBaseAPI api = new TessBaseAPI();
// Init reports failure through a non-zero status; stop here rather than
// analysing an image with an engine that never loaded its language data.
if (api.Init(null, "eng") != 0) {
    System.err.println("Could not initialize tesseract.");
    System.exit(1);
}
api.SetImage(image);
PageIterator pageIt = api.AnalyseLayout();
// No iterator means there is nothing to walk, so only iterate when one was returned
if (pageIt != null) {
    pageIt.Begin();
    // Iterate through all text blocks
    do {
        int blockType = pageIt.BlockType();
        System.out.println("Block type: " + blockType);
        // Get block bounding box; each coordinate comes back through a one-element array
        int[] left = new int[1], top = new int[1],
              right = new int[1], bottom = new int[1];
        if (pageIt.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
            System.out.println("Block bounds: " + left[0] + "," + top[0] +
                    " to " + right[0] + "," + bottom[0]);
        }
    } while (pageIt.Next(RIL_BLOCK));
}

Access precise coordinate information for layout elements.
// Geometry subset of PageIterator (signatures only, no method bodies)
public class PageIterator {
// Bounding rectangles; the usage snippets pass one-element arrays and read index 0 when true is returned
public boolean BoundingBox(int level, int[] left, int[] top,
int[] right, int[] bottom);
// Text baselines, reported as two points (x1, y1) and (x2, y2)
public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);
// Orientation information, written into the supplied arrays
public void Orientation(int[] orientation, int[] writing_direction,
int[] textline_order, float[] deskew_angle);
// Block outline polygon
public PTA BlockPolygon();
}
Coordinate System:
ResultIterator resultIt = api.GetIterator();
// Guard against a missing iterator, the same way the layout snippet guards AnalyseLayout()
if (resultIt != null) {
    resultIt.Begin();
    // Get coordinates for all words
    do {
        int[] left = new int[1], top = new int[1],
              right = new int[1], bottom = new int[1];
        if (resultIt.BoundingBox(RIL_WORD, left, top, right, bottom)) {
            // GetUTF8Text is declared to return a BytePointer (org.bytedeco.javacpp), not a String:
            // copy the text out, then release the native buffer.
            BytePointer wordPtr = resultIt.GetUTF8Text(RIL_WORD);
            String word = "";
            if (wordPtr != null) {
                word = wordPtr.getString();
                wordPtr.deallocate();
            }
            float confidence = resultIt.Confidence(RIL_WORD);
            System.out.printf("Word: '%s' at (%d,%d)-(%d,%d) conf=%.1f%%\n",
                    word, left[0], top[0], right[0], bottom[0], confidence);
        }
    } while (resultIt.Next(RIL_WORD));
}

Extract text content with detailed formatting and style information.
// Text and formatting subset of LTRResultIterator (signatures only, no method bodies)
public class LTRResultIterator {
// Text extraction; the declared return type is BytePointer, not String
public native @Cast("char*") BytePointer GetUTF8Text(int level);
// Text separators
public void SetLineSeparator(String new_line);
public void SetParagraphSeparator(String new_para);
// Font and style information: attributes are written into the supplied arrays,
// and the usage snippet treats the return value as the font name
public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
boolean[] is_underlined, boolean[] is_monospace,
boolean[] is_serif, boolean[] is_smallcaps,
int[] pointsize, int[] font_id);
// Language and dictionary information
public String WordRecognitionLanguage();
public boolean WordIsFromDictionary();
public boolean WordIsNumeric();
public int WordDirection(); // NOTE(review): presumably one of the DIR_* constants - confirm
public int BlanksBeforeWord();
}
ResultIterator it = api.GetIterator();
// Guard against a missing iterator, the same way the layout snippet guards AnalyseLayout()
if (it != null) {
    it.Begin();
    // Extract formatted text information
    do {
        // GetUTF8Text is declared to return a BytePointer (org.bytedeco.javacpp), not a String:
        // copy the text out, then release the native buffer.
        BytePointer wordPtr = it.GetUTF8Text(RIL_WORD);
        String word = "";
        if (wordPtr != null) {
            word = wordPtr.getString();
            wordPtr.deallocate();
        }
        float conf = it.Confidence(RIL_WORD);
        // Get font styling; every attribute is returned through a one-element array
        boolean[] bold = new boolean[1], italic = new boolean[1],
                  underlined = new boolean[1], monospace = new boolean[1],
                  serif = new boolean[1], smallcaps = new boolean[1];
        int[] pointsize = new int[1], font_id = new int[1];
        String fontName = it.WordFontAttributes(bold, italic, underlined,
                monospace, serif, smallcaps,
                pointsize, font_id);
        System.out.printf("Word: '%s' Font: %s Size: %dpt Bold: %s Italic: %s\n",
                word, fontName, pointsize[0], bold[0], italic[0]);
        // Check word properties
        if (it.WordIsNumeric()) {
            System.out.println(" -> Numeric word");
        }
        if (it.WordIsFromDictionary()) {
            System.out.println(" -> Dictionary word");
        }
    } while (it.Next(RIL_WORD));
}

Access individual character information including superscripts, subscripts, and detailed symbol properties.
// Symbol-level subset of LTRResultIterator (signatures only, no method bodies)
public class LTRResultIterator {
// Symbol properties of the character the iterator is currently on
public boolean SymbolIsSuperscript();
public boolean SymbolIsSubscript();
public boolean SymbolIsDropcap();
}
ResultIterator it = api.GetIterator();
// Guard against a missing iterator, the same way the layout snippet guards AnalyseLayout()
if (it != null) {
    it.Begin();
    // Analyze character-level details
    do {
        // GetUTF8Text is declared to return a BytePointer (org.bytedeco.javacpp), not a String:
        // copy the text out, then release the native buffer.
        BytePointer symbolPtr = it.GetUTF8Text(RIL_SYMBOL);
        String symbol = "";
        if (symbolPtr != null) {
            symbol = symbolPtr.getString();
            symbolPtr.deallocate();
        }
        float conf = it.Confidence(RIL_SYMBOL);
        System.out.printf("Symbol: '%s' conf=%.1f%%", symbol, conf);
        if (it.SymbolIsSuperscript()) {
            System.out.print(" [SUPERSCRIPT]");
        }
        if (it.SymbolIsSubscript()) {
            System.out.print(" [SUBSCRIPT]");
        }
        if (it.SymbolIsDropcap()) {
            System.out.print(" [DROPCAP]");
        }
        // Terminate the line started by printf above
        System.out.println();
    } while (it.Next(RIL_SYMBOL));
}

Handle complex scripts with mixed text directions and reading orders.
// Bidirectional-text subset of ResultIterator (signatures only, no method bodies)
public class ResultIterator {
// Text direction of the current paragraph
public boolean ParagraphIsLtr();
// Reading order calculation
// NOTE(review): reading_order looks like an output array filled from word_dirs - confirm against the Tesseract docs
public static void CalculateTextlineOrder(boolean paragraph_is_ltr,
int[] word_dirs,
int[] reading_order);
// Advanced LSTM outputs
public StringFloatPairVectorVector GetRawLSTMTimesteps();
public StringFloatPairVectorVector GetBestLSTMSymbolChoices();
}
Text Direction Constants:
- DIR_NEUTRAL (0): Neutral characters
- DIR_LEFT_TO_RIGHT (1): LTR text (Latin, Cyrillic, etc.)
- DIR_RIGHT_TO_LEFT (2): RTL text (Arabic, Hebrew, etc.)
- DIR_MIX (3): Mixed direction text

ResultIterator it = api.GetIterator();
// Guard against a missing iterator, the same way the layout snippet guards AnalyseLayout()
if (it != null) {
    it.Begin();
    // Handle bidirectional text
    do {
        // Report the paragraph direction once, when the iterator enters a new paragraph
        if (it.IsAtBeginningOf(RIL_PARA)) {
            boolean isLtr = it.ParagraphIsLtr();
            System.out.println("Paragraph direction: " +
                    (isLtr ? "Left-to-Right" : "Right-to-Left"));
        }
        // GetUTF8Text is declared to return a BytePointer (org.bytedeco.javacpp), not a String:
        // copy the text out, then release the native buffer.
        BytePointer wordPtr = it.GetUTF8Text(RIL_WORD);
        String word = "";
        if (wordPtr != null) {
            word = wordPtr.getString();
            wordPtr.deallocate();
        }
        int direction = it.WordDirection();
        System.out.printf("Word: '%s' Direction: %d\n", word, direction);
    } while (it.Next(RIL_WORD));
}

Access multiple recognition candidates for improved accuracy.
// Iterator over alternative recognition choices (signatures only, no method bodies)
public class ChoiceIterator {
// Constructor from result iterator
public ChoiceIterator(LTRResultIterator result_it);
// Navigation; the usage snippet loops while this returns true
public boolean Next();
// Choice information for the choice the iterator is currently on
public String GetUTF8Text(); // NOTE(review): LTRResultIterator.GetUTF8Text is listed as returning BytePointer - confirm this one really returns String
public float Confidence();
public StringFloatPairVectorVector Timesteps();
}
ResultIterator resultIt = api.GetIterator();
// Guard against a missing iterator, the same way the layout snippet guards AnalyseLayout()
if (resultIt != null) {
    resultIt.Begin();
    // Get alternative recognition choices for each symbol
    do {
        // GetUTF8Text(level) is declared to return a BytePointer (org.bytedeco.javacpp), not a String:
        // copy the text out, then release the native buffer.
        BytePointer mainPtr = resultIt.GetUTF8Text(RIL_SYMBOL);
        String mainChoice = "";
        if (mainPtr != null) {
            mainChoice = mainPtr.getString();
            mainPtr.deallocate();
        }
        float mainConf = resultIt.Confidence(RIL_SYMBOL);
        System.out.printf("Main choice: '%s' (%.1f%%)\n", mainChoice, mainConf);
        // Get alternatives
        ChoiceIterator choiceIt = new ChoiceIterator(resultIt);
        int choiceNum = 1;
        while (choiceIt.Next()) {
            // NOTE(review): kept as String to match the ChoiceIterator listing; if the binding
            // returns a BytePointer here as well, call getString() on it - confirm.
            String altChoice = choiceIt.GetUTF8Text();
            float altConf = choiceIt.Confidence();
            System.out.printf(" Alt %d: '%s' (%.1f%%)\n",
                    choiceNum++, altChoice, altConf);
        }
    } while (resultIt.Next(RIL_SYMBOL));
}

Access detailed page layout and block type information.
// Layout-analysis subset of PageIterator (signatures only, no method bodies)
public class PageIterator {
// Block classification; the usage snippet compares the result with PT_* block types
public int BlockType();
// Paragraph information, written into the supplied one-element arrays
public void ParagraphInfo(int[] justification, boolean[] is_list_item,
boolean[] is_crown, int[] first_line_indent);
// Image extraction
public PIX GetBinaryImage(int level);
public PIX GetImage(int level, int padding, PIX original_img,
int[] left, int[] top);
}
Block Types:
- PT_FLOWING_TEXT (1): Regular paragraph text
- PT_HEADING_TEXT (2): Heading or title text
- PT_PULLOUT_TEXT (3): Pull-quote or sidebar text
- PT_EQUATION (4): Mathematical equation
- PT_TABLE (6): Table structure
- PT_VERTICAL_TEXT (7): Vertical text orientation
- PT_CAPTION_TEXT (8): Image or table caption

PageIterator pageIt = api.AnalyseLayout();
// AnalyseLayout() is null-checked in the first layout snippet; apply the same guard here
if (pageIt != null) {
    pageIt.Begin();
    do {
        int blockType = pageIt.BlockType();
        // getBlockTypeName is a caller-supplied helper that maps a PT_* value to a label
        String typeName = getBlockTypeName(blockType);
        System.out.println("Block type: " + typeName);
        if (blockType == PT_FLOWING_TEXT) {
            // Get paragraph details; each value is returned through a one-element array
            int[] justification = new int[1];
            boolean[] is_list = new boolean[1], is_crown = new boolean[1];
            int[] indent = new int[1];
            pageIt.ParagraphInfo(justification, is_list, is_crown, indent);
            System.out.println(" Justification: " + justification[0]);
            System.out.println(" List item: " + is_list[0]);
            System.out.println(" First line indent: " + indent[0]);
        }
    } while (pageIt.Next(RIL_BLOCK));
}

public static final int RIL_BLOCK = 0; // Block level
public static final int RIL_PARA = 1; // Paragraph level
public static final int RIL_TEXTLINE = 2; // Text line level
public static final int RIL_WORD = 3; // Word level
public static final int RIL_SYMBOL = 4; // Character/symbol level

// Text direction constants.
// DIR_NEUTRAL sits on its own line so it is a real declaration rather than
// trailing text inside the RIL_SYMBOL comment.
public static final int DIR_NEUTRAL = 0; // Neutral text
public static final int DIR_LEFT_TO_RIGHT = 1; // LTR text
public static final int DIR_RIGHT_TO_LEFT = 2; // RTL text
public static final int DIR_MIX = 3; // Mixed direction

// Vector of string-float pairs for LSTM outputs
// Return type of GetRawLSTMTimesteps(), GetBestLSTMSymbolChoices() and ChoiceIterator.Timesteps()
public class StringFloatPairVectorVector {
// Element count (NOTE(review): presumably of the outer vector - confirm)
public long size();
// Contains timestep information and confidence scores
// for neural network recognition alternatives
}
// Mutable iterator for result modification (opaque)
// No members are listed for this type.
public class MutableIterator {
// Allows modification of recognition results
// Internal implementation details not exposed
}
Install with Tessl CLI
npx tessl i tessl/maven-org-bytedeco--tesseract-platform