CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-org-bytedeco--tesseract

JavaCPP Presets for Tesseract - Java wrapper library providing JNI bindings to the native Tesseract OCR library version 5.5.1, enabling optical character recognition capabilities in Java applications

Pending
Overview
Eval results
Files

docs/iterators.md

Result Analysis with Iterators

Detailed analysis of OCR results using iterator classes that provide hierarchical navigation through page structure, word-level confidence scores, bounding boxes, font information, and alternative recognition choices.

Capabilities

ResultIterator Class

Primary iterator for OCR results. It visits elements in proper reading order and handles bidirectional text correctly.

/**
 * Iterator for OCR results in proper reading order
 * Handles bidirectional text correctly and provides access to recognition results
 *
 * The {@code level} arguments take the page iterator level constants
 * (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL).
 * Inherits confidence, font and word/symbol property accessors from
 * LTRResultIterator, and bounding-box/layout accessors from PageIterator.
 */
public class ResultIterator extends LTRResultIterator {
    // Navigation methods
    // Begin() is called before iterating in the usage examples of this document.
    public void Begin();
    // Used as the loop condition in the usage examples: the loop ends when it returns false.
    public boolean Next(int level);
    public boolean IsAtBeginningOf(int level);
    // NOTE(review): presumably true when the current `element` is the last one
    // inside the enclosing `level` - confirm against the Tesseract API docs.
    public boolean IsAtFinalElement(int level, int element);
    
    // Text access
    // The usage examples call deallocate() on the returned BytePointer once the text is consumed.
    public BytePointer GetUTF8Text(int level);
    // Number of spaces before the current word.
    public int BlanksBeforeWord();
    // True when the current paragraph is left-to-right.
    public boolean ParagraphIsLtr();
    
    // LSTM support  
    public StringFloatPairVectorVector GetRawLSTMTimesteps();
    public StringFloatPairVectorVector GetBestLSTMSymbolChoices();
}

Detailed Analysis Example:

import org.bytedeco.tesseract.*;
import static org.bytedeco.tesseract.global.tesseract.*;

// Get results iterator (check for null before use)
ResultIterator ri = api.GetIterator();
int level = RIL_WORD;

if (ri != null) {
    do {
        // Get word text; skip positions that carry no text (e.g. an empty page)
        // instead of failing with a NullPointerException on word.getString()
        BytePointer word = ri.GetUTF8Text(level);
        if (word == null) {
            continue;  // in a do-while, continue still evaluates ri.Next(level)
        }
        try {
            float conf = ri.Confidence(level);

            // Get bounding box (each one-element array receives one coordinate)
            int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
            ri.BoundingBox(level, x1, y1, x2, y2);

            // Get font information (one-element arrays act as output parameters)
            boolean[] isBold = new boolean[1], isItalic = new boolean[1];
            boolean[] isUnderlined = new boolean[1], isMonospace = new boolean[1];
            boolean[] isSerif = new boolean[1], isSmallcaps = new boolean[1];
            int[] pointsize = new int[1], fontId = new int[1];

            // NOTE(review): fontName is presumably null when the engine has no
            // font information for the word; %s then prints "null".
            String fontName = ri.WordFontAttributes(isBold, isItalic, isUnderlined,
                                                   isMonospace, isSerif, isSmallcaps,
                                                   pointsize, fontId);

            System.out.printf("Word: '%s' | Confidence: %.2f%% | Box: (%d,%d,%d,%d) | Font: %s %dpt%n",
                             word.getString(), conf, x1[0], y1[0], x2[0], y2[0],
                             fontName, pointsize[0]);
        } finally {
            // Release the native text buffer even if formatting throws
            word.deallocate();
        }
    } while (ri.Next(level));
}

PageIterator Class

Base iterator for page structure hierarchy providing layout analysis without OCR results.

/**
 * Iterator for page structure hierarchy without OCR results
 * Provides access to layout analysis and page structure
 *
 * The {@code level} arguments take the page iterator level constants
 * (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL).
 * Pointer-typed parameters other than original_img are output parameters
 * that receive values.
 */
public class PageIterator extends Pointer {
    // Navigation
    public void Begin();
    // Used as the loop condition in the usage examples: the loop ends when it returns false.
    public boolean Next(int level);
    public void RestartParagraph();
    public void RestartRow();
    public boolean IsAtBeginningOf(int level);
    public boolean IsAtFinalElement(int level, int element);
    
    // Bounding boxes
    // Writes the bounding rectangle of the element at `level` into left/top/right/bottom;
    // returns true if a bounding box is available.
    public boolean BoundingBox(int level, IntPointer left, IntPointer top, 
                              IntPointer right, IntPointer bottom);
    public void SetBoundingBoxComponents(boolean include_upper_dots, boolean include_lower_dots);
    
    // Content access
    // Integer block type code of the current block.
    public int BlockType();
    public PTA BlockPolygon();
    public PIX GetBinaryImage(int level);
    public PIX GetImage(int level, int padding, PIX original_img, 
                       IntPointer left, IntPointer top);
    // Writes the baseline start (x1, y1) and end (x2, y2); returns true if a baseline is available.
    public boolean Baseline(int level, IntPointer x1, IntPointer y1, 
                           IntPointer x2, IntPointer y2);
    
    // Text properties
    // Outputs: page orientation, writing direction, text line order and deskew angle.
    // The layout example in this document prints the orientation as orientation * 90 degrees.
    public void Orientation(IntPointer orientation, IntPointer writing_direction,
                           IntPointer textline_order, FloatPointer deskew_angle);
    // Outputs: paragraph justification, list-item flag, crown flag and first-line indent.
    public void ParagraphInfo(IntPointer justification, BoolPointer is_list_item,
                             BoolPointer is_crown, IntPointer first_line_indent); 
}

LTRResultIterator Class

Left-to-right result iterator providing text-specific methods and font information.

/**
 * Left-to-right result iterator with text-specific methods
 * Base class for ResultIterator with detailed text analysis capabilities
 *
 * Adds text, confidence, font and word/symbol property accessors on top of
 * the navigation and layout methods inherited from PageIterator.
 */
public class LTRResultIterator extends PageIterator {
    // Text output
    // The usage examples call deallocate() on the returned BytePointer once the text is consumed.
    public BytePointer GetUTF8Text(int level);
    public void SetLineSeparator(String new_line);
    public void SetParagraphSeparator(String new_para);
    // The usage examples print this value as a percentage.
    public float Confidence(int level);
    
    // Font information
    // Returns the font family name; the arguments are output parameters.
    // NOTE(review): the usage examples in this document pass int[] for pointsize
    // and font_id, while this listing shows IntBuffer - confirm which overloads
    // the bindings actually expose.
    public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
                                   boolean[] is_underlined, boolean[] is_monospace,
                                   boolean[] is_serif, boolean[] is_smallcaps,
                                   IntBuffer pointsize, IntBuffer font_id);
    public String WordRecognitionLanguage();
    // Text direction of the current word (LTR/RTL).
    public int WordDirection();
    
    // Word properties
    // True when the word was found in the dictionary.
    public boolean WordIsFromDictionary();
    public boolean WordIsNumeric();
    // Number of spaces before the current word.
    public int BlanksBeforeWord();
    
    // Symbol properties
    public boolean SymbolIsSuperscript();
    public boolean SymbolIsSubscript();
    // True when the symbol is a drop capital.
    public boolean SymbolIsDropcap();
}

ChoiceIterator Class

Iterator for examining alternative recognition choices for a single symbol.

/**
 * Iterator over classifier choices for a single symbol
 * Provides access to alternative recognition results with confidence scores
 *
 * Usage pattern shown in this document: construct from a result iterator,
 * then read the current choice and advance with
 * {@code do { ... } while (ci.Next());}.
 */
public class ChoiceIterator extends Pointer {
    /**
     * Create choice iterator from result iterator position
     * @param result_it Result iterator positioned at symbol
     */
    public ChoiceIterator(LTRResultIterator result_it);
    
    /**
     * Move to next choice for current symbol
     * @return true if more choices available
     */
    public boolean Next();
    
    /**
     * Get text of current choice (do not deallocate)
     * Unlike the text returned by the result iterators' GetUTF8Text(level),
     * which the examples release with deallocate(), this text must be left alone.
     * @return Choice text as BytePointer
     */
    public BytePointer GetUTF8Text();
    
    /**
     * Get confidence score for current choice
     * @return Confidence percentage (0-100)
     */
    public float Confidence();
    
    /**
     * Get LSTM timesteps for current choice
     * @return Complex timestep data structure
     */
    public StringFloatPairVectorVector Timesteps();
}

Alternative Choices Example:

// Get the results iterator; it is the value that can be null, not the
// ChoiceIterator created with `new` below.
ResultIterator ri = api.GetIterator();

if (ri != null) {
    // Start at the first symbol; advance with ri.Next(RIL_SYMBOL) to inspect other symbols
    ri.Begin();

    // Get choices for current symbol
    ChoiceIterator ci = new ChoiceIterator(ri);
    do {
        BytePointer choice = ci.GetUTF8Text();
        // Skip positions that report no choice text instead of failing on getString()
        if (choice != null) {
            float conf = ci.Confidence();
            System.out.printf("Choice: '%s' (%.2f%%)%n", choice.getString(), conf);
        }
        // Note: choice text should not be deallocated
    } while (ci.Next());
}

Iterator Level Constants

Navigation and analysis can be performed at different hierarchical levels:

// Page Iterator Level Constants
// Listed from the coarsest level (block) to the finest (symbol); these are the
// values passed as the `level` argument of the iterator methods.
public static final int RIL_BLOCK = 0;      // Block level (paragraphs, images, etc.)
public static final int RIL_PARA = 1;       // Paragraph level
public static final int RIL_TEXTLINE = 2;   // Text line level
public static final int RIL_WORD = 3;       // Word level
public static final int RIL_SYMBOL = 4;     // Symbol/character level

Hierarchical Navigation Example:

ResultIterator ri = api.GetIterator();

if (ri != null) {
    // Iterate through paragraphs
    ri.Begin();
    do {
        BytePointer para = ri.GetUTF8Text(RIL_PARA);
        if (para == null) {
            continue;  // nothing to report here; still evaluates ri.Next(RIL_PARA)
        }
        System.out.println("=== PARAGRAPH ===");
        System.out.println("Paragraph: " + para.getString());
        para.deallocate();

        // Iterate through words in this paragraph.
        // Stop ON the paragraph's last word rather than after stepping onto the
        // next paragraph's first word: the outer ri.Next(RIL_PARA) then lands on
        // the next paragraph instead of skipping over it.
        boolean lastWordOfParagraph;
        do {
            BytePointer word = ri.GetUTF8Text(RIL_WORD);
            if (word != null) {
                float conf = ri.Confidence(RIL_WORD);
                System.out.printf("  Word: '%s' (%.1f%%)%n", word.getString(), conf);
                word.deallocate();
            }
            lastWordOfParagraph = ri.IsAtFinalElement(RIL_PARA, RIL_WORD);
        } while (!lastWordOfParagraph && ri.Next(RIL_WORD));

    } while (ri.Next(RIL_PARA));
}

Font and Style Information

Detailed font attributes available through iterator methods:

/**
 * Get comprehensive font attributes for current word
 *
 * Every argument is an output parameter: the usage examples pass one-element
 * arrays and read the result from index 0 after the call.
 * NOTE(review): the usage examples in this document pass int[] for pointsize
 * and font_id, while this signature shows IntBuffer - confirm which overloads
 * the bindings actually expose.
 *
 * @param is_bold Output: true if font is bold
 * @param is_italic Output: true if font is italic
 * @param is_underlined Output: true if text is underlined
 * @param is_monospace Output: true if font is monospace
 * @param is_serif Output: true if font has serifs
 * @param is_smallcaps Output: true if text is small caps
 * @param pointsize Output: font size in points
 * @param font_id Output: internal font identifier
 * @return Font family name
 */
public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
                               boolean[] is_underlined, boolean[] is_monospace,
                               boolean[] is_serif, boolean[] is_smallcaps,
                               IntBuffer pointsize, IntBuffer font_id);

Font Analysis Example:

ResultIterator ri = api.GetIterator();

// Check for null before using the iterator
if (ri != null) {
    ri.Begin();

    do {
        BytePointer word = ri.GetUTF8Text(RIL_WORD);
        if (word == null) {
            continue;  // no text at this position; still evaluates ri.Next(RIL_WORD)
        }

        // Get font attributes (one-element arrays act as output parameters)
        boolean[] bold = new boolean[1], italic = new boolean[1];
        boolean[] underlined = new boolean[1], monospace = new boolean[1];
        boolean[] serif = new boolean[1], smallcaps = new boolean[1];
        int[] size = new int[1], fontId = new int[1];

        String fontFamily = ri.WordFontAttributes(bold, italic, underlined,
                                                 monospace, serif, smallcaps,
                                                 size, fontId);

        // Build style description
        StringBuilder style = new StringBuilder();
        if (bold[0]) style.append("Bold ");
        if (italic[0]) style.append("Italic ");
        if (underlined[0]) style.append("Underlined ");
        if (smallcaps[0]) style.append("SmallCaps ");

        System.out.printf("'%s' - %s %dpt %s%n",
                         word.getString(), fontFamily, size[0], style.toString());

        // Release the native text buffer returned by GetUTF8Text
        word.deallocate();
    } while (ri.Next(RIL_WORD));
}

Text Properties and Metadata

Additional text analysis capabilities:

// Word classification (declared on LTRResultIterator)
public boolean WordIsFromDictionary();       // Word found in dictionary
public boolean WordIsNumeric();              // Word contains only numbers
public int WordDirection();                  // Text direction (LTR/RTL)
public String WordRecognitionLanguage();     // Language used to recognize the word (NOTE(review): confirm)

// Symbol properties (declared on LTRResultIterator)
public boolean SymbolIsSuperscript();        // Symbol is superscript
public boolean SymbolIsSubscript();          // Symbol is subscript
public boolean SymbolIsDropcap();            // Symbol is drop capital

// Layout properties
public int BlanksBeforeWord();              // Number of spaces before word
public boolean ParagraphIsLtr();            // Paragraph is left-to-right (declared on ResultIterator only)

Bounding Box Information

Get precise coordinate information for layout analysis:

/**
 * Get bounding rectangle for element at specified level
 *
 * NOTE(review): the usage examples in this document pass one-element int[]
 * arrays for the output coordinates, while this signature shows IntPointer -
 * confirm which overloads the bindings actually expose.
 *
 * @param level Iterator level (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL)
 * @param left Output: left coordinate
 * @param top Output: top coordinate
 * @param right Output: right coordinate
 * @param bottom Output: bottom coordinate
 * @return true if bounding box is available
 */
public boolean BoundingBox(int level, IntPointer left, IntPointer top,
                          IntPointer right, IntPointer bottom);

/**
 * Get baseline coordinates for text line
 * The baseline is reported as a line segment from (x1, y1) to (x2, y2).
 *
 * @param level Iterator level
 * @param x1 Output: baseline start X
 * @param y1 Output: baseline start Y
 * @param x2 Output: baseline end X
 * @param y2 Output: baseline end Y
 * @return true if baseline is available
 */
public boolean Baseline(int level, IntPointer x1, IntPointer y1,
                       IntPointer x2, IntPointer y2);

Layout Analysis Example:

PageIterator pi = api.AnalyseLayout();  // Layout analysis without OCR

// Check for null before using the iterator
if (pi != null) {
    pi.Begin();

    do {
        // getBlockTypeName / getWritingDirection are application-defined helpers
        // that map the integer codes to display names; they are not declared in
        // the API listings in this document.
        int blockType = pi.BlockType();
        System.out.println("Block Type: " + getBlockTypeName(blockType));

        // Get block bounding box (one-element arrays act as output parameters)
        int[] left = new int[1], top = new int[1];
        int[] right = new int[1], bottom = new int[1];
        if (pi.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
            System.out.printf("Block bounds: (%d,%d) to (%d,%d)%n",
                             left[0], top[0], right[0], bottom[0]);
        }

        // Get orientation information
        int[] orientation = new int[1], writingDir = new int[1];
        int[] textlineOrder = new int[1];
        float[] deskewAngle = new float[1];
        pi.Orientation(orientation, writingDir, textlineOrder, deskewAngle);

        // The orientation code is converted to degrees in steps of 90
        System.out.printf("Orientation: %d°, Writing Direction: %s%n",
                         orientation[0] * 90, getWritingDirection(writingDir[0]));

    } while (pi.Next(RIL_BLOCK));
}

Memory Management

Important Iterator Guidelines:

  • Iterators are automatically managed by JavaCPP
  • Always call deallocate() on BytePointer results from the result iterators' GetUTF8Text(level) (ResultIterator / LTRResultIterator)
  • Check for null before using iterators returned by api.GetIterator() or api.AnalyseLayout()
  • ChoiceIterator text results should NOT be deallocated
  • Iterators become invalid after calling api.End()

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--tesseract

docs

basic-ocr.md

configuration.md

data-structures.md

index.md

iterators.md

renderers.md

tile.json