tessl/maven-org-bytedeco--tesseract

JavaCPP Presets for Tesseract - Java wrapper library providing JNI bindings to the native Tesseract OCR library version 5.5.1, enabling optical character recognition capabilities in Java applications

—

Pending

Overview

Eval results

Files

Result Analysis with Iterators

Name: tessl/maven-org-bytedeco--tesseract
Author: tessl

Detailed analysis of OCR results using iterator classes that provide hierarchical navigation through page structure, word-level confidence scores, bounding boxes, font information, and alternative recognition choices.

Capabilities

ResultIterator Class

Primary iterator for OCR results that handles proper reading order and bidirectional text correctly.

/**
 * Iterator for OCR results in proper reading order.
 * Handles bidirectional text correctly and provides access to recognition results.
 * Extends LTRResultIterator, so that class's text, confidence and font accessors
 * are available here as well.
 */
public class ResultIterator extends LTRResultIterator {
    // Navigation methods. The `level` arguments take the RIL_* iterator level constants.

    // Repositions the iterator at the start; the examples call this before looping.
    public void Begin();
    // Advances to the next element at `level`; used as the loop condition in the
    // examples, so a false return ends the iteration.
    public boolean Next(int level);
    // True when the current position is the first element of a unit at `level`.
    public boolean IsAtBeginningOf(int level);
    // NOTE(review): presumably true when positioned on the last `element` inside the
    // enclosing `level` (e.g. last word of a line) -- confirm against upstream docs.
    public boolean IsAtFinalElement(int level, int element);
    
    // Text access

    // Text of the current element at `level`. The caller must deallocate() the result.
    public BytePointer GetUTF8Text(int level);
    // Number of spaces before the current word.
    public int BlanksBeforeWord();
    // True when the current paragraph is left-to-right.
    public boolean ParagraphIsLtr();
    
    // LSTM support
    // NOTE(review): both return nested symbol/score structures from the LSTM
    // recognizer; exact shape and ownership are not shown here -- confirm before use.
    public StringFloatPairVectorVector GetRawLSTMTimesteps();
    public StringFloatPairVectorVector GetBestLSTMSymbolChoices();
}

Detailed Analysis Example:

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.tesseract.*;
import static org.bytedeco.tesseract.global.tesseract.*;

// Get results iterator (null when there are no recognition results to iterate)
ResultIterator ri = api.GetIterator();
int level = RIL_WORD;

if (ri != null) {
    do {
        // Get word text; null means there is nothing at this position (e.g. an empty page)
        BytePointer word = ri.GetUTF8Text(level);
        if (word == null) {
            continue;  // in a do-while this jumps to the ri.Next(level) check
        }

        try {
            float conf = ri.Confidence(level);

            // Get bounding box (single-element arrays act as output parameters)
            int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
            ri.BoundingBox(level, x1, y1, x2, y2);

            // Get font information
            boolean[] isBold = new boolean[1], isItalic = new boolean[1];
            boolean[] isUnderlined = new boolean[1], isMonospace = new boolean[1];
            boolean[] isSerif = new boolean[1], isSmallcaps = new boolean[1];
            int[] pointsize = new int[1], fontId = new int[1];

            String fontName = ri.WordFontAttributes(isBold, isItalic, isUnderlined,
                                                   isMonospace, isSerif, isSmallcaps,
                                                   pointsize, fontId);

            System.out.printf("Word: '%s' | Confidence: %.2f%% | Box: (%d,%d,%d,%d) | Font: %s %dpt%n",
                             word.getString(), conf, x1[0], y1[0], x2[0], y2[0],
                             fontName, pointsize[0]);
        } finally {
            // Release the native string even if formatting or an accessor throws
            word.deallocate();
        }
    } while (ri.Next(level));
}

PageIterator Class

Base iterator for page structure hierarchy providing layout analysis without OCR results.

/**
 * Iterator for page structure hierarchy without OCR results.
 * Provides access to layout analysis and page structure.
 * Base class of LTRResultIterator (and therefore of ResultIterator).
 */
public class PageIterator extends Pointer {
    // Navigation. The `level` arguments take the RIL_* iterator level constants.

    // Repositions the iterator at the start; the examples call this before looping.
    public void Begin();
    // Advances to the next element at `level`; a false return ends the example loops.
    public boolean Next(int level);
    // NOTE(review): presumably move back to the start of the current paragraph /
    // text line respectively -- confirm against upstream docs.
    public void RestartParagraph();
    public void RestartRow();
    // True when the current position is the first element of a unit at `level`.
    public boolean IsAtBeginningOf(int level);
    // NOTE(review): presumably true when positioned on the last `element` inside the
    // enclosing `level` -- confirm against upstream docs.
    public boolean IsAtFinalElement(int level, int element);
    
    // Bounding boxes

    // Writes the bounding rectangle of the current element at `level` into the four
    // output parameters; returns true if a bounding box is available.
    public boolean BoundingBox(int level, IntPointer left, IntPointer top, 
                              IntPointer right, IntPointer bottom);
    // NOTE(review): judging by the parameter names, selects whether dots above/below
    // the text are included in reported boxes -- confirm.
    public void SetBoundingBoxComponents(boolean include_upper_dots, boolean include_lower_dots);
    
    // Content access

    // Type code of the current block.
    public int BlockType();
    // NOTE(review): the PTA / PIX results below are Leptonica objects; ownership
    // (who must destroy them) is not shown here -- confirm before use.
    public PTA BlockPolygon();
    public PIX GetBinaryImage(int level);
    public PIX GetImage(int level, int padding, PIX original_img, 
                       IntPointer left, IntPointer top);
    // Writes the baseline start (x1, y1) and end (x2, y2) of the current element into
    // the output parameters; returns true if a baseline is available.
    public boolean Baseline(int level, IntPointer x1, IntPointer y1, 
                           IntPointer x2, IntPointer y2);
    
    // Text properties

    // Writes orientation, writing direction, text line order and deskew angle of the
    // current block into the output parameters.
    public void Orientation(IntPointer orientation, IntPointer writing_direction,
                           IntPointer textline_order, FloatPointer deskew_angle);
    // Writes justification, list-item flag, crown flag and first-line indent of the
    // current paragraph into the output parameters.
    public void ParagraphInfo(IntPointer justification, BoolPointer is_list_item,
                             BoolPointer is_crown, IntPointer first_line_indent); 
}

LTRResultIterator Class

Left-to-right result iterator providing text-specific methods and font information.

/**
 * Left-to-right result iterator with text-specific methods.
 * Base class for ResultIterator with detailed text analysis capabilities;
 * inherits navigation and geometry methods from PageIterator.
 */
public class LTRResultIterator extends PageIterator {
    // Text output

    // Text of the current element at `level`. The caller must deallocate() the result.
    public BytePointer GetUTF8Text(int level);
    // NOTE(review): presumably set the strings emitted at the end of each line /
    // paragraph in returned text -- confirm defaults against upstream docs.
    public void SetLineSeparator(String new_line);
    public void SetParagraphSeparator(String new_para);
    // Confidence of the current element at `level`; the examples print it as a percentage.
    public float Confidence(int level);
    
    // Font information

    // Fills the single-element output arrays/buffers with the current word's font
    // attributes and returns the font family name.
    public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
                                   boolean[] is_underlined, boolean[] is_monospace,
                                   boolean[] is_serif, boolean[] is_smallcaps,
                                   IntBuffer pointsize, IntBuffer font_id);
    // Language associated with the recognition of the current word.
    public String WordRecognitionLanguage();
    // Text direction of the current word (LTR/RTL).
    public int WordDirection();
    
    // Word properties

    // True when the current word was found in the dictionary.
    public boolean WordIsFromDictionary();
    // True when the current word is numeric.
    public boolean WordIsNumeric();
    // Number of spaces before the current word.
    public int BlanksBeforeWord();
    
    // Symbol properties (meaningful when iterating at RIL_SYMBOL)
    public boolean SymbolIsSuperscript();
    public boolean SymbolIsSubscript();
    public boolean SymbolIsDropcap();
}

ChoiceIterator Class

Iterator for examining alternative recognition choices for a single symbol.

/**
 * Iterator over classifier choices for a single symbol.
 * Provides access to alternative recognition results with confidence scores.
 * Unlike the page/result iterators, instances are created directly with `new`
 * from an existing result iterator rather than obtained from the API object.
 */
public class ChoiceIterator extends Pointer {
    /**
     * Create choice iterator from result iterator position.
     * @param result_it Result iterator positioned at the symbol of interest
     */
    public ChoiceIterator(LTRResultIterator result_it);
    
    /**
     * Move to next choice for current symbol.
     * @return true if more choices available; false ends the iteration
     */
    public boolean Next();
    
    /**
     * Get text of current choice. Unlike the result iterators' GetUTF8Text(level),
     * the returned pointer must NOT be deallocated by the caller.
     * @return Choice text as BytePointer
     */
    public BytePointer GetUTF8Text();
    
    /**
     * Get confidence score for current choice.
     * NOTE(review): upstream Tesseract documents a 0.0-1.0 range when only LSTM
     * traineddata is used and 0-100 for the legacy engine -- confirm for this version.
     * @return Confidence percentage (0-100)
     */
    public float Confidence();
    
    /**
     * Get LSTM timesteps for current choice.
     * @return Nested structure of symbol/score pairs per timestep
     */
    public StringFloatPairVectorVector Timesteps();
}

Alternative Choices Example:

// Position iterator at a specific symbol.
// GetIterator() is the call that can yield null, so that is what gets checked;
// an object created with `new` is never null.
ResultIterator ri = api.GetIterator();

if (ri != null) {
    ri.Begin();

    // Get choices for current symbol
    ChoiceIterator ci = new ChoiceIterator(ri);
    do {
        BytePointer choice = ci.GetUTF8Text();
        // No text means there is no choice at this position; skip printing
        if (choice != null) {
            float conf = ci.Confidence();
            System.out.printf("Choice: '%s' (%.2f%%)%n", choice.getString(), conf);
            // Note: choice text should not be deallocated
        }
    } while (ci.Next());
}

Iterator Level Constants

Navigation and analysis can be performed at different hierarchical levels:

// Page Iterator Level Constants
// Passed as the `level` argument of iterator methods; values run from the
// coarsest unit (0) to the finest (4).
public static final int RIL_BLOCK = 0;      // Block of text, image, or separator line
public static final int RIL_PARA = 1;       // Paragraph within a block
public static final int RIL_TEXTLINE = 2;   // Text line within a paragraph
public static final int RIL_WORD = 3;       // Word within a text line
public static final int RIL_SYMBOL = 4;     // Symbol/character within a word

Hierarchical Navigation Example:

ResultIterator ri = api.GetIterator();

if (ri != null) {
    ri.Begin();

    // Walk the page one word at a time and detect paragraph starts on the way.
    // Nesting a RIL_WORD loop inside a Next(RIL_PARA) loop skips every other
    // paragraph: the inner loop only stops once it is already on the first word
    // of the following paragraph, and Next(RIL_PARA) would then move past it.
    do {
        // Print the paragraph header once, when positioned on its first word
        if (ri.IsAtBeginningOf(RIL_PARA)) {
            System.out.println("=== PARAGRAPH ===");
            BytePointer para = ri.GetUTF8Text(RIL_PARA);
            if (para != null) {
                System.out.println("Paragraph: " + para.getString());
                para.deallocate();
            }
        }

        // Print the current word of this paragraph
        BytePointer word = ri.GetUTF8Text(RIL_WORD);
        if (word != null) {
            float conf = ri.Confidence(RIL_WORD);
            System.out.printf("  Word: '%s' (%.1f%%)%n", word.getString(), conf);
            word.deallocate();
        }
    } while (ri.Next(RIL_WORD));
}

Font and Style Information

Detailed font attributes are available through the iterator's WordFontAttributes() method:

/**
 * Get comprehensive font attributes for current word.
 * All parameters are outputs: pass single-element arrays/buffers and read
 * element 0 after the call.
 * @param is_bold Output: true if font is bold
 * @param is_italic Output: true if font is italic
 * @param is_underlined Output: true if text is underlined
 * @param is_monospace Output: true if font is monospace
 * @param is_serif Output: true if font has serifs
 * @param is_smallcaps Output: true if text is small caps
 * @param pointsize Output: font size in points
 * @param font_id Output: internal font identifier
 * @return Font family name
 *         (NOTE(review): may be null when no font information is available -- confirm)
 */
public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
                               boolean[] is_underlined, boolean[] is_monospace,
                               boolean[] is_serif, boolean[] is_smallcaps,
                               IntBuffer pointsize, IntBuffer font_id);

Font Analysis Example:

ResultIterator ri = api.GetIterator();

// GetIterator() can yield null; guard before touching the iterator
if (ri != null) {
    ri.Begin();

    do {
        BytePointer word = ri.GetUTF8Text(RIL_WORD);
        if (word == null) {
            continue;  // nothing at this position; do-while still evaluates ri.Next(...)
        }

        // Get font attributes (single-element arrays act as output parameters)
        boolean[] bold = new boolean[1], italic = new boolean[1];
        boolean[] underlined = new boolean[1], monospace = new boolean[1];
        boolean[] serif = new boolean[1], smallcaps = new boolean[1];
        int[] size = new int[1], fontId = new int[1];

        String fontFamily = ri.WordFontAttributes(bold, italic, underlined,
                                                 monospace, serif, smallcaps,
                                                 size, fontId);

        // Build style description
        StringBuilder style = new StringBuilder();
        if (bold[0]) style.append("Bold ");
        if (italic[0]) style.append("Italic ");
        if (underlined[0]) style.append("Underlined ");
        if (smallcaps[0]) style.append("SmallCaps ");

        System.out.printf("'%s' - %s %dpt %s%n",
                         word.getString(), fontFamily, size[0], style.toString());

        word.deallocate();
    } while (ri.Next(RIL_WORD));
}

Text Properties and Metadata

Additional text analysis capabilities:

// Word classification (apply to the word at the iterator's current position)
public boolean WordIsFromDictionary();       // Word found in dictionary
public boolean WordIsNumeric();              // Word is numeric
public int WordDirection();                  // Text direction of the word (LTR/RTL), as an int code
public String WordRecognitionLanguage();     // Language associated with recognizing this word

// Symbol properties (apply to the symbol at the iterator's current position)
public boolean SymbolIsSuperscript();        // Symbol is superscript
public boolean SymbolIsSubscript();          // Symbol is subscript
public boolean SymbolIsDropcap();            // Symbol is drop capital

// Layout properties
public int BlanksBeforeWord();              // Number of spaces before word
public boolean ParagraphIsLtr();            // Paragraph is left-to-right (declared on ResultIterator)

Bounding Box Information

Get precise coordinate information for layout analysis:

/**
 * Get bounding rectangle for element at specified level.
 * The four coordinate parameters are outputs; read element 0 after the call.
 * @param level Iterator level (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL)
 * @param left Output: left coordinate
 * @param top Output: top coordinate
 * @param right Output: right coordinate
 * @param bottom Output: bottom coordinate
 * @return true if bounding box is available; the outputs should only be used in that case
 */
public boolean BoundingBox(int level, IntPointer left, IntPointer top,
                          IntPointer right, IntPointer bottom);

/**
 * Get baseline coordinates for the element at the specified level.
 * The baseline is reported as the two end points (x1, y1) and (x2, y2).
 * @param level Iterator level
 * @param x1 Output: baseline start X
 * @param y1 Output: baseline start Y
 * @param x2 Output: baseline end X
 * @param y2 Output: baseline end Y
 * @return true if baseline is available; the outputs should only be used in that case
 */
public boolean Baseline(int level, IntPointer x1, IntPointer y1,
                       IntPointer x2, IntPointer y2);

Layout Analysis Example:

// getBlockTypeName(int) and getWritingDirection(int) are application-defined helpers
// that map the integer codes to display names; they are not part of the binding.
PageIterator pi = api.AnalyseLayout();  // Layout analysis without OCR

// Check for null before using the iterator
if (pi != null) {
    pi.Begin();

    do {
        int blockType = pi.BlockType();
        System.out.println("Block Type: " + getBlockTypeName(blockType));

        // Get block bounding box (single-element arrays act as output parameters)
        int[] left = new int[1], top = new int[1];
        int[] right = new int[1], bottom = new int[1];
        if (pi.BoundingBox(RIL_BLOCK, left, top, right, bottom)) {
            System.out.printf("Block bounds: (%d,%d) to (%d,%d)%n",
                             left[0], top[0], right[0], bottom[0]);
        }

        // Get orientation information
        int[] orientation = new int[1], writingDir = new int[1];
        int[] textlineOrder = new int[1];
        float[] deskewAngle = new float[1];
        pi.Orientation(orientation, writingDir, textlineOrder, deskewAngle);

        System.out.printf("Orientation: %d°, Writing Direction: %s%n",
                         orientation[0] * 90, getWritingDirection(writingDir[0]));

    } while (pi.Next(RIL_BLOCK));
}

Memory Management

Important Iterator Guidelines:

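- Iterators are automatically managed by JavaCPP.
- Always call deallocate() on the BytePointer returned by an iterator's GetUTF8Text(level).
- Check for null before using iterators: api.GetIterator() and api.AnalyseLayout() can return null.
- Text returned by ChoiceIterator.GetUTF8Text() should NOT be deallocated.
- Iterators point into data held by the API object and become invalid after calling api.End().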

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--tesseract

docs