CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-org-bytedeco--leptonica-platform

JavaCPP bindings for Leptonica image processing library with cross-platform support

Pending
Overview
Eval results
Files

text-recognition.mddocs/

Text Recognition

OCR capabilities, document analysis, and text extraction with specialized structures for character recognition and document processing.

Capabilities

Recognition Engine

Core text recognition functionality with training, classification, and confidence scoring.

/**
 * Character recognition engine
 *
 * Pointer-backed structure. The methods listed here are presumably read
 * accessors for the fields of the same name; the list may be abridged —
 * TODO confirm against the generated binding.
 */
class L_RECOG extends Pointer {
    PIX pixdb_ave(); // average templates
    PIX pixdb_range(); // template ranges
    PIXA pixa_tr(); // training examples
    PIXAA pixaa_tr(); // organized training data
    PTA pta_tr(); // training centroids
    NUMA nasum_tr(); // training sums
    int threshold(); // classification threshold (see recogCreate)
    int maxyshift(); // maximum y shift (see recogCreate)
}

/**
 * Create recognition engine with the given template scaling and matching limits
 * @param scalew - Template width scale
 * @param scaleh - Template height scale
 * @param linew - Line width for template rendering
 * @param threshold - Classification threshold
 * @param maxyshift - Maximum vertical shift allowed
 * @return L_RECOG engine or null on failure; check for null before use
 */
L_RECOG recogCreate(int scalew, int scaleh, int linew, int threshold, int maxyshift);

/**
 * Create a new recognizer from an existing one, using new scaling parameters
 * @param recs - Source recognizer
 * @param scalew - New width scale
 * @param scaleh - New height scale
 * @param linew - Line width
 * @param threshold - Classification threshold
 * @param maxyshift - Maximum y shift
 * @return New L_RECOG or null on failure
 */
L_RECOG recogCreateFromRecog(L_RECOG recs, int scalew, int scaleh, int linew, int threshold, int maxyshift);

/**
 * Train recognizer with one labeled example
 * @param recog - Recognition engine
 * @param pixs - Training image
 * @param box - Character bounding box (can be null for full image)
 * @param text - Character label
 * @param debug - Debug level (0 = none)
 * @return 0 on success, 1 on failure; the status should be checked per sample
 */
int recogTrainLabeled(L_RECOG recog, PIX pixs, BOX box, String text, int debug);

/**
 * Finalize training (build templates)
 *
 * NOTE(review): upstream Leptonica appears to name this step
 * recogTrainingFinished, with a different parameter list — confirm that this
 * symbol and signature exist in the binding.
 * @param recog - Recognition engine
 * @param debug - Debug level
 * @return 0 on success, 1 on failure
 */
int recogFinishTraining(L_RECOG recog, int debug);

/**
 * Classify character
 *
 * NOTE(review): upstream Leptonica appears to expose identification as
 * recogIdentifyPix, with results read back via rchExtract — confirm that
 * this symbol and signature exist in the binding.
 * @param recog - Recognition engine
 * @param pixs - Character image
 * @param box - Character bounding box (can be null)
 * @param pcharstr - Returns recognized character (caller-allocated buffer)
 * @param pscore - Returns confidence score (caller-allocated, one element)
 * @param debug - Debug level
 * @return 0 on success, 1 on failure; outputs are only meaningful on success
 */
int recogClassifyPixel(L_RECOG recog, PIX pixs, BOX box, BytePointer pcharstr, FloatPointer pscore, int debug);

Usage Examples:

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.FloatPointer;
import org.bytedeco.leptonica.*;
import static org.bytedeco.leptonica.global.leptonica.*;

// Create OCR engine for digits (returns null on failure)
L_RECOG digitRecog = recogCreate(32, 32, 4, 128, 2);
if (digitRecog == null) {
    throw new IllegalStateException("recogCreate failed");
}

// Train with labeled examples; each call returns 0 on success, 1 on failure
PIX digit0 = pixRead("digit_0_sample.png");
if (recogTrainLabeled(digitRecog, digit0, null, "0", 0) != 0) {
    throw new IllegalStateException("Training failed for label 0");
}

PIX digit1 = pixRead("digit_1_sample.png");
if (recogTrainLabeled(digitRecog, digit1, null, "1", 0) != 0) {
    throw new IllegalStateException("Training failed for label 1");
}

// ... train with more examples ...

// Finalize training
if (recogFinishTraining(digitRecog, 0) != 0) {
    throw new IllegalStateException("recogFinishTraining failed");
}

// Classify unknown character; the output buffers are allocated by the caller
PIX unknown = pixRead("unknown_digit.png");
BytePointer result = new BytePointer(10);
FloatPointer confidence = new FloatPointer(1);

int status = recogClassifyPixel(digitRecog, unknown, null, result, confidence, 0);
if (status == 0) {
    System.out.println("Recognized: " + result.getString() + 
                      " (confidence: " + confidence.get() + ")");
} else {
    // Outputs are not meaningful when classification fails
    System.err.println("Classification failed");
}

Document Dewarping

Correct document distortion and perspective issues for improved OCR accuracy.

/**
 * Single page dewarp correction
 *
 * Pointer-backed structure created by dewarpCreate. The methods listed here
 * are presumably read accessors for the fields of the same name — TODO
 * confirm against the generated binding.
 */
class L_DEWARP extends Pointer {
    PIX pixs(); // source image
    PIXA sampv(); // vertical samples
    PIXA samph(); // horizontal samples
    PTA ptav(); // vertical control points
    PTA ptah(); // horizontal control points
    int w(); // image width
    int h(); // image height
    int nx(); // horizontal sampling points
    int ny(); // vertical sampling points
}

/**
 * Multi-page dewarp processing
 *
 * Pointer-backed container created by dewarpaCreate; sampling, redfactor,
 * minlines and maxdist mirror the arguments of that function.
 */
class L_DEWARPA extends Pointer {
    int nalloc(); // allocated array size
    int maxpage(); // maximum page number
    int sampling(); // sampling factor
    int redfactor(); // reduction factor
    int minlines(); // minimum lines for modeling
    int maxdist(); // maximum distance for interpolation
}

/**
 * Create dewarp structure for single page
 * @param pixs - Source document image
 * @param pageno - Page number identifier (the usage examples number pages from 1)
 * @return L_DEWARP structure or null on failure
 */
L_DEWARP dewarpCreate(PIX pixs, int pageno);

/**
 * Create multi-page dewarp structure that collects per-page L_DEWARP models
 * @param nmax - Maximum number of pages
 * @param sampling - Sampling density
 * @param redfactor - Size reduction factor
 * @param minlines - Minimum text lines required
 * @param maxdist - Maximum interpolation distance
 * @return L_DEWARPA structure or null on failure
 */
L_DEWARPA dewarpaCreate(int nmax, int sampling, int redfactor, int minlines, int maxdist);

/**
 * Build dewarp model for page
 *
 * NOTE(review): upstream Leptonica appears to name this dewarpBuildPageModel
 * — confirm that this symbol exists in the binding.
 * @param dew - Dewarp structure
 * @param debugfile - Debug output file (can be null)
 * @return 0 on success, 1 on failure
 */
int dewarpBuildModel(L_DEWARP dew, String debugfile);

/**
 * Apply dewarp correction
 *
 * NOTE(review): upstream Leptonica appears to apply a model through
 * dewarpaApplyDisparity on the multi-page structure — confirm that this
 * symbol and signature exist in the binding.
 * @param dew - Dewarp structure with built model
 * @param pixs - Source image to correct
 * @param debugfile - Debug output file (can be null)
 * @return Corrected PIX or null on failure; check for null before use
 */
PIX dewarpApply(L_DEWARP dew, PIX pixs, String debugfile);

/**
 * Add a page's dewarp structure to the multi-page dewarper
 *
 * NOTE(review): upstream Leptonica documents that dewa takes ownership of
 * dew after insertion — confirm before destroying dew separately.
 * @param dewa - Multi-page dewarp structure
 * @param dew - Single-page dewarp structure to insert (from dewarpCreate)
 * @return 0 on success, 1 on failure
 */
int dewarpaInsertDewarp(L_DEWARPA dewa, L_DEWARP dew);

Usage Examples:

// Single page dewarping
PIX document = pixRead("scanned_page.jpg");
L_DEWARP dewarp = dewarpCreate(document, 1);

// Build correction model (0 on success)
if (dewarpBuildModel(dewarp, null) == 0) {
    // Apply correction; dewarpApply returns null on failure
    PIX corrected = dewarpApply(dewarp, document, null);
    if (corrected != null) {
        // IFF_JFIF_JPEG is Leptonica's format constant for JPEG output
        pixWrite("corrected_page.jpg", corrected, IFF_JFIF_JPEG);
        pixDestroy(corrected);
    }
}
pixDestroy(document);

// Multi-page document processing
L_DEWARPA multiPage = dewarpaCreate(100, 7, 1, 6, 30);

// Process each page; pageCount is the number of scanned pages and must be
// supplied by the caller (no more than the capacity given to dewarpaCreate)
for (int i = 1; i <= pageCount; i++) {
    PIX page = pixRead("page_" + i + ".jpg");
    L_DEWARP pageDewarp = dewarpCreate(page, i);
    
    if (dewarpBuildModel(pageDewarp, null) == 0) {
        dewarpaInsertDewarp(multiPage, pageDewarp);
        
        PIX corrected = dewarpApply(pageDewarp, page, null);
        if (corrected != null) {
            pixWrite("corrected_page_" + i + ".jpg", corrected, IFF_JFIF_JPEG);
            pixDestroy(corrected);
        }
    }
    // Release this handle each iteration so native memory does not grow
    // with the page count (PIX is reference counted)
    pixDestroy(page);
}

JBig2 Classification

Specialized encoding and classification for document compression and analysis.

/**
 * JBig2 symbol classifier
 *
 * Pointer-backed structure created by jbClasserCreate; method() and
 * components() mirror the arguments of that function.
 */
class JBCLASSER extends Pointer {
    SARRAY safiles(); // input file names
    int method(); // classification method
    int components(); // number of components
    int maxwidth(); // maximum symbol width
    int maxheight(); // maximum symbol height
    int npages(); // number of pages processed
    int baseindex(); // base index for symbols
}

/**
 * JBig2 encoding data
 *
 * Pointer-backed structure returned by jbClasserGetJbData.
 */
class JBDATA extends Pointer {
    PIX pix(); // reconstructed image
    int w(); // image width
    int h(); // image height
    int nclass(); // number of symbol classes
    PIXA pixat(); // template symbols
    PTAA ptaul(); // upper-left coordinates
}

/**
 * Create JBig2 classifier
 *
 * NOTE(review): in upstream Leptonica the second argument is a component-type
 * selector (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS), not a count — confirm
 * which meaning applies to this binding.
 * @param method - Classification method (one of the JB_* method constants)
 * @param components - Number of components to use
 * @return JBCLASSER or null on failure
 */
JBCLASSER jbClasserCreate(int method, int components);

/**
 * Add page to classifier
 *
 * NOTE(review): upstream Leptonica appears to name this jbAddPage, taking
 * only the classifier and the image — confirm that this symbol and signature
 * exist in the binding.
 * @param classer - JBig2 classifier
 * @param pixs - Page image
 * @param filename - Source filename
 * @return 0 on success, 1 on failure
 */
int jbClasserAddPage(JBCLASSER classer, PIX pixs, String filename);

/**
 * Generate JBig2 encoding data
 *
 * NOTE(review): upstream Leptonica appears to produce JBDATA via jbDataSave
 * without a page argument — confirm that this symbol and signature exist in
 * the binding.
 * @param classer - Trained classifier
 * @param pageno - Page number to encode
 * @return JBDATA encoding or null on failure; check for null before use
 */
JBDATA jbClasserGetJbData(JBCLASSER classer, int pageno);

Usage Examples:

// Create JBig2 classifier for document compression (returns null on failure)
// NOTE(review): upstream Leptonica expects a component-type selector as the
// second argument rather than a count — confirm that 8 is a valid value.
JBCLASSER classifier = jbClasserCreate(JB_CLASSIFICATION, 8);
if (classifier == null) {
    throw new IllegalStateException("jbClasserCreate failed");
}

// Add document pages; pageCount must be supplied by the caller
for (int i = 0; i < pageCount; i++) {
    String pageFile = "page_" + i + ".tiff";
    PIX page = pixRead(pageFile);
    if (page == null) {
        System.err.println("Skipping unreadable page: " + pageFile);
        continue;
    }
    // 0 on success, 1 on failure
    if (jbClasserAddPage(classifier, page, pageFile) != 0) {
        System.err.println("Could not add page: " + pageFile);
    }
}

// Generate compressed representation; null signals failure, so guard the
// dereference instead of risking a NullPointerException
JBDATA compressed = jbClasserGetJbData(classifier, 0);
if (compressed == null) {
    throw new IllegalStateException("jbClasserGetJbData failed");
}
PIX reconstructed = compressed.pix();

Bitmap Fonts

Bitmap font rendering for text overlay and document generation.

/**
 * Bitmap font structure
 *
 * Pointer-backed structure created by bmfCreate.
 */
class L_BMF extends Pointer {
    PIXA pixa(); // character bitmaps (one PIX per glyph, hence a PIXA)
    int size(); // font size
    BytePointer directory(); // font directory
}

/**
 * Create bitmap font
 * @param dir - Font directory path
 * @param fontsize - Font size (see the L_BM_FONT_* constants)
 * @return L_BMF font or null on failure; check for null before use
 */
L_BMF bmfCreate(String dir, int fontsize);

/**
 * Render text using bitmap font
 *
 * NOTE(review): upstream Leptonica's bmfGetPix takes a single character and
 * returns that glyph — confirm that a String overload exists in the binding.
 * @param bmf - Bitmap font
 * @param textstr - Text to render
 * @return PIX with rendered text or null on failure
 */
PIX bmfGetPix(L_BMF bmf, String textstr);

/**
 * Get text width in pixels
 *
 * NOTE(review): upstream Leptonica's bmfGetWidth measures one character;
 * whole strings are measured by bmfGetStringWidth — confirm which symbol the
 * binding exposes for String input.
 * @param bmf - Bitmap font
 * @param textstr - Text string
 * @param pw - Returns width in pixels (caller-allocated, one element)
 * @return 0 on success, 1 on failure; pw is only meaningful on success
 */
int bmfGetWidth(L_BMF bmf, String textstr, IntPointer pw);

Usage Examples:

// Create bitmap font (returns null on failure, e.g. missing font directory)
L_BMF font = bmfCreate("/usr/share/fonts/leptonica", 12);
if (font == null) {
    throw new IllegalStateException("bmfCreate failed");
}

// Render text
PIX textImage = bmfGetPix(font, "Hello, World!");

// Get text dimensions; the width is only valid when the call returns 0
IntPointer width = new IntPointer(1);
if (bmfGetWidth(font, "Sample Text", width) == 0) {
    System.out.println("Text width: " + width.get() + " pixels");
}

// Overlay text on image. baseImage is an existing PIX loaded elsewhere.
// pixPaintBoxa paints a set of boxes in a colour and cannot place an image,
// so draw the string directly with pixSetTextline instead: it writes the text
// onto baseImage in place, starting at x = 100 with the baseline at y = 50.
IntPointer drawnWidth = new IntPointer(1);
IntPointer overflow = new IntPointer(1); // set to 1 if the text was clipped
pixSetTextline(baseImage, font, "Hello, World!", 0x00000000, 100, 50, drawnWidth, overflow);

Text Processing Pipeline

Complete OCR Workflow

// 1. Document preprocessing
PIX document = pixRead("document.jpg");
PIX gray = pixConvertRGBToGray(document, 0.299f, 0.587f, 0.114f);
// pixOtsuAdaptiveThreshold returns a status code (0 on success); the
// binarized image is delivered through its final output argument, and the
// optional threshold-map output is skipped by passing null.
PIX binary = new PIX(null);
if (pixOtsuAdaptiveThreshold(gray, 32, 32, 0, 0, 0.1f, null, binary) != 0) {
    throw new IllegalStateException("Otsu thresholding failed");
}

// 2. Dewarp correction
L_DEWARP dewarp = dewarpCreate(binary, 1);
if (dewarpBuildModel(dewarp, null) == 0) {
    // dewarpApply returns null on failure; keep the uncorrected image then
    PIX dewarped = dewarpApply(dewarp, binary, null);
    if (dewarped != null) {
        pixDestroy(binary);
        binary = dewarped;
    }
}

// 3. Character segmentation (hypothetical)
BOXA characters = segmentCharacters(binary);

// 4. Character recognition
L_RECOG ocr = loadTrainedOCR(); // hypothetical
StringBuilder result = new StringBuilder();

// Output buffers are reused across characters
BytePointer character = new BytePointer(10);
FloatPointer confidence = new FloatPointer(1);

int charCount = boxaGetCount(characters);
for (int i = 0; i < charCount; i++) {
    BOX charBox = boxaGetBox(characters, i, L_CLONE);
    // The cast selects the BOX overload; a bare null is ambiguous when the
    // binding also offers a PointerPointer variant
    PIX charImage = pixClipRectangle(binary, charBox, (BOX) null);
    boxDestroy(charBox); // release the clone taken above
    if (charImage == null) {
        continue; // box fell outside the image
    }
    
    if (recogClassifyPixel(ocr, charImage, null, character, confidence, 0) == 0
            && confidence.get() > 0.7f) { // confidence threshold
        result.append(character.getString());
    }
    pixDestroy(charImage);
}

System.out.println("Recognized text: " + result);

Constants

// JBig2 classification methods (first argument of jbClasserCreate)
// NOTE(review): upstream Leptonica names value 0 JB_RANKHAUS — confirm that
// JB_CLASSIFICATION is actually exported by the binding.
static final int JB_CLASSIFICATION = 0;
static final int JB_CORRELATION = 1;

// Font sizes (fontsize argument of bmfCreate)
// NOTE(review): the list jumps from 16 to 20; upstream bmfCreate also accepts
// 18 — confirm whether the omission is intentional and whether these
// L_BM_FONT_* names exist in the binding.
static final int L_BM_FONT_4 = 4;
static final int L_BM_FONT_6 = 6;
static final int L_BM_FONT_8 = 8;
static final int L_BM_FONT_10 = 10;
static final int L_BM_FONT_12 = 12;
static final int L_BM_FONT_14 = 14;
static final int L_BM_FONT_16 = 16;
static final int L_BM_FONT_20 = 20;

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--leptonica-platform

docs

collections.md

connected-components.md

core-images.md

geometry.md

image-io.md

image-processing.md

index.md

morphology.md

text-recognition.md

utilities.md

tile.json