tessl/maven-org-bytedeco--leptonica-platform

JavaCPP bindings for Leptonica image processing library with cross-platform support

—

Pending

Overview

Eval results

Files

Text Recognition

Name: tessl/maven-org-bytedeco--leptonica-platform
Author: tessl

OCR capabilities, document analysis, and text extraction with specialized structures for character recognition and document processing.

Capabilities

Recognition Engine

Core text recognition functionality with training, classification, and confidence scoring.

/**
 * Character recognition engine.
 *
 * Lists, as accessor methods, the template images, the accumulated training
 * data, and the two tuning values (threshold, maxyshift) that recogCreate()
 * also takes as arguments.
 *
 * NOTE(review): this is an abridged view; confirm the accessor names against
 * the generated L_RECOG class for the Leptonica release in use.
 */
class L_RECOG extends Pointer {
    PIX pixdb_ave(); // average templates
    PIX pixdb_range(); // template ranges
    PIXA pixa_tr(); // training examples
    PIXAA pixaa_tr(); // organized training data
    PTA pta_tr(); // training centroids
    NUMA nasum_tr(); // training sums
    int threshold(); // classification threshold
    int maxyshift(); // maximum y shift
}

/**
 * Create a recognition engine.
 *
 * NOTE(review): upstream Leptonica describes threshold as the binarization
 * threshold applied to input images (typically around 128) rather than a
 * classification threshold - confirm against the recogCreate reference.
 *
 * @param scalew - Template width scale
 * @param scaleh - Template height scale
 * @param linew - Line width for template rendering
 * @param threshold - Classification threshold
 * @param maxyshift - Maximum vertical shift allowed
 * @return L_RECOG engine or null on failure
 */
L_RECOG recogCreate(int scalew, int scaleh, int linew, int threshold, int maxyshift);

/**
 * Create a new recognizer from an existing one.
 *
 * Takes the same five tuning parameters as recogCreate(), plus the
 * recognizer to start from.
 *
 * @param recs - Source recognizer
 * @param scalew - New width scale
 * @param scaleh - New height scale
 * @param linew - Line width
 * @param threshold - Classification threshold
 * @param maxyshift - Maximum y shift
 * @return New L_RECOG or null on failure
 */
L_RECOG recogCreateFromRecog(L_RECOG recs, int scalew, int scaleh, int linew, int threshold, int maxyshift);

/**
 * Train the recognizer with one labeled example.
 *
 * Call once per sample, then finalize with recogFinishTraining() before
 * classifying.
 *
 * @param recog - Recognition engine
 * @param pixs - Training image
 * @param box - Character bounding box (can be null for full image)
 * @param text - Character label
 * @param debug - Debug level (0 = none)
 * @return 0 on success, 1 on failure
 */
int recogTrainLabeled(L_RECOG recog, PIX pixs, BOX box, String text, int debug);

/**
 * Finalize training (build templates).
 *
 * Call after the last recogTrainLabeled() and before classification.
 *
 * NOTE(review): upstream Leptonica appears to name this step
 * recogTrainingFinished, with a different parameter list - confirm that this
 * name and signature exist in the generated bindings.
 *
 * @param recog - Recognition engine
 * @param debug - Debug level
 * @return 0 on success, 1 on failure
 */
int recogFinishTraining(L_RECOG recog, int debug);

/**
 * Classify a single character image.
 *
 * Despite the "Pixel" in the name, the input is a whole character image;
 * the result is delivered through the two output pointers.
 *
 * NOTE(review): upstream Leptonica appears to expose single-character
 * identification as recogIdentifyPix - confirm that this name and signature
 * exist in the generated bindings.
 *
 * @param recog - Recognition engine
 * @param pixs - Character image
 * @param box - Character bounding box (can be null)
 * @param pcharstr - Caller-allocated buffer that receives the recognized character
 * @param pscore - Caller-allocated float that receives the confidence score
 * @param debug - Debug level
 * @return 0 on success, 1 on failure
 */
int recogClassifyPixel(L_RECOG recog, PIX pixs, BOX box, BytePointer pcharstr, FloatPointer pscore, int debug);

Usage Examples:

import org.bytedeco.leptonica.*;
import static org.bytedeco.leptonica.global.leptonica.*;

// Create OCR engine for digits
L_RECOG digitRecog = recogCreate(32, 32, 4, 128, 2);
if (digitRecog == null) {
    throw new IllegalStateException("recogCreate failed");
}

// Train with labeled examples; extend the array to train with more digits.
String[] labels = {"0", "1"};
for (String label : labels) {
    String sampleFile = "digit_" + label + "_sample.png";
    PIX sample = pixRead(sampleFile);
    if (sample == null) { // pixRead returns null when the file cannot be read
        System.err.println("Could not read " + sampleFile);
        continue;
    }
    if (recogTrainLabeled(digitRecog, sample, null, label, 0) != 0) {
        System.err.println("Training failed for " + sampleFile);
    }
    // Images returned by the native library are not garbage collected;
    // release each one as soon as it is no longer needed.
    pixDestroy(sample);
}

// Finalize training; classification is only meaningful if this succeeds.
if (recogFinishTraining(digitRecog, 0) == 0) {
    // Classify unknown character
    PIX unknown = pixRead("unknown_digit.png");
    if (unknown != null) {
        BytePointer result = new BytePointer(10);
        FloatPointer confidence = new FloatPointer(1);

        int status = recogClassifyPixel(digitRecog, unknown, null, result, confidence, 0);
        if (status == 0) {
            System.out.println("Recognized: " + result.getString() +
                              " (confidence: " + confidence.get() + ")");
        }
        pixDestroy(unknown);
    }
}

// Release the native recognizer once it is no longer needed.
recogDestroy(digitRecog);

Document Dewarping

Correct document distortion and perspective issues for improved OCR accuracy.

/**
 * Single page dewarp correction.
 *
 * Created by dewarpCreate(); lists the source image, the sampled data and
 * control points for the vertical and horizontal directions, and the image
 * and sampling-grid dimensions.
 *
 * NOTE(review): confirm the accessor names against the generated L_DEWARP
 * class for the Leptonica release in use.
 */
class L_DEWARP extends Pointer {
    PIX pixs(); // source image
    PIXA sampv(); // vertical samples
    PIXA samph(); // horizontal samples
    PTA ptav(); // vertical control points
    PTA ptah(); // horizontal control points
    int w(); // image width
    int h(); // image height
    int nx(); // horizontal sampling points
    int ny(); // vertical sampling points
}

/**
 * Multi-page dewarp processing.
 *
 * Created by dewarpaCreate(); the sampling, redfactor, minlines and maxdist
 * accessors correspond to the arguments of that function.
 */
class L_DEWARPA extends Pointer {
    int nalloc(); // allocated array size
    int maxpage(); // maximum page number
    int sampling(); // sampling factor
    int redfactor(); // reduction factor
    int minlines(); // minimum lines for modeling
    int maxdist(); // maximum distance for interpolation
}

/**
 * Create a dewarp structure for a single page.
 *
 * NOTE(review): upstream Leptonica documents the input as a 1 bpp
 * (binarized) image - confirm before passing grayscale or color pages.
 *
 * @param pixs - Source document image
 * @param pageno - Page number identifier
 * @return L_DEWARP structure or null on failure
 */
L_DEWARP dewarpCreate(PIX pixs, int pageno);

/**
 * Create a multi-page dewarp structure.
 *
 * Per-page L_DEWARP models are added to it with dewarpaInsertDewarp().
 *
 * @param nmax - Maximum number of pages
 * @param sampling - Sampling density
 * @param redfactor - Size reduction factor
 * @param minlines - Minimum text lines required
 * @param maxdist - Maximum interpolation distance
 * @return L_DEWARPA structure or null on failure
 */
L_DEWARPA dewarpaCreate(int nmax, int sampling, int redfactor, int minlines, int maxdist);

/**
 * Build the dewarp model for a page.
 *
 * Must succeed before dewarpApply() is called with the same structure.
 *
 * NOTE(review): upstream Leptonica appears to name this function
 * dewarpBuildPageModel - confirm the name in the generated bindings.
 *
 * @param dew - Dewarp structure
 * @param debugfile - Debug output file (can be null)
 * @return 0 on success, 1 on failure
 */
int dewarpBuildModel(L_DEWARP dew, String debugfile);

/**
 * Apply the dewarp correction to an image.
 *
 * The result can be null, so check it before use.
 *
 * NOTE(review): upstream Leptonica appears to apply a model through
 * dewarpaApplyDisparity on the multi-page structure - confirm that this
 * single-page name and signature exist in the generated bindings.
 *
 * @param dew - Dewarp structure with built model
 * @param pixs - Source image to correct
 * @param debugfile - Debug output file (can be null)
 * @return Corrected PIX or null on failure
 */
PIX dewarpApply(L_DEWARP dew, PIX pixs, String debugfile);

/**
 * Insert a page's dewarp structure into the multi-page dewarper.
 *
 * NOTE(review): upstream Leptonica documents that the multi-page structure
 * takes ownership of the inserted dewarp - confirm before destroying dew
 * separately.
 *
 * @param dewa - Multi-page dewarp structure
 * @param dew - Single-page dewarp structure to insert
 * @return 0 on success, 1 on failure
 */
int dewarpaInsertDewarp(L_DEWARPA dewa, L_DEWARP dew);

Usage Examples:

// Single page dewarping
PIX document = pixRead("scanned_page.jpg");
// NOTE(review): upstream dewarpCreate is documented for 1 bpp input; binarize
// the page first if it is grayscale or color - confirm.
L_DEWARP dewarp = (document != null) ? dewarpCreate(document, 1) : null;

// Build correction model
if (dewarp != null && dewarpBuildModel(dewarp, null) == 0) {
    // Apply correction; a null result signals failure and must not be written.
    PIX corrected = dewarpApply(dewarp, document, null);
    if (corrected != null) {
        pixWrite("corrected_page.jpg", corrected, IFF_JFIF_JPEG);
        pixDestroy(corrected);
    }
}

// Native objects are not garbage collected; release them explicitly.
if (dewarp != null) {
    dewarpDestroy(dewarp);
}
if (document != null) {
    pixDestroy(document);
}

// Multi-page document processing
// pageCount: number of pages to process, supplied by the surrounding code.
L_DEWARPA multiPage = dewarpaCreate(100, 7, 1, 6, 30);

// Process each page
for (int i = 1; multiPage != null && i <= pageCount; i++) {
    PIX page = pixRead("page_" + i + ".jpg");
    if (page == null) {
        continue; // unreadable page: skip it
    }
    L_DEWARP pageDewarp = dewarpCreate(page, i);

    if (pageDewarp != null) {
        if (dewarpBuildModel(pageDewarp, null) == 0) {
            // Upstream documents that multiPage owns pageDewarp from here on,
            // so it is released together with multiPage below.
            dewarpaInsertDewarp(multiPage, pageDewarp);

            PIX corrected = dewarpApply(pageDewarp, page, null);
            if (corrected != null) {
                pixWrite("corrected_page_" + i + ".jpg", corrected, IFF_JFIF_JPEG);
                pixDestroy(corrected);
            }
        } else {
            // Never inserted, so this code still owns it.
            dewarpDestroy(pageDewarp);
        }
    }
    pixDestroy(page);
}

if (multiPage != null) {
    dewarpaDestroy(multiPage);
}

JBig2 Classification

Specialized encoding and classification for document compression and analysis.

/**
 * JBig2 symbol classifier.
 *
 * Created by jbClasserCreate(); the method and components accessors
 * correspond to the two arguments of that function.
 */
class JBCLASSER extends Pointer {
    SARRAY safiles(); // input file names
    int method(); // classification method
    int components(); // number of components
    int maxwidth(); // maximum symbol width
    int maxheight(); // maximum symbol height
    int npages(); // number of pages processed
    int baseindex(); // base index for symbols
}

/**
 * JBig2 encoding data.
 *
 * Returned by jbClasserGetJbData().
 *
 * NOTE(review): upstream Leptonica describes the pix field as the composite
 * of class templates rather than a reconstructed page, and ptaul as a PTA -
 * confirm against the generated JBDATA class.
 */
class JBDATA extends Pointer {
    PIX pix(); // reconstructed image
    int w(); // image width
    int h(); // image height
    int nclass(); // number of symbol classes
    PIXA pixat(); // template symbols
    PTAA ptaul(); // upper-left coordinates
}

/**
 * Create a JBig2 classifier.
 *
 * NOTE(review): upstream Leptonica treats components as a selector for the
 * kind of component to classify (connected components, characters or words)
 * rather than a count - confirm the accepted values.
 *
 * @param method - Classification method (see the JB_* constants)
 * @param components - Number of components to use
 * @return JBCLASSER or null on failure
 */
JBCLASSER jbClasserCreate(int method, int components);

/**
 * Add a page to the classifier.
 *
 * NOTE(review): upstream Leptonica appears to name the page-adding calls
 * jbAddPage / jbAddPages - confirm that this name and signature exist in the
 * generated bindings.
 *
 * @param classer - JBig2 classifier
 * @param pixs - Page image
 * @param filename - Source filename
 * @return 0 on success, 1 on failure
 */
int jbClasserAddPage(JBCLASSER classer, PIX pixs, String filename);

/**
 * Generate JBig2 encoding data.
 *
 * The result can be null, so check it before calling its accessors.
 *
 * NOTE(review): upstream Leptonica appears to produce JBDATA through
 * jbDataSave(classer) - confirm that this name and signature exist in the
 * generated bindings.
 *
 * @param classer - Trained classifier
 * @param pageno - Page number to encode
 * @return JBDATA encoding or null on failure
 */
JBDATA jbClasserGetJbData(JBCLASSER classer, int pageno);

Usage Examples:

// Create JBig2 classifier for document compression
JBCLASSER classifier = jbClasserCreate(JB_CLASSIFICATION, 8);

// Add document pages
// pageCount: number of pages to process, supplied by the surrounding code.
for (int i = 0; i < pageCount; i++) {
    String filename = "page_" + i + ".tiff";
    PIX page = pixRead(filename);
    if (page == null) {
        continue; // unreadable page: skip it
    }
    jbClasserAddPage(classifier, page, filename);
    // Native images are not garbage collected; release each page once added.
    // NOTE(review): assumes the classifier keeps no reference to page - confirm.
    pixDestroy(page);
}

// Generate compressed representation; null signals failure.
JBDATA compressed = jbClasserGetJbData(classifier, 0);
PIX reconstructed = (compressed != null) ? compressed.pix() : null;

Bitmap Fonts

Bitmap font rendering for text overlay and document generation.

/**
 * Bitmap font structure.
 *
 * Created by bmfCreate(); holds the character bitmaps for one font size
 * together with the directory passed at creation.
 */
class L_BMF extends Pointer {
    PIXA pixa(); // array of character bitmaps
    int size(); // font size
    BytePointer directory(); // font directory
}

/**
 * Create a bitmap font.
 *
 * @param dir - Font directory path
 * @param fontsize - Font size (see the L_BM_FONT_* constants)
 * @return L_BMF font or null on failure
 */
L_BMF bmfCreate(String dir, int fontsize);

/**
 * Render text using a bitmap font.
 *
 * NOTE(review): upstream Leptonica's bmfGetPix takes a single character and
 * returns that character's bitmap - confirm whether a String overload exists
 * in the generated bindings.
 *
 * @param bmf - Bitmap font
 * @param textstr - Text to render
 * @return PIX with rendered text or null on failure
 */
PIX bmfGetPix(L_BMF bmf, String textstr);

/**
 * Get text width in pixels.
 *
 * The width is delivered through pw; the return value is only a status.
 *
 * NOTE(review): upstream Leptonica's bmfGetWidth measures a single character,
 * with bmfGetStringWidth as the string variant - confirm which one the
 * generated bindings expose with this signature.
 *
 * @param bmf - Bitmap font
 * @param textstr - Text string
 * @param pw - Caller-allocated int that receives the width in pixels
 * @return 0 on success, 1 on failure
 */
int bmfGetWidth(L_BMF bmf, String textstr, IntPointer pw);

Usage Examples:

// Create bitmap font
L_BMF font = bmfCreate("/usr/share/fonts/leptonica", 12);
if (font == null) {
    throw new IllegalStateException("bmfCreate failed");
}

// Render text
PIX textImage = bmfGetPix(font, "Hello, World!");

// Get text dimensions; only read the output value when the call succeeded.
IntPointer width = new IntPointer(1);
if (bmfGetWidth(font, "Sample Text", width) == 0) {
    System.out.println("Text width: " + width.get() + " pixels");
}

// Overlay text on image
// pixSetTextline paints the string directly into baseImage (in place), with
// the text baseline starting at (100, 50). The explicit casts pick the
// IntPointer overload for the two optional output arguments.
PIX baseImage = pixRead("background.png");
if (baseImage != null) {
    pixSetTextline(baseImage, font, "Hello, World!", 0x000000, 100, 50,
                   (IntPointer) null, (IntPointer) null);
}

Text Processing Pipeline

Complete OCR Workflow

// 1. Document preprocessing
PIX document = pixRead("document.jpg");
if (document == null) {
    throw new IllegalStateException("Could not read document.jpg");
}
PIX gray = pixConvertRGBToGray(document, 0.299f, 0.587f, 0.114f);
// NOTE(review): upstream pixOtsuAdaptiveThreshold returns a status code and
// delivers the binarized image through an output parameter - confirm this
// call shape against the generated bindings.
PIX binary = pixOtsuAdaptiveThreshold(gray, 32, 32, 0, 0, 0.1f, null);

// 2. Dewarp correction
L_DEWARP dewarp = dewarpCreate(binary, 1);
if (dewarp != null && dewarpBuildModel(dewarp, null) == 0) {
    // Keep the uncorrected image if dewarping fails, instead of replacing
    // it with null; release it only once a corrected image exists.
    PIX dewarped = dewarpApply(dewarp, binary, null);
    if (dewarped != null) {
        pixDestroy(binary);
        binary = dewarped;
    }
}

// 3. Character segmentation (hypothetical)
BOXA characters = segmentCharacters(binary);

// 4. Character recognition
L_RECOG ocr = loadTrainedOCR(); // hypothetical
StringBuilder result = new StringBuilder();

int charCount = boxaGetCount(characters);
for (int i = 0; i < charCount; i++) {
    BOX charBox = boxaGetBox(characters, i, L_CLONE);
    PIX charImage = pixClipRectangle(binary, charBox, null);
    boxDestroy(charBox); // the clone from boxaGetBox must be released
    if (charImage == null) {
        continue; // clipping failed for this box
    }

    BytePointer character = new BytePointer(10);
    FloatPointer confidence = new FloatPointer(1);

    if (recogClassifyPixel(ocr, charImage, null, character, confidence, 0) == 0) {
        if (confidence.get() > 0.7f) { // confidence threshold
            result.append(character.getString());
        }
    }
    pixDestroy(charImage); // one native image per character: free it each pass
}

System.out.println("Recognized text: " + result.toString());

// Release the remaining native objects created above.
boxaDestroy(characters);
if (dewarp != null) {
    dewarpDestroy(dewarp);
}
pixDestroy(binary);
pixDestroy(gray);
pixDestroy(document);

Constants

// JBig2 classification methods (first argument of jbClasserCreate)
// NOTE(review): upstream Leptonica names the value-0 method JB_RANKHAUS -
// confirm that JB_CLASSIFICATION exists in the generated bindings.
static final int JB_CLASSIFICATION = 0;
static final int JB_CORRELATION = 1;

// Font sizes (second argument of bmfCreate); each constant equals its size
// NOTE(review): the list skips 18 - confirm whether that size is supported.
static final int L_BM_FONT_4 = 4;
static final int L_BM_FONT_6 = 6;
static final int L_BM_FONT_8 = 8;
static final int L_BM_FONT_10 = 10;
static final int L_BM_FONT_12 = 12;
static final int L_BM_FONT_14 = 14;
static final int L_BM_FONT_16 = 16;
static final int L_BM_FONT_20 = 20;

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--leptonica-platform

docs

collections.md

connected-components.md

tessl/maven-org-bytedeco--leptonica-platform