CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-bytedeco--javacpp-presets-platform

Cross-platform Java bindings for 60+ native C/C++ libraries including OpenCV, FFmpeg, PyTorch, TensorFlow, and scientific computing libraries

Pending
Overview
Eval results
Files

docs/text-processing.md

Text Processing

Text recognition, natural language processing, and document analysis capabilities through Tesseract OCR, Leptonica image processing, and SentencePiece tokenization.

Capabilities

Optical Character Recognition (OCR)

Tesseract OCR engine for extracting text from images and documents.

/**
 * Tesseract OCR API base class.
 * JavaCPP binding around the native Tesseract engine; extends Pointer, so the
 * underlying native object's lifetime follows JavaCPP Pointer/PointerScope rules.
 */
public class TessBaseAPI extends Pointer {
    /**
     * Create Tesseract API instance
     */
    public TessBaseAPI();
    
    /**
     * Initialize Tesseract with language and data path
     * @param datapath Path to tessdata directory; may be null to fall back to
     *                 Tesseract's default tessdata lookup (the usage examples
     *                 in this document pass null)
     * @param language Language code (e.g., "eng", "spa", "fra")
     * @return true if initialization successful
     */
    public native boolean Init(String datapath, String language);
    
    /**
     * Initialize with language, OCR engine mode, and config variables
     * @param datapath Path to tessdata directory
     * @param language Language code
     * @param mode OCR Engine Mode (OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, etc.)
     * @param configs Config files to load
     * @param configs_size Number of config files
     * @param vars_vec Variable names to set
     * @param vars_values Variable values to set
     * @param vars_vec_size Number of variables
     * @param set_only_non_debug_params Only set non-debug parameters
     * @return true if initialization successful
     */
    public native boolean Init(String datapath, String language, int mode,
        PointerPointer configs, int configs_size, StringVector vars_vec,
        StringVector vars_values, long vars_vec_size, boolean set_only_non_debug_params);
    
    /**
     * Set image from memory buffer
     * @param imagedata Image data buffer
     * @param width Image width in pixels
     * @param height Image height in pixels
     * @param bytes_per_pixel Bytes per pixel (1, 3, or 4)
     * @param bytes_per_line Bytes per line (width * bytes_per_pixel if no padding)
     */
    public native void SetImage(BytePointer imagedata, int width, int height,
        int bytes_per_pixel, int bytes_per_line);
    
    /**
     * Set image from PIX (Leptonica image format)
     * @param pix Leptonica PIX image
     */
    public native void SetImage(PIX pix);
    
    /**
     * Get recognized text as UTF-8 string.
     * Note: the "caller must free with delete[]" requirement belongs to the
     * underlying C++ API; the returned Java String needs no manual free.
     * @return Recognized text
     */
    public native String GetUTF8Text();
    
    /**
     * Get recognition confidence (0-100)
     * @return Mean confidence value
     */
    public native int MeanTextConf();
    
    /**
     * Get word-level recognition results
     * @return Array of word confidence values (in the C++ API this array is
     *         -1 terminated — TODO confirm how the binding exposes its length)
     */
    public native IntPointer AllWordConfidences();
    
    /**
     * Set variable value
     * @param name Variable name (e.g. "tessedit_char_whitelist")
     * @param value Variable value
     * @return true if variable was set
     */
    public native boolean SetVariable(String name, String value);
    
    /**
     * Get variable value
     * @param name Variable name
     * @return Variable value or null if not found
     */
    public native String GetStringVariable(String name);
    
    /**
     * Set page segmentation mode
     * @param mode Page segmentation mode (one of the PSM_* constants)
     */
    public native void SetPageSegMode(int mode);
    
    /**
     * Get current page segmentation mode
     * @return Current PSM
     */
    public native int GetPageSegMode();
    
    /**
     * Set rectangle to restrict recognition area
     * @param left Left boundary
     * @param top Top boundary
     * @param width Rectangle width
     * @param height Rectangle height
     */
    public native void SetRectangle(int left, int top, int width, int height);
    
    /**
     * Clear recognition results and free memory
     */
    public native void Clear();
    
    /**
     * End recognition and free resources
     */
    public native void End();
}

/**
 * Result iterator for detailed OCR results.
 * The {@code level} parameter on every method is a page-iterator-level
 * constant (e.g. RIL_WORD for word granularity, as used in the examples).
 */
public class ResultIterator extends Pointer {
    /**
     * Get text at current position
     * @param level Text level (word, line, paragraph, block — a RIL_* constant)
     * @return Text string
     */
    public native String GetUTF8Text(int level);
    
    /**
     * Get confidence at current position
     * @param level Text level
     * @return Confidence value (0-100)
     */
    public native float Confidence(int level);
    
    /**
     * Get bounding box at current position
     * @param level Text level
     * @param left Output left coordinate
     * @param top Output top coordinate
     * @param right Output right coordinate
     * @param bottom Output bottom coordinate
     * @return true if bounding box available
     */
    public native boolean BoundingBox(int level, IntPointer left, IntPointer top,
        IntPointer right, IntPointer bottom);
    
    /**
     * Move to next element at specified level
     * @param level Text level
     * @return true if moved successfully
     */
    public native boolean Next(int level);
    
    /**
     * Check if iterator is at beginning of element
     * @param level Text level
     * @return true if at beginning
     */
    public native boolean IsAtBeginningOf(int level);
    
    /**
     * Check if iterator is at final element
     * @param level Text level
     * @param element Element type
     * @return true if at final element
     */
    public native boolean IsAtFinalElement(int level, int element);
}

/**
 * Page segmentation modes.
 * Values accepted by TessBaseAPI.SetPageSegMode and returned by GetPageSegMode.
 */
public static final int PSM_OSD_ONLY = 0;           // Orientation and script detection only
public static final int PSM_AUTO_OSD = 1;          // Automatic page segmentation with OSD
public static final int PSM_AUTO_ONLY = 2;         // Automatic page segmentation without OSD
public static final int PSM_AUTO = 3;              // Fully automatic page segmentation (default)
public static final int PSM_SINGLE_COLUMN = 4;     // Single uniform column of text
public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5; // Single uniform block of vertically aligned text
public static final int PSM_SINGLE_BLOCK = 6;      // Single uniform block of text
public static final int PSM_SINGLE_LINE = 7;       // Single text line
public static final int PSM_SINGLE_WORD = 8;       // Single word
public static final int PSM_CIRCLE_WORD = 9;       // Single word in a circle
public static final int PSM_SINGLE_CHAR = 10;      // Single character
public static final int PSM_SPARSE_TEXT = 11;      // Sparse text (find as much text as possible)
public static final int PSM_SPARSE_TEXT_OSD = 12;  // Sparse text with orientation and script detection
public static final int PSM_RAW_LINE = 13;         // Raw line (no assumptions about text layout)

/**
 * OCR Engine modes.
 * Values for the {@code mode} parameter of the extended Init overload.
 */
public static final int OEM_TESSERACT_ONLY = 0;    // Legacy Tesseract engine only
public static final int OEM_LSTM_ONLY = 1;         // Neural nets LSTM engine only
public static final int OEM_TESSERACT_LSTM_COMBINED = 2; // Both engines combined
public static final int OEM_DEFAULT = 3;           // Default (whatever is available)

Image Processing for OCR

Leptonica library providing image processing operations optimized for document analysis and OCR preprocessing.

/**
 * PIX - Leptonica image structure.
 * Raster data is exposed as 32-bit words via getData(); getWpl() gives the
 * number of those words per raster line.
 */
public class PIX extends Pointer {
    /**
     * Get image width
     * @return Image width in pixels
     */
    public native int getWidth();
    
    /**
     * Get image height
     * @return Image height in pixels
     */
    public native int getHeight();
    
    /**
     * Get image depth (bits per pixel)
     * @return Image depth
     */
    public native int getDepth();
    
    /**
     * Get image data pointer
     * @return Pointer to image data (32-bit words; see getWpl for line stride)
     */
    public native IntPointer getData();
    
    /**
     * Get words per line
     * @return Number of 32-bit words per raster line
     */
    public native int getWpl();
    
    /**
     * Get input format
     * @return Input file format
     */
    public native int getInputFormat();
    
    /**
     * Get X resolution (DPI)
     * @return X resolution
     */
    public native int getXRes();
    
    /**
     * Get Y resolution (DPI)
     * @return Y resolution
     */
    public native int getYRes();
    
    /**
     * Clone PIX image
     * @return Cloned image
     */
    public native PIX pixClone();
    
    /**
     * Copy PIX image
     * @return Copied image
     */
    public native PIX pixCopy();
}

/**
 * Image I/O operations
 */
public static class LeptonicaIO {
    /**
     * Read image from file
     * @param filename Image file path
     * @return PIX image or null on error — callers should null-check before use
     */
    public static native PIX pixRead(String filename);
    
    /**
     * Write image to file
     * @param filename Output file path
     * @param pix Image to write
     * @param format Output format (IFF_PNG, IFF_JPEG, etc.)
     * @return 0 on success, 1 on error
     */
    public static native int pixWrite(String filename, PIX pix, int format);
    
    /**
     * Read image from memory
     * @param data Image data buffer
     * @param size Buffer size
     * @return PIX image or null on error
     */
    public static native PIX pixReadMem(BytePointer data, long size);
    
    /**
     * Write image to memory
     * @param pdata Output data buffer pointer
     * @param psize Output buffer size
     * @param pix Image to write
     * @param format Output format
     * @return 0 on success, 1 on error
     */
    public static native int pixWriteMem(PointerPointer pdata, SizeTPointer psize,
        PIX pix, int format);
    
    /**
     * Display image (X11 or other display).
     * NOTE: requires a display environment to be available at runtime.
     * @param pix Image to display
     * @param x X position
     * @param y Y position
     * @return 0 on success, 1 on error
     */
    public static native int pixDisplay(PIX pix, int x, int y);
}

/**
 * Image enhancement and preprocessing
 */
public static class LeptonicaEnhancement {
    /**
     * Convert to 8 bits per pixel (grayscale)
     * @param pixs Source image
     * @return 8 bpp grayscale image
     */
    public static native PIX pixConvertTo8(PIX pixs);
    
    /**
     * Scale image
     * @param pixs Source image
     * @param scalex X scale factor
     * @param scaley Y scale factor
     * @return Scaled image
     */
    public static native PIX pixScale(PIX pixs, float scalex, float scaley);
    
    /**
     * Rotate image
     * @param pixs Source image
     * @param angle Rotation angle in radians
     * @param type Rotation type (L_ROTATE_AREA_MAP, etc.)
     * @param incolor Fill color for background
     * @param width Output width (0 for auto)
     * @param height Output height (0 for auto)
     * @return Rotated image
     */
    public static native PIX pixRotate(PIX pixs, float angle, int type, int incolor,
        int width, int height);
    
    /**
     * Deskew image (correct skew angle)
     * @param pixs Source image
     * @param redsearch Reduction factor for search
     * @return Deskewed image — the usage examples treat a null return as
     *         "deskew unavailable" and fall back to the input image
     */
    public static native PIX pixDeskew(PIX pixs, int redsearch);
    
    /**
     * Unsharp mask filter for sharpening
     * @param pixs Source image
     * @param halfwidth Half-width of convolution kernel
     * @param fract Fraction for mixing
     * @return Sharpened image
     */
    public static native PIX pixUnsharpMasking(PIX pixs, int halfwidth, float fract);
    
    /**
     * Otsu thresholding for binarization
     * @param pixs Source grayscale image
     * @param sx Tile width for adaptive threshold
     * @param sy Tile height for adaptive threshold
     * @param smoothx Smoothing width
     * @param smoothy Smoothing height
     * @param scorefract Fraction of max score
     * @param pthresh Output threshold value
     * @return Binary image
     */
    public static native PIX pixOtsuAdaptiveThreshold(PIX pixs, int sx, int sy,
        int smoothx, int smoothy, float scorefract, IntPointer pthresh);
    
    /**
     * Remove noise using morphological operations
     * @param pixs Source binary image
     * @param removal Type of removal (L_REMOVE_SMALL_CC, etc.)
     * @param minsize Minimum component size to keep
     * @param connectivity Connectivity (4 or 8)
     * @return Denoised image
     */
    public static native PIX pixRemoveNoise(PIX pixs, int removal, int minsize, int connectivity);
}

/**
 * Morphological operations.
 * Structuring elements (SEL) are created with helpers such as
 * selCreateBrick and released with selDestroy (see the usage examples).
 */
public static class LeptonicaMorphology {
    /**
     * Morphological erosion
     * @param pixs Source image
     * @param sel Structuring element
     * @return Eroded image
     */
    public static native PIX pixErode(PIX pixs, SEL sel);
    
    /**
     * Morphological dilation
     * @param pixs Source image
     * @param sel Structuring element
     * @return Dilated image
     */
    public static native PIX pixDilate(PIX pixs, SEL sel);
    
    /**
     * Morphological opening (erosion followed by dilation)
     * @param pixs Source image
     * @param sel Structuring element
     * @return Opened image
     */
    public static native PIX pixOpen(PIX pixs, SEL sel);
    
    /**
     * Morphological closing (dilation followed by erosion)
     * @param pixs Source image
     * @param sel Structuring element
     * @return Closed image
     */
    public static native PIX pixClose(PIX pixs, SEL sel);
}

Text Tokenization

SentencePiece library for neural text processing and tokenization.

/**
 * SentencePiece processor for text tokenization.
 * Every operation reports success/failure through a returned Status object;
 * check Status.ok() before consuming the output parameters.
 */
public class SentencePieceProcessor extends Pointer {
    /**
     * Create SentencePiece processor
     */
    public SentencePieceProcessor();
    
    /**
     * Load model from file
     * @param filename Path to SentencePiece model file
     * @return Status object indicating success/failure
     */
    public native Status Load(String filename);
    
    /**
     * Load model from serialized data
     * @param serialized_model_proto Serialized model data
     * @return Status object
     */
    public native Status LoadFromSerializedProto(String serialized_model_proto);
    
    /**
     * Encode text to pieces
     * @param input Input text
     * @param pieces Output token pieces
     * @return Status object
     */
    public native Status Encode(String input, StringVector pieces);
    
    /**
     * Encode text to IDs
     * @param input Input text
     * @param ids Output token IDs
     * @return Status object
     */
    public native Status Encode(String input, IntVector ids);
    
    /**
     * Decode pieces to text
     * @param pieces Input token pieces
     * @param output Output text
     * @return Status object
     */
    public native Status Decode(StringVector pieces, StringPointer output);
    
    /**
     * Decode IDs to text
     * @param ids Input token IDs
     * @param output Output text
     * @return Status object
     */
    public native Status Decode(IntVector ids, StringPointer output);
    
    /**
     * Sample encode with multiple possible segmentations
     * @param input Input text
     * @param nbest_size Number of best segmentations (the examples pass -1,
     *                   presumably to sample from the full distribution —
     *                   TODO confirm against SentencePiece docs)
     * @param alpha Smoothing parameter
     * @param pieces Output token pieces
     * @return Status object
     */
    public native Status SampleEncode(String input, int nbest_size, float alpha,
        StringVector pieces);
    
    /**
     * Get vocabulary size
     * @return Vocabulary size
     */
    public native int GetPieceSize();
    
    /**
     * Get piece from ID
     * @param id Token ID
     * @return Token piece string
     */
    public native String IdToPiece(int id);
    
    /**
     * Get ID from piece
     * @param piece Token piece string
     * @return Token ID
     */
    public native int PieceToId(String piece);
    
    /**
     * Check if token is unknown
     * @param id Token ID
     * @return true if unknown token
     */
    public native boolean IsUnknown(int id);
    
    /**
     * Check if token is control symbol
     * @param id Token ID
     * @return true if control symbol
     */
    public native boolean IsControl(int id);
    
    /**
     * Set encoding extra options
     * @param extra_option Extra options string
     * @return Status object
     */
    public native Status SetEncodeExtraOptions(String extra_option);
    
    /**
     * Set decoding extra options
     * @param extra_option Extra options string
     * @return Status object
     */
    public native Status SetDecodeExtraOptions(String extra_option);
}

/**
 * Status object for operation results.
 * Returned by SentencePieceProcessor and SentencePieceTrainer operations;
 * callers should check ok() before using any output values.
 */
public class Status extends Pointer {
    /**
     * Check if operation was successful
     * @return true if successful
     */
    public native boolean ok();
    
    /**
     * Get error code
     * @return Error code
     */
    public native int code();
    
    /**
     * Get error message
     * @return Error message string
     */
    public native String error_message();
    
    /**
     * Convert to string representation
     * @return Status string
     */
    public native String ToString();
}

/**
 * SentencePiece trainer for creating custom models
 */
public static class SentencePieceTrainer {
    /**
     * Train SentencePiece model
     * @param args Training arguments as space-separated --flag=value pairs
     *             (e.g. "--input=... --model_prefix=... --vocab_size=...")
     * @return Status object
     */
    public static native Status Train(String args);
    
    /**
     * Train from arguments map
     * @param kwargs Training arguments as key-value pairs
     * @return Status object
     */
    public static native Status Train(StringStringMap kwargs);
}

Usage Examples

Basic OCR with Tesseract

import org.bytedeco.tesseract.*;
import org.bytedeco.leptonica.*;
import static org.bytedeco.tesseract.global.tesseract.*;
import static org.bytedeco.leptonica.global.leptonica.*;

public class TesseractOCR {
    static {
        Loader.load(tesseract.class);
        Loader.load(leptonica.class);
    }
    
    /**
     * Run plain OCR on an image file and print the text with mean confidence.
     * @param imagePath path of the image to recognize
     */
    public static void basicOCR(String imagePath) {
        try (PointerScope scope = new PointerScope()) {
            // Initialize Tesseract API
            TessBaseAPI api = new TessBaseAPI();
            
            // Initialize with English language.
            // Passing null for datapath uses Tesseract's default tessdata
            // lookup — the tessdata directory must still be available.
            if (!api.Init(null, "eng")) {
                System.err.println("Could not initialize Tesseract API");
                return;
            }
            
            // Load image using Leptonica
            PIX image = pixRead(imagePath);
            if (image == null) {
                System.err.println("Could not load image: " + imagePath);
                api.End();
                return;
            }
            
            // Set image for OCR
            api.SetImage(image);
            
            // Get OCR result
            String ocrResult = api.GetUTF8Text();
            int confidence = api.MeanTextConf();
            
            System.out.println("OCR Result:");
            System.out.println(ocrResult);
            System.out.println("Mean confidence: " + confidence + "%");
            
            // Cleanup
            pixDestroy(image);
            api.End();
        }
    }
    
    /**
     * OCR with a character whitelist, page-segmentation mode, and a
     * restricted recognition rectangle.
     * @param imagePath path of the image to recognize
     */
    public static void configuredOCR(String imagePath) {
        try (PointerScope scope = new PointerScope()) {
            TessBaseAPI api = new TessBaseAPI();
            
            // Initialize with specific configurations
            if (!api.Init(null, "eng")) {
                System.err.println("Could not initialize Tesseract");
                return;
            }
            
            // Configure OCR parameters
            api.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ");
            api.SetPageSegMode(PSM_SINGLE_BLOCK);
            
            // Fix: null-check the loaded image before SetImage/pixDestroy,
            // matching basicOCR — pixRead returns null on error.
            PIX image = pixRead(imagePath);
            if (image == null) {
                System.err.println("Could not load image: " + imagePath);
                api.End();
                return;
            }
            api.SetImage(image);
            
            // Set recognition area (optional)
            api.SetRectangle(50, 50, 400, 200);
            
            String text = api.GetUTF8Text();
            System.out.println("Configured OCR Result: " + text);
            
            pixDestroy(image);
            api.End();
        }
    }
    
    /**
     * Word-level OCR: prints each recognized word with its confidence and
     * bounding box using a ResultIterator.
     * @param imagePath path of the image to recognize
     */
    public static void detailedOCR(String imagePath) {
        try (PointerScope scope = new PointerScope()) {
            TessBaseAPI api = new TessBaseAPI();
            // Fix: check the Init result instead of ignoring it.
            if (!api.Init(null, "eng")) {
                System.err.println("Could not initialize Tesseract API");
                return;
            }
            
            PIX image = pixRead(imagePath);
            if (image == null) {
                System.err.println("Could not load image: " + imagePath);
                api.End();
                return;
            }
            api.SetImage(image);
            
            // Fix: GetIterator() does not run recognition by itself; the
            // Tesseract API requires Recognize() first (GetUTF8Text() would
            // otherwise do this as a side effect). NOTE(review): confirm the
            // exact Recognize overload exposed by the preset.
            api.Recognize(null);
            
            // Get detailed results with iterator
            ResultIterator ri = api.GetIterator();
            if (ri != null) {
                int level = RIL_WORD;  // Word level
                
                do {
                    String word = ri.GetUTF8Text(level);
                    float conf = ri.Confidence(level);
                    
                    // Get bounding box
                    IntPointer left = new IntPointer(1);
                    IntPointer top = new IntPointer(1);
                    IntPointer right = new IntPointer(1);
                    IntPointer bottom = new IntPointer(1);
                    
                    if (ri.BoundingBox(level, left, top, right, bottom)) {
                        System.out.printf("Word: '%s' (conf: %.2f) at (%d,%d)-(%d,%d)\n",
                            word, conf, left.get(), top.get(), right.get(), bottom.get());
                    }
                    
                } while (ri.Next(level));
            }
            
            pixDestroy(image);
            api.End();
        }
    }
}

Image Preprocessing with Leptonica

import org.bytedeco.leptonica.*;
import static org.bytedeco.leptonica.global.leptonica.*;

public class ImagePreprocessing {
    static {
        Loader.load(leptonica.class);
    }
    
    /**
     * Prepare a document image for OCR: grayscale, upscale small images,
     * deskew, sharpen, binarize (adaptive Otsu), and denoise, then write
     * the result to outputPath as PNG.
     * @param inputPath source image path
     * @param outputPath destination path for the preprocessed PNG
     */
    public static void preprocessForOCR(String inputPath, String outputPath) {
        try (PointerScope scope = new PointerScope()) {
            // Load image
            PIX original = pixRead(inputPath);
            if (original == null) {
                System.err.println("Could not load image");
                return;
            }
            
            System.out.printf("Original image: %dx%d, depth: %d\n",
                original.getWidth(), original.getHeight(), original.getDepth());
            
            // Convert to 8-bit grayscale
            PIX gray = pixConvertTo8(original);
            
            // Scale up if image is small (improves OCR accuracy)
            PIX scaled = gray;
            if (gray.getWidth() < 300 || gray.getHeight() < 300) {
                float scale = Math.max(300.0f / gray.getWidth(), 300.0f / gray.getHeight());
                scaled = pixScale(gray, scale, scale);
                pixDestroy(gray);
            }
            
            // Deskew the image (pixDeskew may return null; keep the input then)
            PIX deskewed = pixDeskew(scaled, 2);
            if (deskewed != null) {
                pixDestroy(scaled);
                scaled = deskewed;
            }
            
            // Unsharp masking for better text definition
            PIX sharpened = pixUnsharpMasking(scaled, 5, 0.3f);
            
            // Adaptive binarization using Otsu
            IntPointer threshold = new IntPointer(1);
            PIX binary = pixOtsuAdaptiveThreshold(sharpened, 32, 32, 0, 0, 0.1f, threshold);
            
            System.out.println("Adaptive threshold: " + threshold.get());
            
            // Remove small noise components
            PIX denoised = pixRemoveNoise(binary, L_REMOVE_SMALL_CC, 3, 8);
            
            // Save preprocessed image
            pixWrite(outputPath, denoised, IFF_PNG);
            
            System.out.printf("Preprocessed image saved: %dx%d\n",
                denoised.getWidth(), denoised.getHeight());
            
            // Cleanup. Fix: also destroy 'scaled' — the grayscale/scaled/
            // deskewed intermediate was previously leaked (it is distinct
            // from 'sharpened' and, after the branches above, is the only
            // remaining handle on that image).
            pixDestroy(original);
            pixDestroy(scaled);
            pixDestroy(sharpened);
            pixDestroy(binary);
            pixDestroy(denoised);
        }
    }
    
    /**
     * Demonstrate basic binary morphology (erode/dilate/open/close) and a
     * horizontal-line opening, writing each result to a PNG in the CWD.
     * @param imagePath source image path
     */
    public static void morphologicalOperations(String imagePath) {
        try (PointerScope scope = new PointerScope()) {
            PIX original = pixRead(imagePath);
            // Fix: pixRead returns null on error; bail out before using it.
            if (original == null) {
                System.err.println("Could not load image");
                return;
            }
            PIX binary = pixConvertTo1(original, 128);  // Convert to binary
            
            // Create structuring elements
            SEL sel3x3 = selCreateBrick(3, 3, 1, 1, SEL_HIT);
            SEL sel5x1 = selCreateBrick(5, 1, 2, 0, SEL_HIT);
            
            // Morphological operations
            PIX eroded = pixErode(binary, sel3x3);
            PIX dilated = pixDilate(binary, sel3x3);
            PIX opened = pixOpen(binary, sel3x3);
            PIX closed = pixClose(binary, sel3x3);
            
            // Horizontal line detection
            PIX horizontal = pixOpen(binary, sel5x1);
            
            // Save results
            pixWrite("eroded.png", eroded, IFF_PNG);
            pixWrite("dilated.png", dilated, IFF_PNG);
            pixWrite("opened.png", opened, IFF_PNG);
            pixWrite("closed.png", closed, IFF_PNG);
            pixWrite("horizontal.png", horizontal, IFF_PNG);
            
            // Cleanup
            pixDestroy(original);
            pixDestroy(binary);
            pixDestroy(eroded);
            pixDestroy(dilated);
            pixDestroy(opened);
            pixDestroy(closed);
            pixDestroy(horizontal);
            selDestroy(sel3x3);
            selDestroy(sel5x1);
        }
    }
}

Text Tokenization with SentencePiece

import org.bytedeco.sentencepiece.*;
import static org.bytedeco.sentencepiece.global.sentencepiece.*;

public class TextTokenization {
    static {
        Loader.load(sentencepiece.class);
    }
    
    /**
     * Encode a sample sentence to pieces and IDs, decode it back, and dump
     * basic vocabulary information from the loaded model.
     * @param modelPath path to a pre-trained SentencePiece model file
     */
    public static void basicTokenization(String modelPath) {
        try (PointerScope scope = new PointerScope()) {
            // Create processor
            SentencePieceProcessor processor = new SentencePieceProcessor();
            
            // Load pre-trained model
            Status status = processor.Load(modelPath);
            if (!status.ok()) {
                System.err.println("Failed to load model: " + status.error_message());
                return;
            }
            
            String text = "This is a sample text for tokenization.";
            
            // Encode to pieces
            StringVector pieces = new StringVector();
            status = processor.Encode(text, pieces);
            
            if (status.ok()) {
                System.out.println("Input text: " + text);
                System.out.print("Pieces: ");
                for (int i = 0; i < pieces.size(); i++) {
                    System.out.print("'" + pieces.get(i).getString() + "' ");
                }
                System.out.println();
            }
            
            // Encode to IDs
            IntVector ids = new IntVector();
            status = processor.Encode(text, ids);
            
            if (status.ok()) {
                System.out.print("IDs: ");
                for (int i = 0; i < ids.size(); i++) {
                    System.out.print(ids.get(i) + " ");
                }
                System.out.println();
            }
            
            // Decode back to text
            StringPointer decoded = new StringPointer();
            status = processor.Decode(pieces, decoded);
            
            if (status.ok()) {
                System.out.println("Decoded: " + decoded.getString());
            }
            
            // Vocabulary info
            System.out.println("Vocabulary size: " + processor.GetPieceSize());
            System.out.println("First 10 pieces:");
            for (int i = 0; i < Math.min(10, processor.GetPieceSize()); i++) {
                System.out.println("  " + i + ": '" + processor.IdToPiece(i) + "'");
            }
        }
    }
    
    /**
     * Print several sampled segmentations of the same sentence using
     * SampleEncode (subword regularization).
     * @param modelPath path to a pre-trained SentencePiece model file
     */
    public static void samplingTokenization(String modelPath) {
        try (PointerScope scope = new PointerScope()) {
            SentencePieceProcessor processor = new SentencePieceProcessor();
            // Fix: check the Load status instead of ignoring it, matching
            // basicTokenization — encoding against an unloaded model is useless.
            Status load = processor.Load(modelPath);
            if (!load.ok()) {
                System.err.println("Failed to load model: " + load.error_message());
                return;
            }
            
            String text = "Neural machine translation with attention mechanism.";
            
            // Sample multiple segmentations
            System.out.println("Input: " + text);
            System.out.println("Sample segmentations:");
            
            for (int i = 0; i < 5; i++) {
                StringVector pieces = new StringVector();
                // nbest_size = -1: sample from the full distribution
                Status status = processor.SampleEncode(text, -1, 0.1f, pieces);
                
                if (status.ok()) {
                    System.out.print("Sample " + (i+1) + ": ");
                    for (int j = 0; j < pieces.size(); j++) {
                        System.out.print("'" + pieces.get(j).getString() + "' ");
                    }
                    System.out.println();
                }
            }
        }
    }
    
    /**
     * Train a small custom BPE model from training_data.txt, producing
     * custom_model.model / custom_model.vocab in the working directory.
     */
    public static void trainCustomModel() {
        try (PointerScope scope = new PointerScope()) {
            // Training arguments (space-separated --flag=value pairs)
            String args = "--input=training_data.txt " +
                         "--model_prefix=custom_model " +
                         "--vocab_size=8000 " +
                         "--character_coverage=0.9995 " +
                         "--model_type=bpe";
            
            Status status = SentencePieceTrainer.Train(args);
            
            if (status.ok()) {
                System.out.println("Model training completed successfully!");
                System.out.println("Model files: custom_model.model, custom_model.vocab");
            } else {
                System.err.println("Training failed: " + status.error_message());
            }
        }
    }
}

Complete OCR Pipeline

import org.bytedeco.javacpp.*;
import org.bytedeco.leptonica.*;
import org.bytedeco.sentencepiece.*;
import org.bytedeco.tesseract.*;

import static org.bytedeco.leptonica.global.leptonica.*;
import static org.bytedeco.sentencepiece.global.sentencepiece.*;
import static org.bytedeco.tesseract.global.tesseract.*;

public class OCRPipeline {
    // Consistency fix: load the native libraries up front like the other
    // examples in this document do.
    static {
        Loader.load(tesseract.class);
        Loader.load(leptonica.class);
        Loader.load(sentencepiece.class);
    }
    
    /**
     * End-to-end pipeline: Leptonica preprocessing, Tesseract OCR, and
     * optional SentencePiece tokenization of the recognized text.
     * @param imagePath document image to process
     * @param modelPath SentencePiece model path, or null to skip tokenization
     */
    public static void processDocument(String imagePath, String modelPath) {
        try (PointerScope scope = new PointerScope()) {
            // Step 1: Preprocess image
            // Fix: pixRead returns null on error; bail out before dereferencing.
            PIX original = pixRead(imagePath);
            if (original == null) {
                System.err.println("Could not load image: " + imagePath);
                return;
            }
            PIX gray = pixConvertTo8(original);
            PIX deskewed = pixDeskew(gray, 2);  // may be null; fall back to gray below
            PIX sharpened = pixUnsharpMasking(deskewed != null ? deskewed : gray, 5, 0.3f);
            
            IntPointer threshold = new IntPointer(1);
            PIX binary = pixOtsuAdaptiveThreshold(sharpened, 32, 32, 0, 0, 0.1f, threshold);
            PIX denoised = pixRemoveNoise(binary, L_REMOVE_SMALL_CC, 3, 8);
            
            // Step 2: OCR with Tesseract
            // Fix: check the Init result and free the images on failure
            // instead of silently proceeding with an uninitialized engine.
            TessBaseAPI api = new TessBaseAPI();
            if (!api.Init(null, "eng")) {
                System.err.println("Could not initialize Tesseract API");
                pixDestroy(original);
                pixDestroy(gray);
                if (deskewed != null) pixDestroy(deskewed);
                pixDestroy(sharpened);
                pixDestroy(binary);
                pixDestroy(denoised);
                return;
            }
            api.SetImage(denoised);
            
            String rawText = api.GetUTF8Text();
            int confidence = api.MeanTextConf();
            
            System.out.println("OCR Confidence: " + confidence + "%");
            System.out.println("Raw OCR Text:\n" + rawText);
            
            // Step 3: Post-process with SentencePiece (if model available)
            if (modelPath != null) {
                SentencePieceProcessor processor = new SentencePieceProcessor();
                Status status = processor.Load(modelPath);
                
                if (status.ok()) {
                    StringVector pieces = new StringVector();
                    processor.Encode(rawText, pieces);
                    
                    System.out.println("\nTokenized into " + pieces.size() + " pieces:");
                    for (int i = 0; i < Math.min(pieces.size(), 20); i++) {
                        System.out.print("'" + pieces.get(i).getString() + "' ");
                    }
                    System.out.println();
                }
            }
            
            // Cleanup
            pixDestroy(original);
            pixDestroy(gray);
            if (deskewed != null) pixDestroy(deskewed);
            pixDestroy(sharpened);
            pixDestroy(binary);
            pixDestroy(denoised);
            api.End();
        }
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-org-bytedeco--javacpp-presets-platform

docs

computer-vision.md

gpu-computing.md

index.md

machine-learning.md

multimedia.md

scientific-computing.md

text-processing.md

tile.json