JavaCPP platform aggregator for Tesseract OCR native libraries, providing cross-platform OCR capabilities in Java applications.
npx @tessl/cli install tessl/maven-org-bytedeco--tesseract-platform@5.5.0
JavaCPP platform aggregator for Tesseract OCR native libraries, providing comprehensive optical character recognition capabilities in Java applications. This package bundles cross-platform native libraries for Tesseract 5.5.1, enabling text extraction from images across Linux, macOS, Windows, and Android platforms.
org.bytedeco:tesseract-platform:5.5.1-1.5.12
import org.bytedeco.javacpp.*;
import org.bytedeco.tesseract.*;
import org.bytedeco.leptonica.*;
import static org.bytedeco.tesseract.global.tesseract.*;
import static org.bytedeco.leptonica.global.leptonica.*;import org.bytedeco.javacpp.*;
import org.bytedeco.leptonica.*;
import org.bytedeco.tesseract.*;
import static org.bytedeco.leptonica.global.leptonica.*;
import static org.bytedeco.tesseract.global.tesseract.*;
public class BasicOCR {
/**
 * Runs OCR on {@code image.png} using the English language data and prints
 * the recognized text to standard output.
 *
 * <p>Failures (engine initialization, image loading, recognition) are
 * reported on standard error. Native resources are released on every exit
 * path, including the failure paths.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    TessBaseAPI api = new TessBaseAPI();
    PIX image = null;
    BytePointer outText = null;
    try {
        // Initialize tesseract with English language
        if (api.Init(null, "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            return;
        }
        // Load image using Leptonica; a null result means the file could not be read
        image = pixRead("image.png");
        if (image == null) {
            System.err.println("Could not read image: image.png");
            return;
        }
        api.SetImage(image);
        // Extract text; guard against a null result before dereferencing it
        outText = api.GetUTF8Text();
        if (outText == null) {
            System.err.println("Could not extract text from image.");
            return;
        }
        System.out.println("OCR Result: " + outText.getString());
    } finally {
        // Cleanup, in the same order as before: engine, text buffer, image
        api.End();
        if (outText != null) {
            outText.deallocate();
        }
        if (image != null) {
            image.close();
        }
    }
}
}
The Tesseract platform provides a comprehensive OCR solution built on the JavaCPP framework:
Primary OCR functionality including initialization, image processing, text recognition, and result extraction. The TessBaseAPI class serves as the main entry point for all OCR operations.
public class TessBaseAPI {
// Initialization
public TessBaseAPI();
public static native @Cast("const char*") BytePointer Version();
public int Init(String datapath, String language, int oem);
public int Init(String datapath, String language);
public void End();
// Image Processing
public void SetImage(PIX pix);
public void SetImage(byte[] imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line);
public void SetRectangle(int left, int top, int width, int height);
public PIX GetThresholdedImage();
// Recognition
public int Recognize(ETEXT_DESC monitor);
public native @Cast("char*") BytePointer TesseractRect(@Cast("const unsigned char*") byte[] imagedata, int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
// Text Output
public native @Cast("char*") BytePointer GetUTF8Text();
public native @Cast("char*") BytePointer GetHOCRText(int page_number);
public native @Cast("char*") BytePointer GetTSVText(int page_number);
public int MeanTextConf();
public int[] AllWordConfidences();
}
Hierarchical iterators for navigating recognition results from page level down to individual characters. Provides access to bounding boxes, confidence scores, text formatting, and layout information.
// Declaration-only excerpt of the page-level result iterator: signatures are
// listed without bodies for reference.
// NOTE(review): the `level` arguments presumably take the RIL_* page hierarchy
// constants listed elsewhere in this document — confirm against the bindings.
public class PageIterator {
public void Begin();
public boolean Next(int level);
// NOTE(review): the int[] parameters look like output slots filled by the call — confirm.
public boolean BoundingBox(int level, int[] left, int[] top, int[] right, int[] bottom);
public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);
public PIX GetBinaryImage(int level);
// NOTE(review): return value presumably matches one of the PT_* block type constants — confirm.
public int BlockType();
public void Orientation(int[] orientation, int[] writing_direction,
int[] textline_order, float[] deskew_angle);
}
public class ResultIterator extends LTRResultIterator {
public String GetUTF8Text(int level);
public float Confidence(int level);
public boolean ParagraphIsLtr();
public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
boolean[] is_underlined, boolean[] is_monospace,
boolean[] is_serif, boolean[] is_smallcaps,
int[] pointsize, int[] font_id);
}
Configurable pipeline for generating output in multiple formats including plain text, structured markup (hOCR, ALTO, PAGE), searchable PDF, and training data formats.
// Declaration-only excerpt of the abstract renderer base type; the concrete
// renderers listed after it extend this class.
public abstract class TessResultRenderer {
// NOTE(review): presumably links `next` into a chain of renderers — confirm.
public void insert(TessResultRenderer next);
public boolean BeginDocument(String title);
public boolean AddImage(TessBaseAPI api);
public boolean EndDocument();
public String file_extension();
}
// Concrete renderer classes
public class TessTextRenderer extends TessResultRenderer;
public class TessHOcrRenderer extends TessResultRenderer;
public class TessPDFRenderer extends TessResultRenderer;
public class TessAltoRenderer extends TessResultRenderer;
public class TessTsvRenderer extends TessResultRenderer;
Advanced page structure analysis including text block detection, reading order determination, and geometric layout information. Supports complex document layouts with tables, columns, and mixed content.
// Layout-analysis subset of TessBaseAPI (declaration-only excerpt; the class
// is listed in several partial excerpts throughout this document).
public class TessBaseAPI {
public PageIterator AnalyseLayout();
// NOTE(review): the PIXA[] / int[][] parameters look like optional output
// slots filled by the call — confirm against the bindings.
public BOXA GetRegions(PIXA[] pixa);
public BOXA GetTextlines(PIXA[] pixa, int[][] blockids);
public BOXA GetWords(PIXA[] pixa);
public BOXA GetComponentImages(int level, boolean text_only, PIXA[] pixa, int[][] blockids);
}
// Layout analysis constants
public static final int PSM_AUTO = 3; // Fully automatic page segmentation
public static final int PSM_SINGLE_COLUMN = 4; // Single column of text
public static final int PSM_SINGLE_BLOCK = 6; // Single uniform block of text
public static final int PSM_SINGLE_LINE = 7; // Single text line
Comprehensive configuration system with hundreds of parameters controlling OCR behavior, page segmentation, character recognition, and output formatting.
public class TessBaseAPI {
// Parameter Management
public boolean SetVariable(String name, String value);
public boolean GetIntVariable(String name, int[] value);
public boolean GetBoolVariable(String name, boolean[] value);
public boolean GetDoubleVariable(String name, double[] value);
public String GetStringVariable(String name);
// Page Segmentation
public void SetPageSegMode(int mode);
public int GetPageSegMode();
// OCR Engine Mode
public static final int OEM_TESSERACT_ONLY = 0;
public static final int OEM_LSTM_ONLY = 1;
public static final int OEM_DEFAULT = 3;
}
Multi-language OCR with support for 100+ languages, custom language models, and language detection capabilities.
// Language-query subset of TessBaseAPI (declaration-only excerpt).
public class TessBaseAPI {
public String GetInitLanguagesAsString();
// NOTE(review): both methods return void and take a StringVector, so results
// are presumably written into `langs` — confirm.
public void GetLoadedLanguagesAsVector(StringVector langs);
public void GetAvailableLanguagesAsVector(StringVector langs);
}
// Language initialization examples:
// "eng" - English
// "fra" - French
// "deu" - German
// "chi_sim" - Simplified Chinese
// "ara" - Arabic
// "eng+fra+deu" - Multiple languages
// Progress monitoring and cancellation
// Declaration-only excerpt: accessors for recognition progress state plus a
// deadline setter and check.
public class ETEXT_DESC {
public short progress(); // Progress percentage (0-100)
public boolean more_to_come(); // More processing pending
public boolean ocr_alive(); // OCR engine active
public byte err_code(); // Error code
// NOTE(review): deadline is presumably in milliseconds, per the parameter name — confirm.
public void set_deadline_msecs(int deadline_msecs);
public boolean deadline_exceeded();
}
// Unicode character handling
// Declaration-only excerpt of the Unicode character helper: construction from
// a UTF-8 string or a code point, plus UTF-8/UTF-32 conversion helpers.
public class UNICHAR {
// NOTE(review): `len` is presumably the length of utf8_str to consume — confirm units (bytes vs. chars).
public UNICHAR(String utf8_str, int len);
public UNICHAR(int unicode);
public int first_uni(); // Get first character as UCS-4
public int utf8_len(); // Get UTF-8 byte length
public String utf8_str(); // Get UTF-8 string
public static int[] UTF8ToUTF32(String utf8_str);
public static String UTF32ToUTF8(int[] str32);
}
// Collection types
public class StringVector {
public StringVector();
public long size();
public String get(long i);
public StringVector put(long i, String value);
public StringVector push_back(String value);
public void clear();
}
// Page hierarchy levels for iteration
public static final int RIL_BLOCK = 0; // Block level
public static final int RIL_PARA = 1; // Paragraph level
public static final int RIL_TEXTLINE = 2; // Text line level
public static final int RIL_WORD = 3; // Word level
public static final int RIL_SYMBOL = 4; // Character/symbol level
// Layout block types
public static final int PT_UNKNOWN = 0; // Unknown block type
public static final int PT_FLOWING_TEXT = 1; // Flowing text
public static final int PT_HEADING_TEXT = 2; // Heading text
public static final int PT_PULLOUT_TEXT = 3; // Pull-out text
public static final int PT_EQUATION = 4; // Mathematical equation
public static final int PT_TABLE = 6; // Table
public static final int PT_VERTICAL_TEXT = 7; // Vertical text
public static final int PT_CAPTION_TEXT = 8; // Caption text
public static final int PT_FLOWING_IMAGE = 9; // Flowing image
public static final int PT_NOISE = 14; // Noise/artifacts