JavaCPP platform aggregator for Tesseract OCR native libraries providing cross-platform OCR capabilities in Java applications
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
JavaCPP platform aggregator for Tesseract OCR native libraries, providing comprehensive optical character recognition capabilities in Java applications. This package bundles cross-platform native libraries for Tesseract 5.5.1, enabling text extraction from images across Linux, macOS, Windows, and Android platforms.
org.bytedeco:tesseract-platform:5.5.1-1.5.12
import org.bytedeco.javacpp.*;
import org.bytedeco.tesseract.*;
import org.bytedeco.leptonica.*;
import static org.bytedeco.tesseract.global.tesseract.*;
import static org.bytedeco.leptonica.global.leptonica.*;import org.bytedeco.javacpp.*;
import org.bytedeco.leptonica.*;
import org.bytedeco.tesseract.*;
import static org.bytedeco.leptonica.global.leptonica.*;
import static org.bytedeco.tesseract.global.tesseract.*;
public class BasicOCR {
/**
 * Runs OCR on {@code image.png} with the English language model and prints
 * the recognized text to standard output.
 *
 * <p>Failures (engine initialization, image loading, recognition) are
 * reported on standard error. Native resources are released on every exit
 * path, including early returns and exceptions.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    TessBaseAPI api = new TessBaseAPI();
    PIX image = null;
    BytePointer outText = null;
    try {
        // Initialize tesseract with English language
        if (api.Init(null, "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            return;
        }

        // Load image using Leptonica; a failed read yields null rather than an exception
        image = pixRead("image.png");
        if (image == null) {
            System.err.println("Could not read image: image.png");
            return;
        }
        api.SetImage(image);

        // Extract text; guard against a null result before dereferencing it
        outText = api.GetUTF8Text();
        if (outText == null) {
            System.err.println("Could not extract text from image.");
            return;
        }
        System.out.println("OCR Result: " + outText.getString());
    } finally {
        // Cleanup: same order as before, but skipping anything never allocated
        api.End();
        if (outText != null) {
            outText.deallocate();
        }
        if (image != null) {
            image.close();
        }
    }
}
}
The Tesseract platform provides a comprehensive OCR solution built on the JavaCPP framework:
Primary OCR functionality including initialization, image processing, text recognition, and result extraction. The TessBaseAPI class serves as the main entry point for all OCR operations.
public class TessBaseAPI {
// Initialization
public TessBaseAPI();
public static native @Cast("const char*") BytePointer Version();
public int Init(String datapath, String language, int oem);
public int Init(String datapath, String language);
public void End();
// Image Processing
public void SetImage(PIX pix);
public void SetImage(byte[] imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line);
public void SetRectangle(int left, int top, int width, int height);
public PIX GetThresholdedImage();
// Recognition
public int Recognize(ETEXT_DESC monitor);
public native @Cast("char*") BytePointer TesseractRect(@Cast("const unsigned char*") byte[] imagedata, int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
// Text Output
public native @Cast("char*") BytePointer GetUTF8Text();
public native @Cast("char*") BytePointer GetHOCRText(int page_number);
public native @Cast("char*") BytePointer GetTSVText(int page_number);
public int MeanTextConf();
public int[] AllWordConfidences();
}
Hierarchical iterators for navigating recognition results from page level down to individual characters. Provides access to bounding boxes, confidence scores, text formatting, and layout information.
public class PageIterator {
public void Begin();
public boolean Next(int level);
public boolean BoundingBox(int level, int[] left, int[] top, int[] right, int[] bottom);
public boolean Baseline(int level, int[] x1, int[] y1, int[] x2, int[] y2);
public PIX GetBinaryImage(int level);
public int BlockType();
public void Orientation(int[] orientation, int[] writing_direction,
int[] textline_order, float[] deskew_angle);
}
public class ResultIterator extends LTRResultIterator {
public String GetUTF8Text(int level);
public float Confidence(int level);
public boolean ParagraphIsLtr();
public String WordFontAttributes(boolean[] is_bold, boolean[] is_italic,
boolean[] is_underlined, boolean[] is_monospace,
boolean[] is_serif, boolean[] is_smallcaps,
int[] pointsize, int[] font_id);
}
Configurable pipeline for generating output in multiple formats including plain text, structured markup (hOCR, ALTO, PAGE), searchable PDF, and training data formats.
public abstract class TessResultRenderer {
public void insert(TessResultRenderer next);
public boolean BeginDocument(String title);
public boolean AddImage(TessBaseAPI api);
public boolean EndDocument();
public String file_extension();
}
// Concrete renderer classes
public class TessTextRenderer extends TessResultRenderer;
public class TessHOcrRenderer extends TessResultRenderer;
public class TessPDFRenderer extends TessResultRenderer;
public class TessAltoRenderer extends TessResultRenderer;
public class TessTsvRenderer extends TessResultRenderer;
Advanced page structure analysis including text block detection, reading order determination, and geometric layout information. Supports complex document layouts with tables, columns, and mixed content.
public class TessBaseAPI {
public PageIterator AnalyseLayout();
public BOXA GetRegions(PIXA[] pixa);
public BOXA GetTextlines(PIXA[] pixa, int[][] blockids);
public BOXA GetWords(PIXA[] pixa);
public BOXA GetComponentImages(int level, boolean text_only, PIXA[] pixa, int[][] blockids);
}
// Layout analysis constants
public static final int PSM_AUTO = 3; // Fully automatic page segmentation
public static final int PSM_SINGLE_COLUMN = 4; // Single column of text
public static final int PSM_SINGLE_BLOCK = 6; // Single uniform block of text
public static final int PSM_SINGLE_LINE = 7; // Single text line
Comprehensive configuration system with hundreds of parameters controlling OCR behavior, page segmentation, character recognition, and output formatting.
public class TessBaseAPI {
// Parameter Management
public boolean SetVariable(String name, String value);
public boolean GetIntVariable(String name, int[] value);
public boolean GetBoolVariable(String name, boolean[] value);
public boolean GetDoubleVariable(String name, double[] value);
public String GetStringVariable(String name);
// Page Segmentation
public void SetPageSegMode(int mode);
public int GetPageSegMode();
// OCR Engine Mode
public static final int OEM_TESSERACT_ONLY = 0;
public static final int OEM_LSTM_ONLY = 1;
public static final int OEM_DEFAULT = 3;
}
Multi-language OCR with support for 100+ languages, custom language models, and language detection capabilities.
public class TessBaseAPI {
public String GetInitLanguagesAsString();
public void GetLoadedLanguagesAsVector(StringVector langs);
public void GetAvailableLanguagesAsVector(StringVector langs);
}
// Language initialization examples:
// "eng" - English
// "fra" - French
// "deu" - German
// "chi_sim" - Simplified Chinese
// "ara" - Arabic
// "eng+fra+deu" - Multiple languages
// Progress monitoring and cancellation
public class ETEXT_DESC {
public short progress(); // Progress percentage (0-100)
public boolean more_to_come(); // More processing pending
public boolean ocr_alive(); // OCR engine active
public byte err_code(); // Error code
public void set_deadline_msecs(int deadline_msecs);
public boolean deadline_exceeded();
}
// Unicode character handling
public class UNICHAR {
public UNICHAR(String utf8_str, int len);
public UNICHAR(int unicode);
public int first_uni(); // Get first character as UCS-4
public int utf8_len(); // Get UTF-8 byte length
public String utf8_str(); // Get UTF-8 string
public static int[] UTF8ToUTF32(String utf8_str);
public static String UTF32ToUTF8(int[] str32);
}
// Collection types
public class StringVector {
public StringVector();
public long size();
public String get(long i);
public StringVector put(long i, String value);
public StringVector push_back(String value);
public void clear();
}
// Page hierarchy levels for iteration
public static final int RIL_BLOCK = 0; // Block level
public static final int RIL_PARA = 1; // Paragraph level
public static final int RIL_TEXTLINE = 2; // Text line level
public static final int RIL_WORD = 3; // Word level
public static final int RIL_SYMBOL = 4; // Character/symbol level
// Layout block types
public static final int PT_UNKNOWN = 0; // Unknown block type
public static final int PT_FLOWING_TEXT = 1; // Flowing text
public static final int PT_HEADING_TEXT = 2; // Heading text
public static final int PT_PULLOUT_TEXT = 3; // Pull-out text
public static final int PT_EQUATION = 4; // Mathematical equation
public static final int PT_TABLE = 6; // Table
public static final int PT_VERTICAL_TEXT = 7; // Vertical text
public static final int PT_CAPTION_TEXT = 8; // Caption text
public static final int PT_FLOWING_IMAGE = 9; // Flowing image
public static final int PT_NOISE = 14; // Noise/artifacts