or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

basic-ocr.md configuration.md data-structures.md index.md iterators.md renderers.md

basic-ocr.md docs/

0

# Basic OCR Operations

1

2

Core text recognition functionality providing the primary interface for extracting text from images using the Tesseract OCR engine.

3

4

## Capabilities

5

6

### TessBaseAPI Class

7

8

The main entry point for Tesseract OCR operations, providing initialization, configuration, image processing, and text extraction capabilities.

9

10

```java { .api }

11

/**

12

* Main Tesseract OCR API class providing complete OCR functionality

13

*/

14

public class TessBaseAPI extends Pointer {

15

public TessBaseAPI();

16

17

// Initialization and cleanup

18

public int Init(String datapath, String language);

19

public int Init(String datapath, String language, int oem);

20

public void InitForAnalysePage();

21

public void End();

22

23

// Image input

24

public void SetImage(PIX pix);

25

public void SetImage(byte[] imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line);

26

public void SetInputImage(PIX pix);

27

public PIX GetInputImage();

28

public void SetSourceResolution(int ppi);

29

public void SetRectangle(int left, int top, int width, int height);

30

31

// OCR processing

32

public int Recognize(ETEXT_DESC monitor);

33

public BytePointer TesseractRect(byte[] imagedata, int bytes_per_pixel, int bytes_per_line,

34

int left, int top, int width, int height);

35

36

// Text output

37

public BytePointer GetUTF8Text();

38

public BytePointer GetHOCRText(int page_number);

39

public BytePointer GetAltoText(int page_number);

40

public BytePointer GetPAGEText(int page_number);

41

public BytePointer GetTSVText(int page_number);

42

public BytePointer GetBoxText(int page_number);

43

public BytePointer GetLSTMBoxText(int page_number);

44

public BytePointer GetUNLVText();

45

46

// Analysis results

47

public PageIterator AnalyseLayout();

48

public ResultIterator GetIterator();

49

public MutableIterator GetMutableIterator();

50

public int MeanTextConf();

51

public IntPointer AllWordConfidences();

52

53

// Image processing results

54

public PIX GetThresholdedImage();

55

56

// Static utilities

57

public static BytePointer Version();

58

public static void ClearPersistentCache();

59

}

60

```

61

62

**Basic OCR Example:**

63

64

```java

65

import org.bytedeco.javacpp.*;

66

import org.bytedeco.leptonica.*;

67

import org.bytedeco.tesseract.*;

68

import static org.bytedeco.leptonica.global.leptonica.*;

69

import static org.bytedeco.tesseract.global.tesseract.*;

70

71

// Initialize Tesseract

72

TessBaseAPI api = new TessBaseAPI();

73

if (api.Init(null, "eng") != 0) {

74

System.err.println("Could not initialize Tesseract.");

75

System.exit(1);

76

}

77

78

// Load image using Leptonica

79

PIX image = pixRead("document.png");

80

api.SetImage(image);

81

82

// Extract text

83

BytePointer text = api.GetUTF8Text();

84

System.out.println("Extracted text: " + text.getString());

85

86

// Get confidence score

87

int confidence = api.MeanTextConf();

88

System.out.println("Average confidence: " + confidence + "%");

89

90

// Cleanup

91

api.End();

92

text.deallocate();

93

pixDestroy(image);

94

```

95

96

### Initialization Methods

97

98

Initialize the Tesseract engine with language models and configuration.

99

100

```java { .api }

101

/**

102

* Initialize Tesseract with default OCR engine mode

103

* @param datapath Path to tessdata directory (null for system default)

104

* @param language Language code (e.g., "eng", "eng+fra", "chi_sim")

105

* @return 0 on success, -1 on failure

106

*/

107

public int Init(String datapath, String language);

108

109

/**

110

* Initialize Tesseract with specific OCR engine mode

111

* @param datapath Path to tessdata directory (null for system default)

112

* @param language Language code

113

* @param oem OCR Engine Mode (OEM_LSTM_ONLY, OEM_DEFAULT, etc.)

114

* @return 0 on success, -1 on failure

115

*/

116

public int Init(String datapath, String language, int oem);

117

118

/**

119

* Initialize only for layout analysis (faster than full OCR)

120

*/

121

public void InitForAnalysePage();

122

123

/**

124

* Shutdown Tesseract and free resources

125

*/

126

public void End();

127

```

128

129

### Image Input Methods

130

131

Set the input image for OCR processing using various formats.

132

133

```java { .api }

134

/**

135

* Set image from Leptonica PIX structure (recommended)

136

* @param pix Leptonica PIX image structure

137

*/

138

public void SetImage(PIX pix);

139

140

/**

141

* Set image from raw image data

142

* @param imagedata Raw image bytes

143

* @param width Image width in pixels

144

* @param height Image height in pixels

145

* @param bytes_per_pixel Bytes per pixel (1, 3, or 4)

146

* @param bytes_per_line Bytes per line (width * bytes_per_pixel + padding)

147

*/

148

public void SetImage(byte[] imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line);

149

150

/**

151

* Set source image resolution for better accuracy

152

* @param ppi Pixels per inch (typical values: 200-300)

153

*/

154

public void SetSourceResolution(int ppi);

155

156

/**

157

* Set rectangular region of interest for OCR

158

* @param left Left coordinate

159

* @param top Top coordinate

160

* @param width Width of region

161

* @param height Height of region

162

*/

163

public void SetRectangle(int left, int top, int width, int height);

164

```

165

166

### OCR Processing Methods

167

168

Perform the actual OCR recognition with optional progress monitoring.

169

170

```java { .api }

171

/**

172

* Perform OCR recognition with optional progress monitoring

173

* @param monitor Progress monitor (can be null)

174

* @return 0 on success, negative on failure

175

*/

176

public int Recognize(ETEXT_DESC monitor);

177

178

/**

179

* One-shot OCR for rectangular region of raw image data

180

* @param imagedata Raw image bytes

181

* @param bytes_per_pixel Bytes per pixel

182

* @param bytes_per_line Bytes per line

183

* @param left Left coordinate of region

184

* @param top Top coordinate of region

185

* @param width Width of region

186

* @param height Height of region

187

* @return Recognized text as BytePointer (must deallocate)

188

*/

189

public BytePointer TesseractRect(byte[] imagedata, int bytes_per_pixel, int bytes_per_line,

190

int left, int top, int width, int height);

191

```

192

193

### Text Output Methods

194

195

Extract recognized text in various formats.

196

197

```java { .api }

198

/**

199

* Get recognized text as UTF-8 encoded string

200

* @return Text as BytePointer (must call deallocate())

201

*/

202

public BytePointer GetUTF8Text();

203

204

/**

205

* Get text in hOCR HTML format with position information

206

* @param page_number Page number (0-based)

207

* @return hOCR HTML as BytePointer (must deallocate)

208

*/

209

public BytePointer GetHOCRText(int page_number);

210

211

/**

212

* Get text in ALTO XML format

213

* @param page_number Page number (0-based)

214

* @return ALTO XML as BytePointer (must deallocate)

215

*/

216

public BytePointer GetAltoText(int page_number);

217

218

/**

219

* Get text in PAGE XML format

220

* @param page_number Page number (0-based)

221

* @return PAGE XML as BytePointer (must deallocate)

222

*/

223

public BytePointer GetPAGEText(int page_number);

224

225

/**

226

* Get text in Tab Separated Values format

227

* @param page_number Page number (0-based)

228

* @return TSV data as BytePointer (must deallocate)

229

*/

230

public BytePointer GetTSVText(int page_number);

231

232

/**

233

* Get character bounding boxes in training format

234

* @param page_number Page number (0-based)

235

* @return Box coordinates as BytePointer (must deallocate)

236

*/

237

public BytePointer GetBoxText(int page_number);

238

```

239

240

**Multi-format Output Example:**

241

242

```java

243

// Get plain text

244

BytePointer plainText = api.GetUTF8Text();

245

System.out.println("Plain text: " + plainText.getString());

246

247

// Get hOCR with position information

248

BytePointer hocrText = api.GetHOCRText(0);

249

Files.write(Paths.get("output.hocr"), hocrText.getString().getBytes());

250

251

// Get searchable PDF (requires different approach with renderers)

252

TessPDFRenderer pdfRenderer = new TessPDFRenderer("output", "/usr/share/tesseract-ocr/4.00/tessdata");

253

pdfRenderer.BeginDocument("OCR Results");

254

pdfRenderer.AddImage(api);

255

pdfRenderer.EndDocument();

256

257

// Cleanup

258

plainText.deallocate();

259

hocrText.deallocate();

260

```

261

262

### Analysis Result Methods

263

264

Get confidence scores and detailed analysis results.

265

266

```java { .api }

267

/**

268

* Get average confidence score for all recognized text

269

* @return Confidence percentage (0-100)

270

*/

271

public int MeanTextConf();

272

273

/**

274

* Get confidence scores for all individual words

275

* @return Array of per-word confidence scores (0-100), terminated by -1 (must call deallocate())

276

*/

277

public IntPointer AllWordConfidences();

278

279

/**

280

* Get layout analysis iterator (without OCR)

281

* @return PageIterator for layout structure

282

*/

283

public PageIterator AnalyseLayout();

284

285

/**

286

* Get OCR results iterator

287

* @return ResultIterator for detailed OCR results

288

*/

289

public ResultIterator GetIterator();

290

291

/**

292

* Get processed binary image used for OCR

293

* @return PIX structure with thresholded image

294

*/

295

public PIX GetThresholdedImage();

296

```

297

298

### Advanced Layout Analysis Methods

299

300

Extract detailed layout components including regions, textlines, strips, words, and connected components.

301

302

```java { .api }

303

/**

304

* Get page regions as bounding boxes and images

305

* @param pixa Output parameter for region images

306

* @return BOXA with region bounding boxes

307

*/

308

public BOXA GetRegions(PIXA pixa);

309

310

/**

311

* Get textlines with detailed positioning information

312

* @param raw_image If true, extract from original image instead of thresholded

313

* @param raw_padding Padding pixels for raw image extraction

314

* @param pixa Output parameter for textline images

315

* @param blockids Output parameter for block IDs of each line

316

* @param paraids Output parameter for paragraph IDs within blocks

317

* @return BOXA with textline bounding boxes

318

*/

319

public BOXA GetTextlines(boolean raw_image, int raw_padding, PIXA pixa,

320

IntPointer blockids, IntPointer paraids);

321

public BOXA GetTextlines(PIXA pixa, IntPointer blockids);

322

323

/**

324

* Get textlines and strips for non-rectangular regions

325

* @param pixa Output parameter for strip images

326

* @param blockids Output parameter for block IDs

327

* @return BOXA with strip bounding boxes

328

*/

329

public BOXA GetStrips(PIXA pixa, IntPointer blockids);

330

331

/**

332

* Get individual words as bounding boxes and images

333

* @param pixa Output parameter for word images

334

* @return BOXA with word bounding boxes

335

*/

336

public BOXA GetWords(PIXA pixa);

337

338

/**

339

* Get connected components (individual character shapes)

340

* @param pixa Output parameter for component images

341

* @return BOXA with component bounding boxes

342

*/

343

public BOXA GetConnectedComponents(PIXA pixa);

344

345

/**

346

* Get component images after layout analysis

347

* @param level Page iterator level (RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, or RIL_SYMBOL)

348

* @param text_only If true, only return text components

349

* @param raw_image If true, extract from original image

350

* @param raw_padding Padding for raw image extraction

351

* @param pixa Output parameter for component images

352

* @param blockids Output parameter for block IDs

353

* @param paraids Output parameter for paragraph IDs

354

* @return BOXA with component bounding boxes

355

*/

356

public BOXA GetComponentImages(int level, boolean text_only, boolean raw_image,

357

int raw_padding, PIXA pixa, IntPointer blockids,

358

IntPointer paraids);

359

```

360

361

### Orientation and Script Detection

362

363

Detect document orientation and script direction for proper text processing.

364

365

```java { .api }

366

/**

367

* Detect page orientation and script information

368

* @param results Output parameter for orientation results

369

* @return True if orientation was detected successfully

370

*/

371

public boolean DetectOrientationScript(OSResults results);

372

373

/**

374

* Detect orientation and script with LSTM support

375

* @param orient Output parameter for detected orientation (0-3)

376

* @param script_dir Output parameter for script direction

377

* @param out_conf Output parameter for confidence score

378

* @param is_para_ltr Output parameter for paragraph left-to-right flag

379

* @return True if detection was successful

380

*/

381

public boolean DetectOS(IntPointer orient, IntPointer script_dir,

382

FloatPointer out_conf, BoolPointer is_para_ltr);

383

```

384

385

### Adaptive Training Methods

386

387

Advanced functionality for improving recognition accuracy through adaptive training.

388

389

```java { .api }

390

/**

391

* Adapt the classifier to recognize a specific word

392

* Improves accuracy for repeated words in similar contexts

393

* @param mode Page segmentation mode used for layout analysis of the word image (PSM_SINGLE_WORD or PSM_CIRCLE_WORD)

394

* @param wordstr The word to adapt to, as space-delimited UTF-8 (e.g. "w o r d") so grapheme boundaries are unambiguous

395

* @return True if adaptation was successful

396

*/

397

public boolean AdaptToWordStr(int mode, String wordstr);

398

399

/**

400

* Check if a word is valid according to the current language model

401

* @param word Word to validate

402

* @return True if word is considered valid

403

*/

404

public boolean IsValidWord(String word);

405

406

/**

407

* Check if a character is valid in the current character set

408

* @param utf8_character UTF-8 encoded character to check

409

* @return True if character is valid

410

*/

411

public boolean IsValidCharacter(String utf8_character);

412

```

413

414

### LSTM Advanced Methods

415

416

Access to LSTM neural network specific features and raw recognition data.

417

418

```java { .api }

419

/**

420

* Get raw LSTM timestep data for detailed analysis

421

* @return Vector of symbol-confidence pairs for each timestep

422

*/

423

public StringFloatPairVectorVector GetRawLSTMTimesteps();

424

425

/**

426

* Get best symbol choices from LSTM at each position

427

* @return Vector of symbol-confidence pairs for best choices

428

*/

429

public StringFloatPairVectorVector GetBestLSTMSymbolChoices();

430

```

431

432

### Static Utility Methods

433

434

Version information and cache management.

435

436

```java { .api }

437

/**

438

* Get Tesseract version string

439

* @return Version string as BytePointer (do not deallocate)

440

*/

441

public static BytePointer Version();

442

443

/**

444

* Clear internal caches to free memory

445

*/

446

public static void ClearPersistentCache();

447

```

448

449

## Memory Management

450

451

**Important**: JavaCPP uses native memory management. Always:

452

- Call `deallocate()` on BytePointer objects returned by text methods

453

- Call `End()` on TessBaseAPI before program exit

454

- Use `pixDestroy()` on PIX images when done

455

- Check for null pointers before accessing results

456

457

## Error Handling

458

459

**Initialization Errors**: `Init()` returns 0 on success, -1 on failure

460

**Recognition Errors**: `Recognize()` returns negative values on failure

461

**Memory Errors**: Check for null results from getter methods

462

**Resource Errors**: Always call cleanup methods to prevent memory leaks