or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

collections.md, connected-components.md, core-images.md, geometry.md, image-io.md, image-processing.md, index.md, morphology.md, text-recognition.md, utilities.md

text-recognition.mddocs/

0

# Text Recognition

1

2

This module provides OCR capabilities, document analysis, and text extraction, with specialized structures for character recognition and document processing.

3

4

## Capabilities

5

6

### Recognition Engine

7

8

Core text recognition functionality with training, classification, and confidence scoring.

9

10

```java { .api }

11

/**

12

* Character recognition engine

13

*/

14

class L_RECOG extends Pointer {

15

PIX pixdb_ave(); // average templates

16

PIX pixdb_range(); // template ranges

17

PIXA pixa_tr(); // training examples

18

PIXAA pixaa_tr(); // organized training data

19

PTA pta_tr(); // training centroids

20

NUMA nasum_tr(); // training sums

21

int threshold(); // binarization threshold

22

int maxyshift(); // maximum y shift

23

}

24

25

/**

26

* Create recognition engine

27

* @param scalew - Template width scale

28

* @param scaleh - Template height scale

29

* @param linew - Line width for template rendering

30

* @param threshold - Binarization threshold applied to input images (typically ~128; 0 for the default)

31

* @param maxyshift - Maximum vertical shift allowed

32

* @return L_RECOG engine or null on failure

33

*/

34

L_RECOG recogCreate(int scalew, int scaleh, int linew, int threshold, int maxyshift);

35

36

/**

37

* Create from existing recognizer

38

* @param recs - Source recognizer

39

* @param scalew - New width scale

40

* @param scaleh - New height scale

41

* @param linew - Line width

42

* @param threshold - Binarization threshold applied to input images (typically ~128; 0 for the default)

43

* @param maxyshift - Maximum y shift

44

* @return New L_RECOG or null on failure

45

*/

46

L_RECOG recogCreateFromRecog(L_RECOG recs, int scalew, int scaleh, int linew, int threshold, int maxyshift);

47

48

/**

49

* Train recognizer with labeled example

50

* @param recog - Recognition engine

51

* @param pixs - Training image

52

* @param box - Character bounding box (can be null for full image)

53

* @param text - Character label

54

* @param debug - Debug level (0 = none)

55

* @return 0 on success, 1 on failure

56

*/

57

int recogTrainLabeled(L_RECOG recog, PIX pixs, BOX box, String text, int debug);

58

59

/**

60

* Finalize training (build templates)

61

* @param recog - Recognition engine

62

* @param debug - Debug level

63

* @return 0 on success, 1 on failure

64

*/

65

int recogFinishTraining(L_RECOG recog, int debug);

66

67

/**

68

* Classify character

69

* @param recog - Recognition engine

70

* @param pixs - Character image

71

* @param box - Character bounding box (can be null)

72

* @param pcharstr - Returns recognized character

73

* @param pscore - Returns confidence score

74

* @param debug - Debug level

75

* @return 0 on success, 1 on failure

76

*/

77

int recogClassifyPixel(L_RECOG recog, PIX pixs, BOX box, BytePointer pcharstr, FloatPointer pscore, int debug);

78

```

79

80

**Usage Examples:**

81

82

```java

83

import org.bytedeco.leptonica.*;

84

import static org.bytedeco.leptonica.global.leptonica.*;

85

86

// Create OCR engine for digits

87

L_RECOG digitRecog = recogCreate(32, 32, 4, 128, 2);

88

89

// Train with labeled examples

90

PIX digit0 = pixRead("digit_0_sample.png");

91

recogTrainLabeled(digitRecog, digit0, null, "0", 0);

92

93

PIX digit1 = pixRead("digit_1_sample.png");

94

recogTrainLabeled(digitRecog, digit1, null, "1", 0);

95

96

// ... train with more examples ...

97

98

// Finalize training

99

recogFinishTraining(digitRecog, 0);

100

101

// Classify unknown character

102

PIX unknown = pixRead("unknown_digit.png");

103

BytePointer result = new BytePointer(10);

104

FloatPointer confidence = new FloatPointer(1);

105

106

int status = recogClassifyPixel(digitRecog, unknown, null, result, confidence, 0);

107

if (status == 0) {

108

System.out.println("Recognized: " + result.getString() +

109

" (confidence: " + confidence.get() + ")");

110

}

111

```

112

113

### Document Dewarping

114

115

Correct document distortion and perspective issues for improved OCR accuracy.

116

117

```java { .api }

118

/**

119

* Single page dewarp correction

120

*/

121

class L_DEWARP extends Pointer {

122

PIX pixs(); // source image

123

PIXA sampv(); // vertical samples

124

PIXA samph(); // horizontal samples

125

PTA ptav(); // vertical control points

126

PTA ptah(); // horizontal control points

127

int w(); // image width

128

int h(); // image height

129

int nx(); // horizontal sampling points

130

int ny(); // vertical sampling points

131

}

132

133

/**

134

* Multi-page dewarp processing

135

*/

136

class L_DEWARPA extends Pointer {

137

int nalloc(); // allocated array size

138

int maxpage(); // maximum page number

139

int sampling(); // sampling factor

140

int redfactor(); // reduction factor

141

int minlines(); // minimum lines for modeling

142

int maxdist(); // maximum distance for interpolation

143

}

144

145

/**

146

* Create dewarp structure for single page

147

* @param pixs - Source document image

148

* @param pageno - Page number identifier

149

* @return L_DEWARP structure or null on failure

150

*/

151

L_DEWARP dewarpCreate(PIX pixs, int pageno);

152

153

/**

154

* Create multi-page dewarp structure

155

* @param nmax - Maximum number of pages

156

* @param sampling - Sampling density

157

* @param redfactor - Size reduction factor

158

* @param minlines - Minimum text lines required

159

* @param maxdist - Maximum interpolation distance

160

* @return L_DEWARPA structure or null on failure

161

*/

162

L_DEWARPA dewarpaCreate(int nmax, int sampling, int redfactor, int minlines, int maxdist);

163

164

/**

165

* Build dewarp model for page

166

* @param dew - Dewarp structure

167

* @param debugfile - Debug output file (can be null)

168

* @return 0 on success, 1 on failure

169

*/

170

int dewarpBuildModel(L_DEWARP dew, String debugfile);

171

172

/**

173

* Apply dewarp correction

174

* @param dew - Dewarp structure with built model

175

* @param pixs - Source image to correct

176

* @param debugfile - Debug output file (can be null)

177

* @return Corrected PIX or null on failure

178

*/

179

PIX dewarpApply(L_DEWARP dew, PIX pixs, String debugfile);

180

181

/**

182

* Add page to multi-page dewarper

183

* @param dewa - Multi-page dewarp structure

184

* @param dew - Single-page dewarp structure to insert; it is stored under the page number that was given to dewarpCreate

187

* @return 0 on success, 1 on failure

188

*/

189

int dewarpaInsertDewarp(L_DEWARPA dewa, L_DEWARP dew);

190

```

191

192

**Usage Examples:**

193

194

```java

195

// Single page dewarping

196

PIX document = pixRead("scanned_page.jpg");

197

L_DEWARP dewarp = dewarpCreate(document, 1);

198

199

// Build correction model

200

int result = dewarpBuildModel(dewarp, null);

201

if (result == 0) {

202

// Apply correction

203

PIX corrected = dewarpApply(dewarp, document, null);

204

pixWrite("corrected_page.jpg", corrected, IFF_JPEG);

205

}

206

207

// Multi-page document processing

208

L_DEWARPA multiPage = dewarpaCreate(100, 7, 1, 6, 30);

209

210

// Process each page

211

for (int i = 1; i <= pageCount; i++) {

212

PIX page = pixRead("page_" + i + ".jpg");

213

L_DEWARP pageDewarp = dewarpCreate(page, i);

214

215

if (dewarpBuildModel(pageDewarp, null) == 0) {

216

dewarpaInsertDewarp(multiPage, pageDewarp);

217

218

PIX corrected = dewarpApply(pageDewarp, page, null);

219

pixWrite("corrected_page_" + i + ".jpg", corrected, IFF_JPEG);

220

}

221

}

222

```

223

224

### JBig2 Classification

225

226

Specialized encoding and classification for document compression and analysis.

227

228

```java { .api }

229

/**

230

* JBig2 symbol classifier

231

*/

232

class JBCLASSER extends Pointer {

233

SARRAY safiles(); // input file names

234

int method(); // classification method

235

int components(); // number of components

236

int maxwidth(); // maximum symbol width

237

int maxheight(); // maximum symbol height

238

int npages(); // number of pages processed

239

int baseindex(); // base index for symbols

240

}

241

242

/**

243

* JBig2 encoding data

244

*/

245

class JBDATA extends Pointer {

246

PIX pix(); // reconstructed image

247

int w(); // image width

248

int h(); // image height

249

int nclass(); // number of symbol classes

250

PIXA pixat(); // template symbols

251

PTAA ptaul(); // upper-left coordinates

252

}

253

254

/**

255

* Create JBig2 classifier

256

* @param method - Classification method

257

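* @param components - Type of component to classify (connected components, characters, or words; JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS in Leptonica)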

258

* @return JBCLASSER or null on failure

259

*/

260

JBCLASSER jbClasserCreate(int method, int components);

261

262

/**

263

* Add page to classifier

264

* @param classer - JBig2 classifier

265

* @param pixs - Page image

266

* @param filename - Source filename

267

* @return 0 on success, 1 on failure

268

*/

269

int jbClasserAddPage(JBCLASSER classer, PIX pixs, String filename);

270

271

/**

272

* Generate JBig2 encoding data

273

* @param classer - Trained classifier

274

* @param pageno - Page number to encode

275

* @return JBDATA encoding or null on failure

276

*/

277

JBDATA jbClasserGetJbData(JBCLASSER classer, int pageno);

278

```

279

280

**Usage Examples:**

281

282

```java

283

// Create JBig2 classifier for document compression

284

JBCLASSER classifier = jbClasserCreate(JB_CLASSIFICATION, 8);

285

286

// Add document pages

287

for (int i = 0; i < pageCount; i++) {

288

PIX page = pixRead("page_" + i + ".tiff");

289

jbClasserAddPage(classifier, page, "page_" + i + ".tiff");

290

}

291

292

// Generate compressed representation

293

JBDATA compressed = jbClasserGetJbData(classifier, 0);

294

PIX reconstructed = compressed.pix();

295

```

296

297

### Bitmap Fonts

298

299

Bitmap font rendering for text overlay and document generation.

300

301

```java { .api }

302

/**

303

* Bitmap font structure

304

*/

305

class L_BMF extends Pointer {

306

PIXA pixa(); // character bitmaps (one PIX per glyph)

307

int size(); // font size

308

BytePointer directory(); // font directory

309

}

310

311

/**

312

* Create bitmap font

313

* @param dir - Font directory path

314

* @param fontsize - Font size

315

* @return L_BMF font or null on failure

316

*/

317

L_BMF bmfCreate(String dir, int fontsize);

318

319

/**

320

* Render text using bitmap font

321

* @param bmf - Bitmap font

322

* @param textstr - Text to render

323

* @return PIX with rendered text or null on failure

324

*/

325

PIX bmfGetPix(L_BMF bmf, String textstr);

326

327

/**

328

* Get text width in pixels

329

* @param bmf - Bitmap font

330

* @param textstr - Text string

331

* @param pw - Returns width in pixels

332

* @return 0 on success, 1 on failure

333

*/

334

int bmfGetWidth(L_BMF bmf, String textstr, IntPointer pw);

335

```

336

337

**Usage Examples:**

338

339

```java

340

// Create bitmap font

341

L_BMF font = bmfCreate("/usr/share/fonts/leptonica", 12);

342

343

// Render text

344

PIX textImage = bmfGetPix(font, "Hello, World!");

345

346

// Get text dimensions

347

IntPointer width = new IntPointer(1);

348

bmfGetWidth(font, "Sample Text", width);

349

System.out.println("Text width: " + width.get() + " pixels");

350

351

// Overlay text on image

352

PIX overlayed = pixPaintBoxa(baseImage, textImage, 100, 50, 0x000000);

353

```

354

355

## Text Processing Pipeline

356

357

### Complete OCR Workflow

358

359

```java

360

// 1. Document preprocessing

361

PIX document = pixRead("document.jpg");

362

PIX gray = pixConvertRGBToGray(document, 0.299f, 0.587f, 0.114f);

363

PIX binary = pixOtsuAdaptiveThreshold(gray, 32, 32, 0, 0, 0.1f, null);

364

365

// 2. Dewarp correction

366

L_DEWARP dewarp = dewarpCreate(binary, 1);

367

if (dewarpBuildModel(dewarp, null) == 0) {

368

binary = dewarpApply(dewarp, binary, null);

369

}

370

371

// 3. Character segmentation (hypothetical)

372

BOXA characters = segmentCharacters(binary);

373

374

// 4. Character recognition

375

L_RECOG ocr = loadTrainedOCR(); // hypothetical

376

StringBuilder result = new StringBuilder();

377

378

int charCount = boxaGetCount(characters);

379

for (int i = 0; i < charCount; i++) {

380

BOX charBox = boxaGetBox(characters, i, L_CLONE);

381

PIX charImage = pixClipRectangle(binary, charBox, null);

382

383

BytePointer character = new BytePointer(10);

384

FloatPointer confidence = new FloatPointer(1);

385

386

if (recogClassifyPixel(ocr, charImage, null, character, confidence, 0) == 0) {

387

if (confidence.get() > 0.7f) { // confidence threshold

388

result.append(character.getString());

389

}

390

}

391

}

392

393

System.out.println("Recognized text: " + result.toString());

394

```

395

396

## Constants

397

398

```java { .api }

399

// JBig2 classification methods

400

static final int JB_CLASSIFICATION = 0;

401

static final int JB_CORRELATION = 1;

402

403

// Font sizes

404

static final int L_BM_FONT_4 = 4;

405

static final int L_BM_FONT_6 = 6;

406

static final int L_BM_FONT_8 = 8;

407

static final int L_BM_FONT_10 = 10;

408

static final int L_BM_FONT_12 = 12;

409

static final int L_BM_FONT_14 = 14;

410

static final int L_BM_FONT_16 = 16;

411

static final int L_BM_FONT_20 = 20;

412

```