Tessl Tile for maven/org.bytedeco/javacpp-presets-platform@1.5.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

computer-vision.md gpu-computing.md index.md machine-learning.md multimedia.md scientific-computing.md text-processing.md

docs/text-processing.md

0
# Text Processing
1

2
Text recognition, natural language processing, and document analysis capabilities through Tesseract OCR, Leptonica image processing, and SentencePiece tokenization.
3

4
## Capabilities
5

6
### Optical Character Recognition (OCR)
7

8
Tesseract OCR engine for extracting text from images and documents.
9

10
```java { .api }
11
/**
12
 * Tesseract OCR API base class
13
 */
14
public class TessBaseAPI extends Pointer {
15
    /**
16
     * Create Tesseract API instance
17
     */
18
    public TessBaseAPI();
19
    
20
    /**
21
     * Initialize Tesseract with language and data path
22
     * @param datapath Path to tessdata directory
23
     * @param language Language code (e.g., "eng", "spa", "fra")
24
     * @return true if initialization successful
25
     */
26
    public native boolean Init(String datapath, String language);
27
    
28
    /**
29
     * Initialize with language, OCR engine mode, and config variables
30
     * @param datapath Path to tessdata directory
31
     * @param language Language code
32
     * @param mode OCR Engine Mode (OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, etc.)
33
     * @param configs Config files to load
34
     * @param configs_size Number of config files
35
     * @param vars_vec Variable names to set
36
     * @param vars_values Variable values to set
37
     * @param vars_vec_size Number of variables
38
     * @param set_only_non_debug_params Only set non-debug parameters
39
     * @return true if initialization successful
40
     */
41
    public native boolean Init(String datapath, String language, int mode,
42
        PointerPointer configs, int configs_size, StringVector vars_vec,
43
        StringVector vars_values, long vars_vec_size, boolean set_only_non_debug_params);
44
    
45
    /**
46
     * Set image from memory buffer
47
     * @param imagedata Image data buffer
48
     * @param width Image width in pixels
49
     * @param height Image height in pixels
50
     * @param bytes_per_pixel Bytes per pixel (1, 3, or 4)
51
     * @param bytes_per_line Bytes per line (width * bytes_per_pixel if no padding)
52
     */
53
    public native void SetImage(BytePointer imagedata, int width, int height,
54
        int bytes_per_pixel, int bytes_per_line);
55
    
56
    /**
57
     * Set image from PIX (Leptonica image format)
58
     * @param pix Leptonica PIX image
59
     */
60
    public native void SetImage(PIX pix);
61
    
62
    /**
63
     * Get recognized text as UTF-8 string
64
     * @return Recognized text (in the native C++ API the caller must free the returned buffer with delete[])
65
     */
66
    public native String GetUTF8Text();
67
    
68
    /**
69
     * Get recognition confidence (0-100)
70
     * @return Mean confidence value
71
     */
72
    public native int MeanTextConf();
73
    
74
    /**
75
     * Get word-level recognition results
76
     * @return Array of word confidence values (0-100), terminated by -1
77
     */
78
    public native IntPointer AllWordConfidences();
79
    
80
    /**
81
     * Set variable value
82
     * @param name Variable name
83
     * @param value Variable value
84
     * @return true if variable was set
85
     */
86
    public native boolean SetVariable(String name, String value);
87
    
88
    /**
89
     * Get variable value
90
     * @param name Variable name
91
     * @return Variable value or null if not found
92
     */
93
    public native String GetStringVariable(String name);
94
    
95
    /**
96
     * Set page segmentation mode
97
     * @param mode Page segmentation mode
98
     */
99
    public native void SetPageSegMode(int mode);
100
    
101
    /**
102
     * Get current page segmentation mode
103
     * @return Current PSM
104
     */
105
    public native int GetPageSegMode();
106
    
107
    /**
108
     * Set rectangle to restrict recognition area
109
     * @param left Left boundary
110
     * @param top Top boundary  
111
     * @param width Rectangle width
112
     * @param height Rectangle height
113
     */
114
    public native void SetRectangle(int left, int top, int width, int height);
115
    
116
    /**
117
     * Clear recognition results and free memory
118
     */
119
    public native void Clear();
120
    
121
    /**
122
     * End recognition and free resources
123
     */
124
    public native void End();
125
}
126

127
/**
128
 * Result iterator for detailed OCR results
129
 */
130
public class ResultIterator extends Pointer {
131
    /**
132
     * Get text at current position
133
     * @param level Text level (word, line, paragraph, block)
134
     * @return Text string
135
     */
136
    public native String GetUTF8Text(int level);
137
    
138
    /**
139
     * Get confidence at current position
140
     * @param level Text level
141
     * @return Confidence value (0-100)
142
     */
143
    public native float Confidence(int level);
144
    
145
    /**
146
     * Get bounding box at current position
147
     * @param level Text level
148
     * @param left Output left coordinate
149
     * @param top Output top coordinate
150
     * @param right Output right coordinate
151
     * @param bottom Output bottom coordinate
152
     * @return true if bounding box available
153
     */
154
    public native boolean BoundingBox(int level, IntPointer left, IntPointer top,
155
        IntPointer right, IntPointer bottom);
156
    
157
    /**
158
     * Move to next element at specified level
159
     * @param level Text level
160
     * @return true if moved successfully
161
     */
162
    public native boolean Next(int level);
163
    
164
    /**
165
     * Check if iterator is at beginning of element
166
     * @param level Text level
167
     * @return true if at beginning
168
     */
169
    public native boolean IsAtBeginningOf(int level);
170
    
171
    /**
172
     * Check if iterator is at final element
173
     * @param level Text level
174
     * @param element Element type
175
     * @return true if at final element
176
     */
177
    public native boolean IsAtFinalElement(int level, int element);
178
}
179

180
/**
181
 * Page segmentation modes
182
 */
183
public static final int PSM_OSD_ONLY = 0;           // Orientation and script detection only
184
public static final int PSM_AUTO_OSD = 1;          // Automatic page segmentation with OSD
185
public static final int PSM_AUTO_ONLY = 2;         // Automatic page segmentation without OSD
186
public static final int PSM_AUTO = 3;              // Fully automatic page segmentation (default)
187
public static final int PSM_SINGLE_COLUMN = 4;     // Single uniform column of text
188
public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5; // Single uniform block of vertically aligned text
189
public static final int PSM_SINGLE_BLOCK = 6;      // Single uniform block of text
190
public static final int PSM_SINGLE_LINE = 7;       // Single text line
191
public static final int PSM_SINGLE_WORD = 8;       // Single word
192
public static final int PSM_CIRCLE_WORD = 9;       // Single word in a circle
193
public static final int PSM_SINGLE_CHAR = 10;      // Single character
194
public static final int PSM_SPARSE_TEXT = 11;      // Sparse text (find as much text as possible)
195
public static final int PSM_SPARSE_TEXT_OSD = 12;  // Sparse text with orientation and script detection
196
public static final int PSM_RAW_LINE = 13;         // Raw line (no assumptions about text layout)
197

198
/**
199
 * OCR Engine modes
200
 */
201
public static final int OEM_TESSERACT_ONLY = 0;    // Legacy Tesseract engine only
202
public static final int OEM_LSTM_ONLY = 1;         // Neural nets LSTM engine only
203
public static final int OEM_TESSERACT_LSTM_COMBINED = 2; // Both engines combined
204
public static final int OEM_DEFAULT = 3;           // Default (whatever is available)
205
```
206

207
### Image Processing for OCR
208

209
Leptonica library providing image processing operations optimized for document analysis and OCR preprocessing.
210

211
```java { .api }
212
/**
213
 * PIX - Leptonica image structure
214
 */
215
public class PIX extends Pointer {
216
    /**
217
     * Get image width
218
     * @return Image width in pixels
219
     */
220
    public native int getWidth();
221
    
222
    /**
223
     * Get image height
224
     * @return Image height in pixels
225
     */
226
    public native int getHeight();
227
    
228
    /**
229
     * Get image depth (bits per pixel)
230
     * @return Image depth
231
     */
232
    public native int getDepth();
233
    
234
    /**
235
     * Get image data pointer
236
     * @return Pointer to image data
237
     */
238
    public native IntPointer getData();
239
    
240
    /**
241
     * Get words per line
242
     * @return Words per line
243
     */
244
    public native int getWpl();
245
    
246
    /**
247
     * Get input format
248
     * @return Input file format
249
     */
250
    public native int getInputFormat();
251
    
252
    /**
253
     * Get X resolution (DPI)
254
     * @return X resolution
255
     */
256
    public native int getXRes();
257
    
258
    /**
259
     * Get Y resolution (DPI)
260
     * @return Y resolution
261
     */
262
    public native int getYRes();
263
    
264
    /**
265
     * Clone PIX image
266
     * @return Cloned image
267
     */
268
    public native PIX pixClone();
269
    
270
    /**
271
     * Copy PIX image
272
     * @return Copied image
273
     */
274
    public native PIX pixCopy();
275
}
276

277
/**
278
 * Image I/O operations
279
 */
280
public static class LeptonicaIO {
281
    /**
282
     * Read image from file
283
     * @param filename Image file path
284
     * @return PIX image or null on error
285
     */
286
    public static native PIX pixRead(String filename);
287
    
288
    /**
289
     * Write image to file
290
     * @param filename Output file path
291
     * @param pix Image to write
292
     * @param format Output format (IFF_PNG, IFF_JPEG, etc.)
293
     * @return 0 on success, 1 on error
294
     */
295
    public static native int pixWrite(String filename, PIX pix, int format);
296
    
297
    /**
298
     * Read image from memory
299
     * @param data Image data buffer
300
     * @param size Buffer size
301
     * @return PIX image or null on error
302
     */
303
    public static native PIX pixReadMem(BytePointer data, long size);
304
    
305
    /**
306
     * Write image to memory
307
     * @param pdata Output data buffer pointer
308
     * @param psize Output buffer size
309
     * @param pix Image to write
310
     * @param format Output format
311
     * @return 0 on success, 1 on error
312
     */
313
    public static native int pixWriteMem(PointerPointer pdata, SizeTPointer psize,
314
        PIX pix, int format);
315
    
316
    /**
317
     * Display image (X11 or other display)
318
     * @param pix Image to display
319
     * @param x X position
320
     * @param y Y position
321
     * @return 0 on success, 1 on error
322
     */
323
    public static native int pixDisplay(PIX pix, int x, int y);
324
}
325

326
/**
327
 * Image enhancement and preprocessing
328
 */
329
public static class LeptonicaEnhancement {
330
    /**
331
     * Convert to grayscale
332
     * @param pixs Source image
333
     * @return Grayscale image
334
     */
335
    public static native PIX pixConvertTo8(PIX pixs);
336
    
337
    /**
338
     * Scale image
339
     * @param pixs Source image
340
     * @param scalex X scale factor
341
     * @param scaley Y scale factor
342
     * @return Scaled image
343
     */
344
    public static native PIX pixScale(PIX pixs, float scalex, float scaley);
345
    
346
    /**
347
     * Rotate image
348
     * @param pixs Source image
349
     * @param angle Rotation angle in radians
350
     * @param type Rotation type (L_ROTATE_AREA_MAP, etc.)
351
     * @param incolor Fill color for background
352
     * @param width Output width (0 for auto)
353
     * @param height Output height (0 for auto)
354
     * @return Rotated image
355
     */
356
    public static native PIX pixRotate(PIX pixs, float angle, int type, int incolor,
357
        int width, int height);
358
    
359
    /**
360
     * Deskew image (correct skew angle)
361
     * @param pixs Source image
362
     * @param redsearch Reduction factor for search
363
     * @return Deskewed image
364
     */
365
    public static native PIX pixDeskew(PIX pixs, int redsearch);
366
    
367
    /**
368
     * Unsharp mask filter for sharpening
369
     * @param pixs Source image
370
     * @param halfwidth Half-width of convolution kernel
371
     * @param fract Fraction for mixing
372
     * @return Sharpened image
373
     */
374
    public static native PIX pixUnsharpMasking(PIX pixs, int halfwidth, float fract);
375
    
376
    /**
377
     * Otsu thresholding for binarization
378
     * @param pixs Source grayscale image
379
     * @param sx Tile width for adaptive threshold
380
     * @param sy Tile height for adaptive threshold
381
     * @param smoothx Smoothing width
382
     * @param smoothy Smoothing height
383
     * @param scorefract Fraction of max score
384
     * @param pthresh Output threshold value
385
     * @return Binary image
386
     */
387
    public static native PIX pixOtsuAdaptiveThreshold(PIX pixs, int sx, int sy,
388
        int smoothx, int smoothy, float scorefract, IntPointer pthresh);
389
    
390
    /**
391
     * Remove noise using morphological operations
392
     * @param pixs Source binary image
393
     * @param removal Type of removal (L_REMOVE_SMALL_CC, etc.)
394
     * @param minsize Minimum component size to keep
395
     * @param connectivity Connectivity (4 or 8)
396
     * @return Denoised image
397
     */
398
    public static native PIX pixRemoveNoise(PIX pixs, int removal, int minsize, int connectivity);
399
}
400

401
/**
402
 * Morphological operations
403
 */
404
public static class LeptonicaMorphology {
405
    /**
406
     * Morphological erosion
407
     * @param pixs Source image
408
     * @param sel Structuring element
409
     * @return Eroded image
410
     */
411
    public static native PIX pixErode(PIX pixs, SEL sel);
412
    
413
    /**
414
     * Morphological dilation
415
     * @param pixs Source image
416
     * @param sel Structuring element
417
     * @return Dilated image
418
     */
419
    public static native PIX pixDilate(PIX pixs, SEL sel);
420
    
421
    /**
422
     * Morphological opening (erosion followed by dilation)
423
     * @param pixs Source image
424
     * @param sel Structuring element
425
     * @return Opened image
426
     */
427
    public static native PIX pixOpen(PIX pixs, SEL sel);
428
    
429
    /**
430
     * Morphological closing (dilation followed by erosion)
431
     * @param pixs Source image
432
     * @param sel Structuring element
433
     * @return Closed image
434
     */
435
    public static native PIX pixClose(PIX pixs, SEL sel);
436
}
437
```
438

439
### Text Tokenization
440

441
SentencePiece library for neural text processing and tokenization.
442

443
```java { .api }
444
/**
445
 * SentencePiece processor for text tokenization
446
 */
447
public class SentencePieceProcessor extends Pointer {
448
    /**
449
     * Create SentencePiece processor
450
     */
451
    public SentencePieceProcessor();
452
    
453
    /**
454
     * Load model from file
455
     * @param filename Path to SentencePiece model file
456
     * @return Status object indicating success/failure
457
     */
458
    public native Status Load(String filename);
459
    
460
    /**
461
     * Load model from serialized data
462
     * @param serialized_model_proto Serialized model data
463
     * @return Status object
464
     */
465
    public native Status LoadFromSerializedProto(String serialized_model_proto);
466
    
467
    /**
468
     * Encode text to pieces
469
     * @param input Input text
470
     * @param pieces Output token pieces
471
     * @return Status object
472
     */
473
    public native Status Encode(String input, StringVector pieces);
474
    
475
    /**
476
     * Encode text to IDs
477
     * @param input Input text
478
     * @param ids Output token IDs
479
     * @return Status object
480
     */
481
    public native Status Encode(String input, IntVector ids);
482
    
483
    /**
484
     * Decode pieces to text
485
     * @param pieces Input token pieces
486
     * @param output Output text
487
     * @return Status object
488
     */
489
    public native Status Decode(StringVector pieces, StringPointer output);
490
    
491
    /**
492
     * Decode IDs to text
493
     * @param ids Input token IDs
494
     * @param output Output text
495
     * @return Status object
496
     */
497
    public native Status Decode(IntVector ids, StringPointer output);
498
    
499
    /**
500
     * Sample encode with multiple possible segmentations
501
     * @param input Input text
502
     * @param nbest_size Number of best segmentations
503
     * @param alpha Smoothing parameter
504
     * @param pieces Output token pieces
505
     * @return Status object
506
     */
507
    public native Status SampleEncode(String input, int nbest_size, float alpha,
508
        StringVector pieces);
509
    
510
    /**
511
     * Get vocabulary size
512
     * @return Vocabulary size
513
     */
514
    public native int GetPieceSize();
515
    
516
    /**
517
     * Get piece from ID
518
     * @param id Token ID
519
     * @return Token piece string
520
     */
521
    public native String IdToPiece(int id);
522
    
523
    /**
524
     * Get ID from piece
525
     * @param piece Token piece string
526
     * @return Token ID
527
     */
528
    public native int PieceToId(String piece);
529
    
530
    /**
531
     * Check if token is unknown
532
     * @param id Token ID
533
     * @return true if unknown token
534
     */
535
    public native boolean IsUnknown(int id);
536
    
537
    /**
538
     * Check if token is control symbol
539
     * @param id Token ID
540
     * @return true if control symbol
541
     */
542
    public native boolean IsControl(int id);
543
    
544
    /**
545
     * Set encoding extra options
546
     * @param extra_option Extra options string
547
     * @return Status object
548
     */
549
    public native Status SetEncodeExtraOptions(String extra_option);
550
    
551
    /**
552
     * Set decoding extra options
553
     * @param extra_option Extra options string
554
     * @return Status object
555
     */
556
    public native Status SetDecodeExtraOptions(String extra_option);
557
}
558

559
/**
560
 * Status object for operation results
561
 */
562
public class Status extends Pointer {
563
    /**
564
     * Check if operation was successful
565
     * @return true if successful
566
     */
567
    public native boolean ok();
568
    
569
    /**
570
     * Get error code
571
     * @return Error code
572
     */
573
    public native int code();
574
    
575
    /**
576
     * Get error message
577
     * @return Error message string
578
     */
579
    public native String error_message();
580
    
581
    /**
582
     * Convert to string representation
583
     * @return Status string
584
     */
585
    public native String ToString();
586
}
587

588
/**
589
 * SentencePiece trainer for creating custom models
590
 */
591
public static class SentencePieceTrainer {
592
    /**
593
     * Train SentencePiece model
594
     * @param args Training arguments
595
     * @return Status object
596
     */
597
    public static native Status Train(String args);
598
    
599
    /**
600
     * Train from arguments map
601
     * @param kwargs Training arguments as key-value pairs
602
     * @return Status object
603
     */
604
    public static native Status Train(StringStringMap kwargs);
605
}
606
```
607

608
## Usage Examples
609

610
### Basic OCR with Tesseract
611

612
```java
613
import org.bytedeco.tesseract.*;
614
import org.bytedeco.leptonica.*;
615
import static org.bytedeco.tesseract.global.tesseract.*;
616
import static org.bytedeco.leptonica.global.leptonica.*;
617

618
public class TesseractOCR {
619
    static {
620
        Loader.load(tesseract.class);
621
        Loader.load(leptonica.class);
622
    }
623
    
624
    public static void basicOCR(String imagePath) {
625
        try (PointerScope scope = new PointerScope()) {
626
            // Initialize Tesseract API
627
            TessBaseAPI api = new TessBaseAPI();
628
            
629
            // Initialize with English language
630
            // Note: tessdata directory must be available
631
            if (!api.Init(null, "eng")) {
632
                System.err.println("Could not initialize Tesseract API");
633
                return;
634
            }
635
            
636
            // Load image using Leptonica
637
            PIX image = pixRead(imagePath);
638
            if (image == null) {
639
                System.err.println("Could not load image: " + imagePath);
640
                api.End();
641
                return;
642
            }
643
            
644
            // Set image for OCR
645
            api.SetImage(image);
646
            
647
            // Get OCR result
648
            String ocrResult = api.GetUTF8Text();
649
            int confidence = api.MeanTextConf();
650
            
651
            System.out.println("OCR Result:");
652
            System.out.println(ocrResult);
653
            System.out.println("Mean confidence: " + confidence + "%");
654
            
655
            // Cleanup
656
            pixDestroy(image);
657
            api.End();
658
        }
659
    }
660
    
661
    public static void configuredOCR(String imagePath) {
662
        try (PointerScope scope = new PointerScope()) {
663
            TessBaseAPI api = new TessBaseAPI();
664
            
665
            // Initialize with specific configurations
666
            if (!api.Init(null, "eng")) {
667
                System.err.println("Could not initialize Tesseract");
668
                return;
669
            }
670
            
671
            // Configure OCR parameters
672
            api.SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ");
673
            api.SetPageSegMode(PSM_SINGLE_BLOCK);
674
            
675
            PIX image = pixRead(imagePath);
676
            api.SetImage(image);
677
            
678
            // Set recognition area (optional)
679
            api.SetRectangle(50, 50, 400, 200);
680
            
681
            String text = api.GetUTF8Text();
682
            System.out.println("Configured OCR Result: " + text);
683
            
684
            pixDestroy(image);
685
            api.End();
686
        }
687
    }
688
    
689
    public static void detailedOCR(String imagePath) {
690
        try (PointerScope scope = new PointerScope()) {
691
            TessBaseAPI api = new TessBaseAPI();
692
            api.Init(null, "eng");
693
            
694
            PIX image = pixRead(imagePath);
695
            api.SetImage(image);
696
            
697
            // Get detailed results with iterator
698
            ResultIterator ri = api.GetIterator();
699
            if (ri != null) {
700
                int level = RIL_WORD;  // Word level
701
                
702
                do {
703
                    String word = ri.GetUTF8Text(level);
704
                    float conf = ri.Confidence(level);
705
                    
706
                    // Get bounding box
707
                    IntPointer left = new IntPointer(1);
708
                    IntPointer top = new IntPointer(1);
709
                    IntPointer right = new IntPointer(1);
710
                    IntPointer bottom = new IntPointer(1);
711
                    
712
                    if (ri.BoundingBox(level, left, top, right, bottom)) {
713
                        System.out.printf("Word: '%s' (conf: %.2f) at (%d,%d)-(%d,%d)\n",
714
                            word, conf, left.get(), top.get(), right.get(), bottom.get());
715
                    }
716
                    
717
                } while (ri.Next(level));
718
            }
719
            
720
            pixDestroy(image);
721
            api.End();
722
        }
723
    }
724
}
725
```
726

727
### Image Preprocessing with Leptonica
728

729
```java
730
import org.bytedeco.leptonica.*;
731
import static org.bytedeco.leptonica.global.leptonica.*;
732

733
public class ImagePreprocessing {
734
    static {
735
        Loader.load(leptonica.class);
736
    }
737
    
738
    public static void preprocessForOCR(String inputPath, String outputPath) {
739
        try (PointerScope scope = new PointerScope()) {
740
            // Load image
741
            PIX original = pixRead(inputPath);
742
            if (original == null) {
743
                System.err.println("Could not load image");
744
                return;
745
            }
746
            
747
            System.out.printf("Original image: %dx%d, depth: %d\n",
748
                original.getWidth(), original.getHeight(), original.getDepth());
749
            
750
            // Convert to 8-bit grayscale
751
            PIX gray = pixConvertTo8(original);
752
            
753
            // Scale up if image is small (improves OCR accuracy)
754
            PIX scaled = gray;
755
            if (gray.getWidth() < 300 || gray.getHeight() < 300) {
756
                float scale = Math.max(300.0f / gray.getWidth(), 300.0f / gray.getHeight());
757
                scaled = pixScale(gray, scale, scale);
758
                pixDestroy(gray);
759
            }
760
            
761
            // Deskew the image
762
            PIX deskewed = pixDeskew(scaled, 2);
763
            if (deskewed != null) {
764
                pixDestroy(scaled);
765
                scaled = deskewed;
766
            }
767
            
768
            // Unsharp masking for better text definition
769
            PIX sharpened = pixUnsharpMasking(scaled, 5, 0.3f);
770
            
771
            // Adaptive binarization using Otsu
772
            IntPointer threshold = new IntPointer(1);
773
            PIX binary = pixOtsuAdaptiveThreshold(sharpened, 32, 32, 0, 0, 0.1f, threshold);
774
            
775
            System.out.println("Adaptive threshold: " + threshold.get());
776
            
777
            // Remove small noise components
778
            PIX denoised = pixRemoveNoise(binary, L_REMOVE_SMALL_CC, 3, 8);
779
            
780
            // Save preprocessed image
781
            pixWrite(outputPath, denoised, IFF_PNG);
782
            
783
            System.out.printf("Preprocessed image saved: %dx%d\n",
784
                denoised.getWidth(), denoised.getHeight());
785
            
786
            // Cleanup
787
            pixDestroy(original);
788
            pixDestroy(sharpened);
789
            pixDestroy(binary);
790
            pixDestroy(denoised);
791
        }
792
    }
793
    
794
    public static void morphologicalOperations(String imagePath) {
795
        try (PointerScope scope = new PointerScope()) {
796
            PIX original = pixRead(imagePath);
797
            PIX binary = pixConvertTo1(original, 128);  // Convert to binary
798
            
799
            // Create structuring elements
800
            SEL sel3x3 = selCreateBrick(3, 3, 1, 1, SEL_HIT);
801
            SEL sel5x1 = selCreateBrick(5, 1, 2, 0, SEL_HIT);
802
            
803
            // Morphological operations
804
            PIX eroded = pixErode(binary, sel3x3);
805
            PIX dilated = pixDilate(binary, sel3x3);
806
            PIX opened = pixOpen(binary, sel3x3);
807
            PIX closed = pixClose(binary, sel3x3);
808
            
809
            // Horizontal line detection
810
            PIX horizontal = pixOpen(binary, sel5x1);
811
            
812
            // Save results
813
            pixWrite("eroded.png", eroded, IFF_PNG);
814
            pixWrite("dilated.png", dilated, IFF_PNG);
815
            pixWrite("opened.png", opened, IFF_PNG);
816
            pixWrite("closed.png", closed, IFF_PNG);
817
            pixWrite("horizontal.png", horizontal, IFF_PNG);
818
            
819
            // Cleanup
820
            pixDestroy(original);
821
            pixDestroy(binary);
822
            pixDestroy(eroded);
823
            pixDestroy(dilated);
824
            pixDestroy(opened);
825
            pixDestroy(closed);
826
            pixDestroy(horizontal);
827
            selDestroy(sel3x3);
828
            selDestroy(sel5x1);
829
        }
830
    }
831
}
832
```
833

834
### Text Tokenization with SentencePiece
835

836
```java
837
import org.bytedeco.sentencepiece.*;
838
import static org.bytedeco.sentencepiece.global.sentencepiece.*;
839

840
public class TextTokenization {
841
    static {
842
        Loader.load(sentencepiece.class);
843
    }
844
    
845
    public static void basicTokenization(String modelPath) {
846
        try (PointerScope scope = new PointerScope()) {
847
            // Create processor
848
            SentencePieceProcessor processor = new SentencePieceProcessor();
849
            
850
            // Load pre-trained model
851
            Status status = processor.Load(modelPath);
852
            if (!status.ok()) {
853
                System.err.println("Failed to load model: " + status.error_message());
854
                return;
855
            }
856
            
857
            String text = "This is a sample text for tokenization.";
858
            
859
            // Encode to pieces
860
            StringVector pieces = new StringVector();
861
            status = processor.Encode(text, pieces);
862
            
863
            if (status.ok()) {
864
                System.out.println("Input text: " + text);
865
                System.out.print("Pieces: ");
866
                for (int i = 0; i < pieces.size(); i++) {
867
                    System.out.print("'" + pieces.get(i).getString() + "' ");
868
                }
869
                System.out.println();
870
            }
871
            
872
            // Encode to IDs
873
            IntVector ids = new IntVector();
874
            status = processor.Encode(text, ids);
875
            
876
            if (status.ok()) {
877
                System.out.print("IDs: ");
878
                for (int i = 0; i < ids.size(); i++) {
879
                    System.out.print(ids.get(i) + " ");
880
                }
881
                System.out.println();
882
            }
883
            
884
            // Decode back to text
885
            StringPointer decoded = new StringPointer();
886
            status = processor.Decode(pieces, decoded);
887
            
888
            if (status.ok()) {
889
                System.out.println("Decoded: " + decoded.getString());
890
            }
891
            
892
            // Vocabulary info
893
            System.out.println("Vocabulary size: " + processor.GetPieceSize());
894
            System.out.println("First 10 pieces:");
895
            for (int i = 0; i < Math.min(10, processor.GetPieceSize()); i++) {
896
                System.out.println("  " + i + ": '" + processor.IdToPiece(i) + "'");
897
            }
898
        }
899
    }
900
    
901
    public static void samplingTokenization(String modelPath) {
902
        try (PointerScope scope = new PointerScope()) {
903
            SentencePieceProcessor processor = new SentencePieceProcessor();
904
            processor.Load(modelPath);
905
            
906
            String text = "Neural machine translation with attention mechanism.";
907
            
908
            // Sample multiple segmentations
909
            System.out.println("Input: " + text);
910
            System.out.println("Sample segmentations:");
911
            
912
            for (int i = 0; i < 5; i++) {
913
                StringVector pieces = new StringVector();
914
                Status status = processor.SampleEncode(text, -1, 0.1f, pieces);
915
                
916
                if (status.ok()) {
917
                    System.out.print("Sample " + (i+1) + ": ");
918
                    for (int j = 0; j < pieces.size(); j++) {
919
                        System.out.print("'" + pieces.get(j).getString() + "' ");
920
                    }
921
                    System.out.println();
922
                }
923
            }
924
        }
925
    }
926
    
927
    public static void trainCustomModel() {
928
        try (PointerScope scope = new PointerScope()) {
929
            // Training arguments
930
            String args = "--input=training_data.txt " +
931
                         "--model_prefix=custom_model " +
932
                         "--vocab_size=8000 " +
933
                         "--character_coverage=0.9995 " +
934
                         "--model_type=bpe";
935
            
936
            Status status = SentencePieceTrainer.Train(args);
937
            
938
            if (status.ok()) {
939
                System.out.println("Model training completed successfully!");
940
                System.out.println("Model files: custom_model.model, custom_model.vocab");
941
            } else {
942
                System.err.println("Training failed: " + status.error_message());
943
            }
944
        }
945
    }
946
}
947
```
948

949
### Complete OCR Pipeline
950

951
```java
952
import org.bytedeco.tesseract.*;
953
import org.bytedeco.leptonica.*;
954
import org.bytedeco.sentencepiece.*;
955

956
public class OCRPipeline {
957
    public static void processDocument(String imagePath, String modelPath) {
958
        try (PointerScope scope = new PointerScope()) {
959
            // Step 1: Preprocess image
960
            PIX original = pixRead(imagePath);
961
            PIX gray = pixConvertTo8(original);
962
            PIX deskewed = pixDeskew(gray, 2);
963
            PIX sharpened = pixUnsharpMasking(deskewed != null ? deskewed : gray, 5, 0.3f);
964
            
965
            IntPointer threshold = new IntPointer(1);
966
            PIX binary = pixOtsuAdaptiveThreshold(sharpened, 32, 32, 0, 0, 0.1f, threshold);
967
            PIX denoised = pixRemoveNoise(binary, L_REMOVE_SMALL_CC, 3, 8);
968
            
969
            // Step 2: OCR with Tesseract
970
            TessBaseAPI api = new TessBaseAPI();
971
            api.Init(null, "eng");
972
            api.SetImage(denoised);
973
            
974
            String rawText = api.GetUTF8Text();
975
            int confidence = api.MeanTextConf();
976
            
977
            System.out.println("OCR Confidence: " + confidence + "%");
978
            System.out.println("Raw OCR Text:\n" + rawText);
979
            
980
            // Step 3: Post-process with SentencePiece (if model available)
981
            if (modelPath != null) {
982
                SentencePieceProcessor processor = new SentencePieceProcessor();
983
                Status status = processor.Load(modelPath);
984
                
985
                if (status.ok()) {
986
                    StringVector pieces = new StringVector();
987
                    processor.Encode(rawText, pieces);
988
                    
989
                    System.out.println("\nTokenized into " + pieces.size() + " pieces:");
990
                    for (int i = 0; i < Math.min(pieces.size(), 20); i++) {
991
                        System.out.print("'" + pieces.get(i).getString() + "' ");
992
                    }
993
                    System.out.println();
994
                }
995
            }
996
            
997
            // Cleanup
998
            pixDestroy(original);
999
            pixDestroy(gray);
1000
            if (deskewed != null) pixDestroy(deskewed);
1001
            pixDestroy(sharpened);
1002
            pixDestroy(binary);
1003
            pixDestroy(denoised);
1004
            api.End();
1005
        }
1006
    }
1007
}
1008
```

Version

Tile

Files

text-processing.mddocs/

text-processing.mddocs/