tessl install tessl/pypi-paddleocr@3.3.0

Industry-leading OCR and document AI engine that converts documents and images into structured, AI-friendly data formats with comprehensive solutions from text extraction to intelligent document understanding.
The main OCR pipeline for text detection and recognition. Combines text detection and text recognition models to extract text from images with support for 109 languages. Compatible with PaddleOCR 2.x interfaces.
Initialize the PaddleOCR pipeline with language and version selection.
class PaddleOCR:
"""
Main OCR pipeline for text detection and recognition.
Combines text detection and recognition models to extract text from images.
Supports 109 languages and multiple PP-OCR versions (v3, v4, v5).
"""
def __init__(
self,
doc_orientation_classify_model_name: str = None,
doc_orientation_classify_model_dir: str = None,
doc_unwarping_model_name: str = None,
doc_unwarping_model_dir: str = None,
text_detection_model_name: str = None,
text_detection_model_dir: str = None,
textline_orientation_model_name: str = None,
textline_orientation_model_dir: str = None,
textline_orientation_batch_size: int = None,
text_recognition_model_name: str = None,
text_recognition_model_dir: str = None,
text_recognition_batch_size: int = None,
use_doc_orientation_classify: bool = None,
use_doc_unwarping: bool = None,
use_textline_orientation: bool = None,
text_det_limit_side_len: int = None,
text_det_limit_type: str = None,
text_det_thresh: float = None,
text_det_box_thresh: float = None,
text_det_unclip_ratio: float = None,
text_det_input_shape: tuple = None,
text_rec_score_thresh: float = None,
return_word_box: bool = None,
text_rec_input_shape: tuple = None,
lang: str = None,
ocr_version: str = None,
paddlex_config: str = None,
device: str = None,
use_hpi: bool = None,
**kwargs
):
"""
Initialize PaddleOCR pipeline.
Args:
lang (str, optional): Language code for OCR. Options include:
- 'ch': Simplified Chinese
- 'en': English
- 'fr': French
- 'es': Spanish
- 'pt': Portuguese
- 'ru': Russian
- 'ko': Korean
- 'ja': Japanese
- And many more (109 languages total)
Default: 'en'
ocr_version (str, optional): PP-OCR model version. Options:
- 'PP-OCRv3': Older, faster but less accurate
- 'PP-OCRv4': Balanced performance
- 'PP-OCRv5': Latest (default), highest accuracy
use_doc_orientation_classify (bool, optional): Enable document orientation classification
Use when: Images may be rotated (0°, 90°, 180°, 270°)
Impact: +0.1-0.2s processing time, +5-10% accuracy on rotated images
Default: False
use_doc_unwarping (bool, optional): Enable document unwarping
Use when: Photos of documents with perspective distortion or curved pages
Impact: +0.3-0.5s processing time, +10-20% accuracy on distorted images
Default: False
use_textline_orientation (bool, optional): Enable text line orientation classification
Use when: Mixed vertical and horizontal text
Impact: +0.05-0.1s processing time
Default: False
text_det_limit_side_len (int, optional): Limit on side length for text detection
Range: 640-2560 recommended
Lower: Faster, may miss small text
Higher: Better for small text, more memory, slower
Default: 960
text_det_limit_type (str, optional): Type of side length limit ('min' or 'max')
'max': Resize if longest side > limit (default, most common)
'min': Resize if shortest side < limit
text_det_thresh (float, optional): Pixel threshold for text detection
Range: 0.1-0.5
Lower: More sensitive, more false positives
Higher: More conservative, may miss faint text
Default: 0.3
text_det_box_thresh (float, optional): Box threshold for text detection
Range: 0.3-0.8
Lower: More boxes detected
Higher: Only high-confidence boxes
Default: 0.6
text_det_unclip_ratio (float, optional): Expansion ratio for detected text regions
Range: 1.0-2.5
Higher: Larger boxes, ensures text not clipped
Lower: Tighter boxes
Default: 1.5
text_rec_score_thresh (float, optional): Score threshold for text recognition results
Range: 0.0-1.0
Use to filter low-confidence results
Default: 0.5
return_word_box (bool, optional): Return single-character coordinates
Enable for: Character-level editing, fine-grained analysis
Impact: Slightly slower, more output data
Default: False
text_detection_model_name (str, optional): Name of text detection model
Options: 'PP-OCRv5_server_det' (default), 'PP-OCRv5_mobile_det'
text_detection_model_dir (str, optional): Directory of custom text detection model
text_recognition_model_name (str, optional): Name of text recognition model
Options: 'PP-OCRv5_server_rec' (default), 'PP-OCRv5_mobile_rec'
text_recognition_model_dir (str, optional): Directory of custom text recognition model
text_recognition_batch_size (int, optional): Batch size for recognition
Higher: Better GPU utilization, more memory
Default: 6
device (str, optional): Device for inference
Options: 'cpu', 'gpu' (default GPU 0), 'gpu:0', 'gpu:1', etc.
GPU provides 3-10x speedup
use_hpi (bool, optional): Enable high-performance inference
Requires: TensorRT (GPU) or OpenVINO (CPU)
Speedup: 2-3x additional speedup
Default: False
paddlex_config (str or dict, optional): PaddleX configuration file path or dict
Use for: Reproducible configurations, version control
"""

Perform OCR on images to detect and recognize text.
def predict(
self,
input,
*,
use_doc_orientation_classify: bool = None,
use_doc_unwarping: bool = None,
use_textline_orientation: bool = None,
text_det_limit_side_len: int = None,
text_det_limit_type: str = None,
text_det_thresh: float = None,
text_det_box_thresh: float = None,
text_det_unclip_ratio: float = None,
text_rec_score_thresh: float = None,
return_word_box: bool = None
) -> list:
"""
Perform OCR on input image(s).
Args:
input: Image path (str), image array (numpy.ndarray), PIL Image,
directory path, or list of any of these
Supported formats: JPG, PNG, BMP, PDF (converted to images)
use_doc_orientation_classify (bool, optional): Override doc orientation classification
use_doc_unwarping (bool, optional): Override document unwarping
use_textline_orientation (bool, optional): Override text line orientation
text_det_limit_side_len (int, optional): Override detection side length limit
text_det_limit_type (str, optional): Override detection limit type
text_det_thresh (float, optional): Override detection pixel threshold
text_det_box_thresh (float, optional): Override detection box threshold
text_det_unclip_ratio (float, optional): Override unclip ratio
text_rec_score_thresh (float, optional): Override recognition score threshold
return_word_box (bool, optional): Override word box return setting
Returns:
list: List of dictionaries containing OCR results. Each dict contains:
- input_path (str): Path to input image
- dt_polys (list): Detected text polygons
Format: [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], ...]
Each polygon is 4 points defining the text box corners (clockwise from top-left)
- rec_text (list): Recognized text strings
- rec_score (list): Recognition confidence scores (0.0-1.0)
- dt_scores (list): Detection confidence scores (0.0-1.0)
- rec_result (list): List of dicts with 'rec_text' and 'rec_score' keys
If return_word_box=True, also includes:
- char_polys (list): Character-level bounding boxes
- char_scores (list): Character-level confidence scores
Empty or failed results: If no text detected, lists will be empty
Error cases: Raises exception on invalid input or processing failure
"""
def predict_iter(
self,
input,
*,
use_doc_orientation_classify: bool = None,
use_doc_unwarping: bool = None,
use_textline_orientation: bool = None,
text_det_limit_side_len: int = None,
text_det_limit_type: str = None,
text_det_thresh: float = None,
text_det_box_thresh: float = None,
text_det_unclip_ratio: float = None,
text_rec_score_thresh: float = None,
return_word_box: bool = None
):
"""
Perform OCR on input images with iterator for memory efficiency.
Args: Same as predict()
Yields:
dict: OCR result for each image (same format as predict())
Use Cases:
- Large image batches (>100 images)
- Limited memory environments
- Streaming processing
- Progress tracking during processing
"""

def ocr(self, img, **kwargs) -> list:
"""
DEPRECATED: Use predict() instead.
Legacy method for compatibility with PaddleOCR 2.x.
Internally calls predict() with the same arguments.
Note: May be removed in future versions. Migrate to predict().
"""

def close(self) -> None:
"""
Close the pipeline and free resources.
Critical for:
- Freeing GPU memory
- Releasing model weights from RAM
- Preventing memory leaks in long-running processes
Best Practice: Always call in finally block or use try-finally pattern
"""
def export_paddlex_config_to_yaml(self, yaml_path: str) -> None:
"""
Export pipeline configuration to YAML file.
Args:
yaml_path (str): Path to save YAML configuration
Use Cases:
- Save optimal configuration for production
- Version control model settings
- Share configurations across team
- Reproduce experimental results
"""

from paddleocr import PaddleOCR
# Initialize with English language
ocr = PaddleOCR(lang='en')
# Perform OCR
result = ocr.predict('document.jpg')
# Process results
for item in result:
for line in item.get('rec_result', []):
print(f"Text: {line['rec_text']}")
print(f"Confidence: {line['rec_score']:.2f}")
ocr.close()

from paddleocr import PaddleOCR
# Chinese + English mixed text
ocr_ch = PaddleOCR(lang='ch')
result = ocr_ch.predict('chinese_document.jpg')
ocr_ch.close()
# French text
ocr_fr = PaddleOCR(lang='fr')
result = ocr_fr.predict('french_document.jpg')
ocr_fr.close()
# Russian text
ocr_ru = PaddleOCR(lang='ru')
result = ocr_ru.predict('russian_document.jpg')
ocr_ru.close()

Language-Specific Considerations:
from paddleocr import PaddleOCR
# Use PP-OCRv5 for best accuracy
ocr = PaddleOCR(lang='en', ocr_version='PP-OCRv5')
result = ocr.predict('image.jpg')
ocr.close()

Version Comparison:
PP-OCRv5: 94.0% accuracy, ~0.15s/image (GPU)
PP-OCRv4: 92.5% accuracy, ~0.12s/image (GPU)
PP-OCRv3: 90.0% accuracy, ~0.10s/image (GPU)

from paddleocr import PaddleOCR
# Enable word/character box output
ocr = PaddleOCR(lang='en', return_word_box=True)
result = ocr.predict('image.jpg')
for item in result:
# Character polygons
char_polys = item.get('char_polys', [])
char_scores = item.get('char_scores', [])
for poly, score in zip(char_polys, char_scores):
print(f"Character box: {poly}, Score: {score}")
ocr.close()

Use Cases for Character Boxes:
from paddleocr import PaddleOCR
# Enable orientation correction and unwarping
ocr = PaddleOCR(
lang='en',
use_doc_orientation_classify=True,
use_doc_unwarping=True,
use_textline_orientation=True
)
result = ocr.predict('skewed_document.jpg')
ocr.close()

When to Enable Preprocessing:
Performance Impact:
No preprocessing: 0.15s/image
+ orientation: 0.25s/image (+0.10s)
+ unwarping: 0.60s/image (+0.45s)
+ text orientation: 0.70s/image (+0.10s)

from paddleocr import PaddleOCR
ocr = PaddleOCR(lang='en')
# Process multiple images
image_paths = ['page1.jpg', 'page2.jpg', 'page3.jpg']
# Memory-efficient iteration
for result in ocr.predict_iter(image_paths):
for line in result.get('rec_result', []):
print(line['rec_text'])
ocr.close()

Batch Processing Strategies:
# Strategy 1: List input (all in memory)
results = ocr.predict(['img1.jpg', 'img2.jpg', 'img3.jpg']) # Use for <100 images
# Strategy 2: Iterator (memory efficient)
for result in ocr.predict_iter(large_image_list): # Use for >100 images
process(result)
# Strategy 3: Directory processing
results = ocr.predict('images_directory/') # Processes all images in directory

from paddleocr import PaddleOCR
# Use custom detection and recognition models
ocr = PaddleOCR(
lang='en',
text_detection_model_dir='./custom_det_model',
text_recognition_model_dir='./custom_rec_model'
)
result = ocr.predict('image.jpg')
ocr.close()

Custom Model Requirements:
from paddleocr import PaddleOCR
# Adjust detection thresholds for better results
ocr = PaddleOCR(
lang='en',
text_det_thresh=0.3, # Lower for more sensitive detection
text_det_box_thresh=0.5, # Threshold for box confidence
text_det_unclip_ratio=1.6, # Expand detected boxes
text_rec_score_thresh=0.6 # Filter low-confidence recognition
)
result = ocr.predict('complex_image.jpg')
ocr.close()

Parameter Tuning Guide:
# For faint/low-contrast text
ocr = PaddleOCR(lang='en', text_det_thresh=0.2, text_det_box_thresh=0.4)
# For high-quality scans (faster, fewer false positives)
ocr = PaddleOCR(lang='en', text_det_thresh=0.4, text_det_box_thresh=0.7)
# For small text
ocr = PaddleOCR(lang='en', text_det_limit_side_len=2048, text_det_unclip_ratio=1.8)
# For large text / faster processing
ocr = PaddleOCR(lang='en', text_det_limit_side_len=640, text_det_unclip_ratio=1.2)

from paddleocr import PaddleOCR
# Use GPU for faster inference
ocr = PaddleOCR(lang='en', device='gpu')
# Or specify GPU ID
ocr_gpu0 = PaddleOCR(lang='en', device='gpu:0')
result = ocr.predict('large_document.jpg')
ocr.close()

GPU Performance Tips:
# Enable HPI for additional 2-3x speedup
ocr = PaddleOCR(lang='en', device='gpu', use_hpi=True)
# Batch processing for better GPU utilization
images = ['img1.jpg', 'img2.jpg', 'img3.jpg', 'img4.jpg']
results = ocr.predict(images) # Processes batch on GPU
# Multi-GPU setup
ocr_gpu0 = PaddleOCR(lang='en', device='gpu:0')
ocr_gpu1 = PaddleOCR(lang='en', device='gpu:1')
# Distribute images across GPUs manually

from paddleocr import PaddleOCR
# Create pipeline with custom configuration
ocr = PaddleOCR(
lang='en',
ocr_version='PP-OCRv5',
text_det_thresh=0.3,
text_rec_score_thresh=0.6
)
# Export configuration
ocr.export_paddlex_config_to_yaml('my_ocr_config.yaml')
ocr.close()
# Load configuration in another session
ocr_reloaded = PaddleOCR(paddlex_config='my_ocr_config.yaml')
result = ocr_reloaded.predict('image.jpg')
ocr_reloaded.close()

Configuration Management Benefits:
The lang parameter supports the following language codes (109 total):
Full Language List: See official documentation for complete list of 109 supported languages
The predict() and predict_iter() methods return results in the following structure:
[
{
"input_path": "path/to/image.jpg",
"dt_polys": [
[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], # Polygon for first text region
[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], # Polygon for second text region
# ...
],
"rec_text": ["recognized text 1", "recognized text 2", ...],
"rec_score": [0.98, 0.95, ...], # Confidence scores
"dt_scores": [0.99, 0.97, ...], # Detection scores
"rec_result": [
{"rec_text": "recognized text 1", "rec_score": 0.98},
{"rec_text": "recognized text 2", "rec_score": 0.95},
# ...
],
# If return_word_box=True:
"char_polys": [
[ # Character boxes for first text region
[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], # First character
[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], # Second character
# ...
],
# ...
],
"char_scores": [[0.99, 0.98, ...], ...] # Character-level scores
}
]

Field Descriptions:
# Maximum accuracy (slowest)
ocr_accurate = PaddleOCR(
lang='en',
ocr_version='PP-OCRv5',
text_det_limit_side_len=2048,
use_doc_orientation_classify=True,
use_doc_unwarping=True,
device='gpu'
)
# Balanced (recommended for most use cases)
ocr_balanced = PaddleOCR(
lang='en',
ocr_version='PP-OCRv5',
device='gpu'
)
# Maximum speed (lower accuracy)
ocr_fast = PaddleOCR(
lang='en',
ocr_version='PP-OCRv4',
text_detection_model_name='PP-OCRv4_mobile_det',
text_recognition_model_name='PP-OCRv4_mobile_rec',
text_det_limit_side_len=640,
device='gpu',
use_hpi=True
)

# Low memory mode
ocr = PaddleOCR(
lang='en',
text_recognition_batch_size=1, # Process one at a time
text_det_limit_side_len=640 # Reduce image size
)
# Process with iterator for large batches
for result in ocr.predict_iter(large_image_list):
process(result)
# Result is freed after each iteration

from paddleocr import PaddleOCR
def robust_ocr(image_path, *, min_score=0.5):
    """Run OCR on one input, preferring GPU and falling back to CPU.

    The pipeline is first created on the GPU; if that raises, the failure is
    reported and a CPU pipeline is created instead. The pipeline is always
    closed before returning.

    Args:
        image_path: Input forwarded unchanged to ``PaddleOCR.predict()``.
        min_score (float): Confidence cutoff. Only entries whose
            ``rec_score`` is strictly greater than this value are kept.
            Defaults to 0.5, the cutoff this example originally hard-coded.

    Returns:
        list: The ``rec_result`` entries of the first result that pass the
        cutoff. An empty list is returned when no text was detected, when
        the file is missing, or when prediction raises.
    """
    try:
        ocr = PaddleOCR(lang='en', device='gpu')
    except Exception as gpu_error:
        # Deliberate best-effort: any GPU setup failure triggers the CPU path.
        print(f"GPU initialization failed: {gpu_error}, falling back to CPU")
        ocr = PaddleOCR(lang='en', device='cpu')
    try:
        result = ocr.predict(image_path)
        # Check if any text was detected
        if not result or not result[0].get('rec_result'):
            print(f"Warning: No text detected in {image_path}")
            return []
        # Filter low-confidence results
        return [
            line for line in result[0]['rec_result']
            if line['rec_score'] > min_score
        ]
    except FileNotFoundError:
        print(f"Error: Image file not found: {image_path}")
        return []
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return []
    finally:
        # Runs on every exit path so model resources are released.
        ocr.close()

# ocr.close() to free resources
# predict_iter()

# Try increasing detection sensitivity
ocr = PaddleOCR(
lang='en',
text_det_thresh=0.2, # Lower threshold
text_det_unclip_ratio=1.8 # Larger boxes
)
# Enable preprocessing for poor quality images
ocr = PaddleOCR(
lang='en',
use_doc_orientation_classify=True,
use_doc_unwarping=True
)
# Use higher resolution
ocr = PaddleOCR(
lang='en',
text_det_limit_side_len=2048 # Higher resolution
)

# Lower detection thresholds
ocr = PaddleOCR(
lang='en',
text_det_thresh=0.2,
text_det_box_thresh=0.4
)
# Increase image size limit
ocr = PaddleOCR(
lang='en',
text_det_limit_side_len=1920
)

# Reduce batch size
ocr = PaddleOCR(
lang='en',
text_recognition_batch_size=1
)
# Use mobile models
ocr = PaddleOCR(
lang='en',
text_detection_model_name='PP-OCRv5_mobile_det',
text_recognition_model_name='PP-OCRv5_mobile_rec'
)
# Use iterator for large batches
for result in ocr.predict_iter(images):
process(result)

# Enable GPU
ocr = PaddleOCR(lang='en', device='gpu')
# Enable HPI
ocr = PaddleOCR(lang='en', device='gpu', use_hpi=True)
# Use mobile models
ocr = PaddleOCR(
lang='en',
text_detection_model_name='PP-OCRv5_mobile_det',
text_recognition_model_name='PP-OCRv5_mobile_rec'
)
# Reduce image size
ocr = PaddleOCR(lang='en', text_det_limit_side_len=640)