or run

tessl search

Version

Workspace: tessl
Visibility: Public
Created: 10 days ago
Last updated: 1 day ago
Describes: pkg:pypi/paddleocr@3.3.x

docs

models

pipelines

doc-preprocessor.md doc-understanding.md formula-recognition.md paddleocr-vl.md paddleocr.md pp-chatocrv4.md pp-doctranslation.md pp-structurev3.md seal-recognition.md table-recognition.md

index.md utilities.md

tile.json

tessl/pypi-paddleocr

tessl install tessl/pypi-paddleocr@3.3.0

Industry-leading OCR and document AI engine that converts documents and images into structured, AI-friendly data formats with comprehensive solutions from text extraction to intelligent document understanding.

PaddleOCR-VL Pipeline

Vision-Language OCR pipeline using the compact 0.9B PaddleOCR-VL model for multilingual document parsing. Supports 109 languages with minimal resource consumption and excels at recognizing complex elements like text, tables, formulas, and charts.

Capabilities

Pipeline Initialization

class PaddleOCRVL:
    """
    Vision-Language document parsing pipeline using PaddleOCR-VL model.

    Uses a compact 0.9B VLM (vision-language model) for efficient document parsing
    across 109 languages with state-of-the-art accuracy on complex elements.
    """
    def __init__(
        self,
        layout_detection_model_name: str = None,
        layout_detection_model_dir: str = None,
        layout_threshold: float = None,
        layout_nms: bool = None,
        layout_unclip_ratio: float = None,
        layout_merge_bboxes_mode: str = None,
        vl_rec_model_name: str = None,
        vl_rec_model_dir: str = None,
        vl_rec_backend: str = None,
        vl_rec_server_url: str = None,
        vl_rec_max_concurrency: int = None,
        vl_rec_api_model_name: str = None,
        vl_rec_api_key: str = None,
        doc_orientation_classify_model_name: str = None,
        doc_orientation_classify_model_dir: str = None,
        doc_unwarping_model_name: str = None,
        doc_unwarping_model_dir: str = None,
        use_doc_orientation_classify: bool = None,
        use_doc_unwarping: bool = None,
        use_layout_detection: bool = None,
        use_chart_recognition: bool = None,
        format_block_content: bool = None,
        paddlex_config: "str | dict" = None,
        device: str = None,
        use_hpi: bool = None,
        **kwargs
    ):
        """
        Initialize PaddleOCR-VL pipeline.

        Every parameter defaults to ``None`` in the signature.
        NOTE(review): ``None`` presumably means "use the pipeline's built-in
        default" -- confirm against the upstream documentation.

        Args:
            vl_rec_backend (str, optional): VL recognition backend:
                - 'native': Local model inference (default)
                - 'vllm-server': vLLM server
                - 'sglang-server': SGLang server
                - 'fastdeploy-server': FastDeploy server
                NOTE(review): the signature default is ``None``; 'native' is
                presumably what ``None`` resolves to -- confirm.
            vl_rec_server_url (str, optional): Server URL for remote backends
            vl_rec_max_concurrency (int, optional): Max concurrent requests for server backends
            vl_rec_api_model_name (str, optional): API model name for server backends
            vl_rec_api_key (str, optional): API key for authentication
            use_layout_detection (bool, optional): Enable layout detection preprocessing
            use_chart_recognition (bool, optional): Enable chart-to-table conversion
            use_doc_orientation_classify (bool, optional): Enable orientation correction
            use_doc_unwarping (bool, optional): Enable document unwarping
            format_block_content (bool, optional): Format block content in output
            device (str, optional): Device for inference
            paddlex_config (str or dict, optional): Configuration file or dict
            **kwargs: Additional keyword arguments; their handling is not
                described in this reference.

        Parameters present in the signature but not described in this
        reference (TODO: confirm semantics against the upstream docs):
            layout_detection_model_name, layout_detection_model_dir,
            layout_threshold, layout_nms, layout_unclip_ratio,
            layout_merge_bboxes_mode, vl_rec_model_name, vl_rec_model_dir,
            doc_orientation_classify_model_name,
            doc_orientation_classify_model_dir, doc_unwarping_model_name,
            doc_unwarping_model_dir, use_hpi.
            Judging by their names, the ``*_model_name`` / ``*_model_dir``
            pairs presumably select a model by name or local directory, and
            the ``layout_*`` values presumably tune layout detection.
        """

Document Parsing

def predict(
    self,
    input,
    *,
    use_doc_orientation_classify: bool = None,
    use_doc_unwarping: bool = None,
    use_layout_detection: bool = None,
    use_chart_recognition: bool = None,
    layout_threshold: float = None,
    layout_nms: bool = None,
    layout_unclip_ratio: float = None,
    layout_merge_bboxes_mode: str = None,
    use_queues: bool = None,
    prompt_label: str = None,
    format_block_content: bool = None,
    repetition_penalty: float = None,
    temperature: float = None,
    top_p: float = None,
    min_pixels: int = None,
    max_pixels: int = None,
    **kwargs
) -> list:
    """
    Parse document using vision-language model.

    Everything after ``input`` is keyword-only (note the bare ``*`` in the
    signature) and defaults to ``None``.
    NOTE(review): ``None`` presumably falls back to the value given when the
    pipeline was constructed -- confirm against the upstream documentation.

    Args:
        input: Image/PDF path, numpy array, PIL Image, directory, or list
        use_doc_orientation_classify (bool, optional): Override orientation
            correction
        use_doc_unwarping (bool, optional): Override document unwarping
        use_layout_detection (bool, optional): Override layout detection
        use_chart_recognition (bool, optional): Override chart recognition
        layout_threshold (float, optional): Not described in this reference;
            presumably overrides the layout detection threshold -- TODO confirm
        layout_nms (bool, optional): Not described in this reference;
            presumably toggles NMS for layout detection -- TODO confirm
        layout_unclip_ratio (float, optional): Not described in this
            reference -- TODO confirm semantics
        layout_merge_bboxes_mode (str, optional): Not described in this
            reference -- TODO confirm accepted values
        use_queues (bool, optional): Not described in this reference --
            TODO confirm semantics
        prompt_label (str, optional): Custom prompt for VL model
        format_block_content (bool, optional): Format block content
        repetition_penalty (float, optional): Repetition penalty for generation
        temperature (float, optional): Temperature for generation
        top_p (float, optional): Top-p sampling parameter
        min_pixels (int, optional): Minimum image pixels for VL model
        max_pixels (int, optional): Maximum image pixels for VL model
        **kwargs: Additional keyword arguments; their handling is not
            described in this reference.

    Returns:
        list: Parsing results with markdown, layout, and element information
    """

def predict_iter(self, input, **kwargs):
    """Iterate over parsing results for memory efficiency.

    Args:
        input: Document(s) to parse.
            NOTE(review): presumably accepts the same input kinds as
            ``predict`` -- confirm.
        **kwargs: NOTE(review): presumably the same keyword options as
            ``predict`` -- confirm.
    """

def concatenate_markdown_pages(self, markdown_list: list) -> str:
    """Concatenate markdown from multiple pages.

    Args:
        markdown_list (list): Per-page markdown items to join.
            NOTE(review): the element type is not specified in this
            reference -- confirm against the upstream docs.

    Returns:
        str: The combined markdown.
    """

def close(self) -> None:
    """Close the pipeline and free resources.

    Call this once the pipeline is no longer needed.
    """

def export_paddlex_config_to_yaml(self, yaml_path: str) -> None:
    """Export configuration to YAML.

    Args:
        yaml_path (str): Destination path of the YAML file.
    """

Usage Examples

Basic Document Parsing

from paddleocr import PaddleOCRVL

# Initialize PaddleOCR-VL
ocr_vl = PaddleOCRVL()

try:
    # Parse document
    result = ocr_vl.predict('document.pdf')

    # Get markdown output
    for item in result:
        print(item['markdown'])
finally:
    # Release the pipeline's resources even if parsing or printing fails
    ocr_vl.close()

Multilingual Document Parsing

from paddleocr import PaddleOCRVL

# PaddleOCR-VL supports 109 languages natively
ocr_vl = PaddleOCRVL()

try:
    # Parse documents in different languages with the same pipeline instance
    result_en = ocr_vl.predict('english_doc.pdf')
    result_zh = ocr_vl.predict('chinese_doc.pdf')
    result_ar = ocr_vl.predict('arabic_doc.pdf')
    result_hi = ocr_vl.predict('hindi_doc.pdf')
finally:
    # Release the pipeline's resources even if one of the documents fails
    ocr_vl.close()

With Layout Detection and Chart Recognition

from paddleocr import PaddleOCRVL

# Enable layout detection preprocessing and chart-to-table conversion
ocr_vl = PaddleOCRVL(
    use_layout_detection=True,
    use_chart_recognition=True
)

try:
    result = ocr_vl.predict('document_with_charts.pdf')
finally:
    # Release the pipeline's resources even if parsing fails
    ocr_vl.close()

Using Remote Server Backend

from paddleocr import PaddleOCRVL

# Connect to vLLM server
ocr_vl = PaddleOCRVL(
    vl_rec_backend='vllm-server',
    vl_rec_server_url='http://localhost:8000',
    vl_rec_max_concurrency=4
)

try:
    result = ocr_vl.predict('document.pdf')
finally:
    # Release the pipeline's resources even if the remote request fails
    ocr_vl.close()

Custom Generation Parameters

from paddleocr import PaddleOCRVL

ocr_vl = PaddleOCRVL()

try:
    # Generation settings are passed per call as keyword arguments
    result = ocr_vl.predict(
        'document.pdf',
        repetition_penalty=1.2,
        temperature=0.7,
        top_p=0.9
    )
finally:
    # Release the pipeline's resources even if parsing fails
    ocr_vl.close()

Supported Languages

PaddleOCR-VL natively supports 109 languages, covering all major world languages and scripts (Latin, CJK, Cyrillic, Arabic, Devanagari, Thai, etc.). It also handles specialized use cases such as handwritten text and historical documents.

Return Value Structure

# Illustrative shape of the list returned by predict(); this is not runnable
# code: x1, y1, x2, y2 and {...} are placeholders for real values.
[
    {
        "input_path": "path/to/document.pdf",
        "markdown": "# Markdown output\n\nContent...",
        "layout_result": [
            {
                "bbox": [x1, y1, x2, y2],  # presumably corner coordinates of the region -- TODO confirm
                "label": "text",
                "content": "recognized content"
            },
            # ... (further layout entries)
        ],
        "chart_result": [  # If use_chart_recognition=True
            {
                "bbox": [x1, y1, x2, y2],
                "table_data": {...}
            }
        ]
    }
]