CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-google-cloud-documentai

Google Cloud Document AI client library for extracting structured information from documents using machine learning

Pending
Overview
Eval results
Files

docs/beta-features.md

Beta Features (v1beta3)

This guide covers beta features available in the v1beta3 API of Google Cloud Document AI, including dataset management, enhanced document processing, and experimental capabilities.

⚠️ Beta Notice: These features are in beta and may change or be removed in future versions. Use with caution in production environments.

API Versions Overview

V1beta3 vs V1 API

The v1beta3 API includes all v1 functionality plus additional experimental features:

# V1 (Stable) - Production ready
from google.cloud.documentai import DocumentProcessorServiceClient
from google.cloud.documentai.types import Document, ProcessRequest

# V1beta3 (Beta) - Includes experimental features  
from google.cloud.documentai_v1beta3 import DocumentProcessorServiceClient as DocumentProcessorServiceClientBeta
from google.cloud.documentai_v1beta3 import DocumentServiceClient  # Beta-only service
from google.cloud.documentai_v1beta3.types import Dataset, DatasetSchema  # Beta-only types

Import Patterns

# Beta document processing (enhanced features)
from google.cloud.documentai_v1beta3 import (
    DocumentProcessorServiceClient,
    DocumentProcessorServiceAsyncClient
)

# Beta dataset management (exclusive to v1beta3)
from google.cloud.documentai_v1beta3 import (
    DocumentServiceClient,
    DocumentServiceAsyncClient
)

# Beta-specific types
from google.cloud.documentai_v1beta3.types import (
    # Dataset types
    Dataset,
    DatasetSchema, 
    DocumentMetadata,
    DocumentId,
    BatchDatasetDocuments,
    DocumentPageRange,
    
    # Enhanced document types
    RevisionRef,
    
    # Schema enhancements
    EntityTypeMetadata,
    FieldExtractionMetadata,
    PropertyMetadata,
    SummaryOptions,
    
    # Additional beta request/response types
    ImportProcessorVersionRequest,
    ImportProcessorVersionResponse,
    ImportProcessorVersionMetadata,
    BatchDeleteDocumentsRequest,
    BatchDeleteDocumentsResponse,
    BatchDeleteDocumentsMetadata,
    UpdateDatasetRequest,
    UpdateDatasetSchemaRequest,
    GetDatasetSchemaRequest,
    GetDocumentRequest,
    GetDocumentResponse,
    ImportDocumentsRequest,
    ImportDocumentsResponse,
    ImportDocumentsMetadata,
    UpdateDatasetOperationMetadata,
    
    # Enums
    DatasetSplitType,
    DocumentLabelingState
)

Dataset Management

DocumentServiceClient

The DocumentServiceClient is exclusive to v1beta3 and provides dataset management capabilities for custom processor training.

from google.cloud.documentai_v1beta3 import DocumentServiceClient
from google.cloud.documentai_v1beta3.types import (
    Dataset,
    DatasetSchema,
    UpdateDatasetRequest,
    ImportDocumentsRequest,
    GetDocumentRequest,
    ListDocumentsRequest,
    BatchDeleteDocumentsRequest
)

class DocumentServiceClient:
    """
    Client for the dataset and document management surface of Document AI.

    Used to curate the documents a custom processor is trained on. This
    service exists only in the v1beta3 API; there is no v1 equivalent.
    """

    def update_dataset(
        self,
        request: UpdateDatasetRequest,
        **kwargs
    ) -> Dataset:
        """
        Apply configuration changes to a dataset.

        Args:
            request: Carries the dataset and the field mask of changes

        Returns:
            Dataset: The dataset as it stands after the update
        """
        pass

    def import_documents(
        self,
        request: ImportDocumentsRequest,
        **kwargs
    ) -> "Operation":
        """
        Bring documents into a dataset so they can be labeled and trained on.

        Args:
            request: Describes the import source and target dataset

        Returns:
            Operation: Long-running operation tracking the import
        """
        pass

    def get_document(
        self,
        request: GetDocumentRequest,
        **kwargs
    ) -> "Document":
        """
        Fetch a single document (metadata and content) from a dataset.

        Args:
            request: Names the document to fetch

        Returns:
            Document: The requested document
        """
        pass

    def list_documents(
        self,
        request: ListDocumentsRequest,
        **kwargs
    ) -> "ListDocumentsResponse":
        """
        Enumerate the documents held in a dataset.

        Args:
            request: Names the parent dataset to list from

        Returns:
            ListDocumentsResponse: Paginated document listing
        """
        pass

    def batch_delete_documents(
        self,
        request: BatchDeleteDocumentsRequest,
        **kwargs
    ) -> "Operation":
        """
        Remove several documents from a dataset in one call.

        Args:
            request: Names the documents to delete

        Returns:
            Operation: Long-running operation tracking the deletion
        """
        pass

    def get_dataset_schema(
        self,
        request: "GetDatasetSchemaRequest",
        **kwargs
    ) -> DatasetSchema:
        """
        Read the schema attached to a dataset.

        Args:
            request: Names the schema to fetch

        Returns:
            DatasetSchema: The dataset's schema definition
        """
        pass

    def update_dataset_schema(
        self,
        request: "UpdateDatasetSchemaRequest",
        **kwargs
    ) -> DatasetSchema:
        """
        Replace or amend the schema attached to a dataset.

        Args:
            request: Carries the new schema definition

        Returns:
            DatasetSchema: The schema as it stands after the update
        """
        pass
# Example usage
client = DocumentServiceClient()

# List documents in a dataset
# The dataset path is the processor resource name plus a literal "dataset" segment.
parent = "projects/my-project/locations/us/processors/abc123/dataset"
request = ListDocumentsRequest(parent=parent)
response = client.list_documents(request=request)

# Each entry is a DocumentMetadata carrying an id and a labeling status.
for document_metadata in response.document_metadata:
    print(f"Document: {document_metadata.document_id.document_id}")
    print(f"State: {document_metadata.labeling_state}")

Dataset Types

Dataset

from google.cloud.documentai_v1beta3.types import Dataset

class Dataset:
    """
    A collection of documents used to train a custom processor.

    Attributes:
        name (str): Dataset resource name
        display_name (str): Human-readable name
        description (str): Dataset description
        document_count (int): Number of documents in dataset
        satisfies_pzs (bool): Reserved for future use
        satisfies_pzi (bool): Reserved for future use
    """

    def __init__(
        self,
        display_name: str,
        description: str = ""
    ):
        """
        Create a dataset configuration.

        Args:
            display_name: Human-readable dataset name
            description: Optional free-form description (defaults to empty)
        """
        self.description = description
        self.display_name = display_name

# Example usage
def create_training_dataset_config(
    display_name: str,
    description: str
) -> Dataset:
    """
    Build a Dataset configuration for custom processor training.

    Args:
        display_name: Dataset name
        description: Dataset description

    Returns:
        Dataset: The assembled configuration object
    """
    config = Dataset(display_name=display_name, description=description)
    return config

DatasetSchema

from google.cloud.documentai_v1beta3.types import DatasetSchema

class DatasetSchema:
    """
    Schema for a dataset: which entity types documents may contain.

    Attributes:
        name (str): Schema resource name
        display_name (str): Human-readable schema name
        description (str): Schema description
        entity_types (Sequence[DocumentSchema.EntityType]): Entity types in schema
    """

    def __init__(
        self,
        display_name: str,
        entity_types: list["DocumentSchema.EntityType"],
        description: str = ""
    ):
        """
        Create a dataset schema.

        Args:
            display_name: Schema name
            entity_types: Entity type definitions making up the schema
            description: Optional description (defaults to empty)
        """
        self.description = description
        self.entity_types = entity_types
        self.display_name = display_name

def create_custom_schema(
    schema_name: str,
    entity_definitions: list[dict]
) -> DatasetSchema:
    """
    Assemble a custom dataset schema from plain-dict entity definitions.

    Args:
        schema_name: Name for the schema
        entity_definitions: Entity type definitions; each dict has "name",
            "display_name", and an optional "properties" list

    Returns:
        DatasetSchema: Custom schema definition
    """
    from google.cloud.documentai_v1beta3.types import DocumentSchema

    def _build_property(prop_def: dict) -> "DocumentSchema.EntityType.Property":
        # A missing occurrence_type falls back to OPTIONAL_ONCE.
        return DocumentSchema.EntityType.Property(
            name=prop_def["name"],
            display_name=prop_def["display_name"],
            value_type=prop_def["value_type"],
            occurrence_type=prop_def.get("occurrence_type", "OPTIONAL_ONCE")
        )

    entity_types = [
        DocumentSchema.EntityType(
            name=entity_def["name"],
            display_name=entity_def["display_name"],
            properties=[_build_property(p) for p in entity_def.get("properties", [])]
        )
        for entity_def in entity_definitions
    ]

    return DatasetSchema(
        display_name=schema_name,
        entity_types=entity_types,
        description=f"Custom schema: {schema_name}"
    )

# Example usage
# Two entity types: a required single date and a repeated party name.
entity_definitions = [
    {
        "name": "contract_date",
        "display_name": "Contract Date",
        "properties": [
            {
                "name": "date_value",
                "display_name": "Date Value",
                "value_type": "date",
                "occurrence_type": "REQUIRED_ONCE"
            }
        ]
    },
    {
        "name": "contract_parties",
        "display_name": "Contract Parties",
        "properties": [
            {
                "name": "party_name",
                "display_name": "Party Name",
                "value_type": "text",
                # REQUIRED_MULTIPLE: at least one party, possibly many.
                "occurrence_type": "REQUIRED_MULTIPLE"
            }
        ]
    }
]

schema = create_custom_schema("Contract Analysis Schema", entity_definitions)

Document Management

DocumentMetadata

from google.cloud.documentai_v1beta3.types import (
    DocumentMetadata,
    DocumentId,
    DocumentLabelingState
)

class DocumentMetadata:
    """
    Per-document metadata tracked inside a dataset.

    Attributes:
        document_id (DocumentId): Document identifier
        page_count (int): Number of pages in document
        dataset_type (DatasetSplitType): Dataset split type (TRAIN, TEST, etc.)
        labeling_state (DocumentLabelingState): Document labeling status
        display_name (str): Human-readable document name
    """
    pass

class DocumentId:
    """
    Identity of a document inside a dataset.

    Attributes:
        gcs_managed_doc_id (str): Cloud Storage managed document ID
        unmanaged_doc_id (str): User-managed document ID
        revision_ref (RevisionRef): Reference to document revision
    """
    pass

class DocumentLabelingState:
    """
    Integer constants describing a document's labeling status.

    Values:
        DOCUMENT_LABELING_STATE_UNSPECIFIED: Unspecified state
        DOCUMENT_LABELED: Document is labeled
        DOCUMENT_UNLABELED: Document is not labeled
        DOCUMENT_AUTO_LABELED: Document is auto-labeled
    """
    DOCUMENT_LABELING_STATE_UNSPECIFIED = 0
    DOCUMENT_LABELED = 1
    DOCUMENT_UNLABELED = 2
    DOCUMENT_AUTO_LABELED = 3

# Example usage
def list_dataset_documents(
    client: DocumentServiceClient,
    project_id: str,
    location: str,
    processor_id: str
) -> list[DocumentMetadata]:
    """
    List all documents in a processor's dataset.

    Args:
        client: DocumentServiceClient instance
        project_id: Google Cloud project ID
        location: Processor location (e.g. "us")
        processor_id: Processor ID

    Returns:
        list[DocumentMetadata]: Metadata for the documents in the dataset
    """
    from google.cloud.documentai_v1beta3.types import ListDocumentsRequest

    # The dataset path is the processor resource name plus "/dataset".
    parent = f"projects/{project_id}/locations/{location}/processors/{processor_id}/dataset"

    request = ListDocumentsRequest(parent=parent)
    response = client.list_documents(request=request)

    # The original append loop only copied the sequence; list() does the
    # same in one step.
    # NOTE(review): if list_documents returns a pager, iterate all pages
    # here instead of reading a single response — confirm against the client.
    return list(response.document_metadata)

def filter_labeled_documents(
    document_metadata_list: list[DocumentMetadata]
) -> list[DocumentMetadata]:
    """
    Keep only documents whose labeling is complete.

    Args:
        document_metadata_list: List of document metadata

    Returns:
        list[DocumentMetadata]: Documents in the DOCUMENT_LABELED state
    """
    def _is_labeled(metadata: DocumentMetadata) -> bool:
        # Training requires fully labeled documents.
        return metadata.labeling_state == DocumentLabelingState.DOCUMENT_LABELED

    return list(filter(_is_labeled, document_metadata_list))

Enhanced Document Processing

Import Processor Version (Beta)

from google.cloud.documentai_v1beta3 import DocumentProcessorServiceClient
from google.cloud.documentai_v1beta3.types import ImportProcessorVersionRequest

def import_processor_version(
    project_id: str,
    location: str,
    processor_id: str,
    source_processor_version: str
) -> "Operation":
    """
    Copy a processor version in from another location or project (beta).

    Args:
        project_id: Target project ID
        location: Target location
        processor_id: Target processor ID
        source_processor_version: Full resource name of the source version

    Returns:
        Operation: Long-running operation tracking the import
    """
    service = DocumentProcessorServiceClient()

    # Destination processor that will receive the imported version.
    target = service.processor_path(project_id, location, processor_id)

    # Build the request and kick off the long-running import.
    operation = service.import_processor_version(
        request=ImportProcessorVersionRequest(
            parent=target,
            processor_version_source=source_processor_version
        )
    )

    print("Importing processor version...")
    print(f"Operation: {operation.operation.name}")

    return operation

# Example usage
# Import a trained version from an EU processor into a US processor.
operation = import_processor_version(
    project_id="target-project",
    location="us",
    processor_id="target-processor-id",
    source_processor_version="projects/source-project/locations/eu/processors/source-id/processorVersions/version-id"
)

# Monitor import progress
result = operation.result()  # Wait for completion
print(f"Import completed: {result}")

Enhanced Schema Types

EntityTypeMetadata

from google.cloud.documentai_v1beta3.types import EntityTypeMetadata

class EntityTypeMetadata:
    """
    Beta metadata attached to an entity type in a document schema.

    Attributes:
        inactive (bool): Whether entity type is inactive
        description (str): Description of the entity type
    """

    def __init__(self, description: str = "", inactive: bool = False):
        """
        Create entity type metadata.

        Args:
            description: Entity type description (defaults to empty)
            inactive: Whether the entity type is currently inactive
        """
        self.inactive = inactive
        self.description = description

SummaryOptions

from google.cloud.documentai_v1beta3.types import SummaryOptions

class SummaryOptions:
    """
    Preferences controlling document summarization (beta).

    Attributes:
        length (SummaryOptions.Length): Summary length preference
        format_ (SummaryOptions.Format): Summary format preference
    """

    class Length:
        """Allowed summary lengths."""
        BRIEF = "BRIEF"
        MODERATE = "MODERATE"
        COMPREHENSIVE = "COMPREHENSIVE"

    class Format:
        """Allowed summary formats."""
        PARAGRAPH = "PARAGRAPH"
        BULLETS = "BULLETS"
        STRUCTURED = "STRUCTURED"

    def __init__(
        self,
        length: str = "MODERATE",
        format_: str = "PARAGRAPH"
    ):
        """
        Create summary options.

        Args:
            length: Summary length preference
            format_: Summary format preference (trailing underscore avoids
                shadowing the builtin `format`)
        """
        self.format_ = format_
        self.length = length

Beta Enums and Constants

DatasetSplitType

from google.cloud.documentai_v1beta3.types import DatasetSplitType

class DatasetSplitType:
    """
    Integer constants for a document's train/test split assignment (beta).

    Values:
        DATASET_SPLIT_TYPE_UNSPECIFIED: Unspecified split type
        DATASET_SPLIT_TRAIN: Training dataset
        DATASET_SPLIT_TEST: Test dataset
        DATASET_SPLIT_UNASSIGNED: Unassigned documents
    """
    DATASET_SPLIT_TYPE_UNSPECIFIED = 0
    DATASET_SPLIT_TRAIN = 1
    DATASET_SPLIT_TEST = 2
    DATASET_SPLIT_UNASSIGNED = 3

def categorize_documents_by_split(
    document_metadata_list: list[DocumentMetadata]
) -> dict[str, list[DocumentMetadata]]:
    """
    Group dataset documents by their train/test split assignment.

    Args:
        document_metadata_list: List of document metadata

    Returns:
        dict: Buckets keyed "train", "test", "unassigned", "unspecified"
    """
    # Known split values map to named buckets; anything else (including
    # DATASET_SPLIT_TYPE_UNSPECIFIED) lands in "unspecified".
    bucket_by_split = {
        DatasetSplitType.DATASET_SPLIT_TRAIN: "train",
        DatasetSplitType.DATASET_SPLIT_TEST: "test",
        DatasetSplitType.DATASET_SPLIT_UNASSIGNED: "unassigned",
    }

    categorized: dict[str, list[DocumentMetadata]] = {
        "train": [],
        "test": [],
        "unassigned": [],
        "unspecified": [],
    }

    for metadata in document_metadata_list:
        bucket = bucket_by_split.get(metadata.dataset_type, "unspecified")
        categorized[bucket].append(metadata)

    return categorized

Complete Beta Feature Example

Custom Processor Training Workflow

from google.cloud.documentai_v1beta3 import (
    DocumentServiceClient,
    DocumentProcessorServiceClient
)
from google.cloud.documentai_v1beta3.types import (
    Dataset,
    DatasetSchema, 
    ImportDocumentsRequest,
    TrainProcessorVersionRequest,
    ListDocumentsRequest
)

def complete_custom_training_workflow(
    project_id: str,
    location: str,
    processor_type: str = "CUSTOM_EXTRACTION_PROCESSOR"
) -> dict:
    """
    Complete workflow for training a custom processor using beta features.

    Steps: create a processor, define a dataset schema, import training
    documents from Cloud Storage, inspect labeling progress, and start
    training when at least 10 labeled documents exist.

    Args:
        project_id: Google Cloud project ID
        location: Processing location
        processor_type: Type of custom processor to train

    Returns:
        dict: Summary with keys processor_id, dataset_parent,
            total_documents, labeled_documents.
    """

    # Initialize clients
    doc_service = DocumentServiceClient()
    processor_service = DocumentProcessorServiceClient()

    print("=== CUSTOM PROCESSOR TRAINING WORKFLOW ===")

    # Step 1: Create processor for training
    print("1. Creating custom processor...")

    from google.cloud.documentai_v1beta3.types import CreateProcessorRequest, Processor

    parent = processor_service.common_location_path(project_id, location)
    processor = Processor(
        display_name="Custom Contract Processor",
        type_=processor_type
    )

    create_request = CreateProcessorRequest(
        parent=parent,
        processor=processor
    )

    created_processor = processor_service.create_processor(request=create_request)
    # The processor ID is the last segment of the created resource name.
    processor_id = created_processor.name.split('/')[-1]

    print(f"Created processor: {processor_id}")

    # Step 2: Setup dataset schema
    print("2. Creating dataset schema...")

    entity_definitions = [
        {
            "name": "contract_date",
            "display_name": "Contract Date",
            "properties": [
                {
                    "name": "date_value",
                    "display_name": "Date Value",
                    "value_type": "date",
                    "occurrence_type": "REQUIRED_ONCE"
                }
            ]
        },
        {
            "name": "contract_value",
            "display_name": "Contract Value",
            "properties": [
                {
                    "name": "money_value",
                    "display_name": "Money Value",
                    "value_type": "money",
                    "occurrence_type": "REQUIRED_ONCE"
                }
            ]
        },
        {
            "name": "party_names",
            "display_name": "Party Names",
            "properties": [
                {
                    "name": "text_value",
                    "display_name": "Text Value",
                    "value_type": "text",
                    "occurrence_type": "REQUIRED_MULTIPLE"
                }
            ]
        }
    ]

    # NOTE(review): create_custom_schema is the helper defined earlier in this guide.
    schema = create_custom_schema("Contract Schema", entity_definitions)

    # Step 3: Import training documents
    print("3. Importing training documents...")

    # The dataset is addressed as the processor resource name plus "/dataset".
    dataset_parent = f"projects/{project_id}/locations/{location}/processors/{processor_id}/dataset"

    # Configure document import from Cloud Storage
    batch_documents_input_config = {
        "gcs_prefix": {"gcs_uri_prefix": "gs://my-training-bucket/contracts/"}
    }

    import_request = ImportDocumentsRequest(
        dataset=dataset_parent,
        batch_documents_input_config=batch_documents_input_config
    )

    import_operation = doc_service.import_documents(request=import_request)

    print("Importing documents...")
    import_result = import_operation.result()  # Wait for completion
    print("Documents imported successfully")

    # Step 4: Check dataset status
    print("4. Checking dataset status...")

    list_request = ListDocumentsRequest(parent=dataset_parent)
    list_response = doc_service.list_documents(request=list_request)

    total_docs = len(list_response.document_metadata)
    labeled_docs = len(filter_labeled_documents(list_response.document_metadata))

    print(f"Total documents: {total_docs}")
    print(f"Labeled documents: {labeled_docs}")

    # Step 5: Train processor version (if sufficient labeled data)
    if labeled_docs >= 10:  # Minimum for training
        print("5. Starting processor training...")

        processor_parent = processor_service.processor_path(
            project_id, location, processor_id
        )

        train_request = TrainProcessorVersionRequest(
            parent=processor_parent,
            processor_version={
                "display_name": "Contract Processor v1.0",
                "document_schema": schema
            },
            input_data={
                "training_documents": {
                    "gcs_prefix": {"gcs_uri_prefix": "gs://my-training-bucket/contracts/labeled/"}
                },
                "test_documents": {
                    "gcs_prefix": {"gcs_uri_prefix": "gs://my-training-bucket/contracts/test/"}
                }
            }
        )

        train_operation = processor_service.train_processor_version(request=train_request)

        print(f"Training started: {train_operation.operation.name}")
        print("Training typically takes several hours. Monitor progress using the operation name.")

    else:
        print(f"Insufficient labeled documents ({labeled_docs}). Need at least 10 for training.")

    return {
        "processor_id": processor_id,
        "dataset_parent": dataset_parent,
        "total_documents": total_docs,
        "labeled_documents": labeled_docs
    }

Beta Feature Monitoring

def monitor_beta_operations(
    project_id: str,
    location: str
) -> dict:
    """
    Collect the status of long-running beta operations, grouped by type.

    Args:
        project_id: Google Cloud project ID
        location: Processing location

    Returns:
        dict: Operation summaries keyed by category ("import_documents",
            "train_processor", "import_processor_version", "other")
    """
    from google.api_core import operations_v1
    from google.auth import default

    credentials, _ = default()
    ops_client = operations_v1.OperationsClient(credentials=credentials)

    # Operations are listed per project/location.
    location_name = f"projects/{project_id}/locations/{location}"

    buckets = {
        "import_documents": [],
        "train_processor": [],
        "import_processor_version": [],
        "other": []
    }

    # Substring of the operation name -> bucket key, checked in order.
    markers = (
        ("importDocuments", "import_documents"),
        ("trainProcessorVersion", "train_processor"),
        ("importProcessorVersion", "import_processor_version"),
    )

    for op in ops_client.list_operations(name=location_name):
        info = {
            "name": op.name,
            "done": op.done,
            "error": op.error.message if op.error else None
        }

        for marker, bucket in markers:
            if marker in op.name:
                buckets[bucket].append(info)
                break
        else:
            buckets["other"].append(info)

    return buckets

def print_beta_status(project_id: str, location: str):
    """Fetch beta operation statuses and print a human-readable report."""
    status = monitor_beta_operations(project_id, location)

    print("=== BETA OPERATIONS STATUS ===")

    for category, ops in status.items():
        heading = category.replace('_', ' ').title()
        print(f"\n{heading} Operations ({len(ops)}):")

        for op in ops:
            if op["done"]:
                status_text = "✓ Complete"
            else:
                status_text = "⏳ Running"
            error_text = ""
            if op["error"]:
                error_text = f" (Error: {op['error']})"
            short_name = op['name'].split('/')[-1]
            print(f"  - {short_name}: {status_text}{error_text}")

# Example usage
if __name__ == "__main__":
    # Run custom training workflow
    # NOTE(review): this performs real Document AI API calls and requires
    # valid credentials and an existing GCS bucket — confirm before running.
    result = complete_custom_training_workflow(
        project_id="my-project",
        location="us"
    )

    print(f"\nWorkflow completed:")
    print(f"Processor ID: {result['processor_id']}")
    print(f"Dataset: {result['dataset_parent']}")
    print(f"Documents: {result['labeled_documents']}/{result['total_documents']} labeled")

    # Monitor operations
    print_beta_status("my-project", "us")

Migration from V1 to V1beta3

Compatibility Notes

# V1 API (stable) - continues to work
from google.cloud.documentai import DocumentProcessorServiceClient as V1Client
from google.cloud.documentai.types import ProcessRequest as V1ProcessRequest

# V1beta3 API (beta) - includes all v1 functionality + beta features  
from google.cloud.documentai_v1beta3 import DocumentProcessorServiceClient as V1Beta3Client
from google.cloud.documentai_v1beta3.types import ProcessRequest as V1Beta3ProcessRequest

def migrate_to_beta_client():
    """
    Demonstrate moving from the v1 client to the v1beta3 client.

    The v1beta3 client accepts the same core calls as v1, so migration is
    a drop-in client swap; beta-only methods then become available.
    """

    # Both clients are constructed the same way.
    v1_client = V1Client()
    v1beta3_client = V1Beta3Client()

    # A processor resource name works with either client.
    processor_name = "projects/my-project/locations/us/processors/abc123"

    from google.cloud.documentai_v1beta3.types import RawDocument

    raw_doc = RawDocument(content=b"document content", mime_type="application/pdf")
    request = V1Beta3ProcessRequest(name=processor_name, raw_document=raw_doc)

    # The identical process call succeeds on both clients.
    v1_result = v1_client.process_document(request=request)
    v1beta3_result = v1beta3_client.process_document(request=request)

    # Beta-only surface exists solely on the v1beta3 client.
    try:
        import_operation = v1beta3_client.import_processor_version(
            # import request
        )
        print("Beta feature available")
    except AttributeError:
        print("Beta feature not available in v1 client")

# Best practice: Use v1beta3 for new projects to access all features
def recommended_client_usage():
    """Preferred pattern: one v1beta3 client for all operations."""

    # A single v1beta3 client covers both stable and beta features,
    # so there is no need to juggle two client objects.
    beta_client = V1Beta3Client()

    return beta_client

This comprehensive guide covers all beta features available in Google Cloud Document AI v1beta3, including dataset management, enhanced processing capabilities, and migration strategies from the stable v1 API.

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-documentai

docs

batch-operations.md

beta-features.md

document-processing.md

document-types.md

index.md

processor-management.md

tile.json