Azure AI Document Intelligence client library for Python - a cloud service that uses machine learning to analyze text and structured data from documents
npx @tessl/cli install tessl/pypi-azure-ai-documentintelligence@1.0.0

A comprehensive Python client library for the Azure AI Document Intelligence service, enabling document analysis, custom model management, and document classification through machine learning. The service extracts text, key-value pairs, tables, structures, and custom fields from documents across various formats including PDFs, images, and Office documents.
pip install azure-ai-documentintelligence

from azure.ai.documentintelligence import (
DocumentIntelligenceClient,
DocumentIntelligenceAdministrationClient,
AnalyzeDocumentLROPoller
)

Async clients:
from azure.ai.documentintelligence.aio import (
DocumentIntelligenceClient,
DocumentIntelligenceAdministrationClient
)

Authentication:
from azure.core.credentials import AzureKeyCredential, TokenCredential

from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential
# Initialize client with endpoint and API key
client = DocumentIntelligenceClient(
endpoint="https://your-resource.cognitiveservices.azure.com/",
credential=AzureKeyCredential("your-api-key")
)
# Analyze a document with prebuilt layout model
with open("invoice.pdf", "rb") as document:
poller = client.begin_analyze_document("prebuilt-layout", document)
result = poller.result()
# Access extracted content
print(f"Content: {result.content}")
# Access extracted tables
for table in result.tables or []:
print(f"Table with {table.row_count} rows and {table.column_count} columns")
for cell in table.cells:
print(f"Cell [{cell.row_index}][{cell.column_index}]: {cell.content}")
# Build custom model (administration client)
from azure.ai.documentintelligence import DocumentIntelligenceAdministrationClient
from azure.ai.documentintelligence.models import BuildDocumentModelRequest, AzureBlobContentSource
admin_client = DocumentIntelligenceAdministrationClient(
endpoint="https://your-resource.cognitiveservices.azure.com/",
credential=AzureKeyCredential("your-api-key")
)
# Build a custom model
build_request = BuildDocumentModelRequest(
model_id="my-custom-model",
build_mode="neural",
training_data_source=AzureBlobContentSource(
container_url="https://account.blob.core.windows.net/container"
)
)
poller = admin_client.begin_build_document_model(build_request)
model = poller.result()
print(f"Model built: {model.model_id}")

The Azure AI Document Intelligence SDK is organized around several key components:
- aio module with identical functionality
- AnalyzeDocumentLROPoller with operation metadata access

Both clients support multiple authentication methods (API key and Azure Active Directory) and provide extensive customization options for document processing features.
Core document processing functionality including single document analysis, batch operations, result retrieval, and resource management. Supports prebuilt models and custom models with advanced features like high-resolution OCR, language detection, and structured data extraction.
def begin_analyze_document(
model_id: str,
body: Union[AnalyzeDocumentRequest, JSON, IO[bytes]],
**kwargs
) -> AnalyzeDocumentLROPoller[AnalyzeResult]: ...
def begin_analyze_batch_documents(
model_id: str,
body: Union[AnalyzeBatchDocumentsRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[AnalyzeBatchResult]: ...
def begin_classify_document(
classifier_id: str,
body: Union[ClassifyDocumentRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[AnalyzeResult]: ...
def get_analyze_result_pdf(
model_id: str, result_id: str, **kwargs
) -> Iterator[bytes]: ...
def get_analyze_result_figure(
model_id: str, result_id: str, figure_id: str, **kwargs
) -> Iterator[bytes]: ...

Custom model lifecycle management including building, composing, copying, and managing document models. Supports both template and neural training modes with comprehensive model metadata, operation tracking, and resource management.
def begin_build_document_model(
body: Union[BuildDocumentModelRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentModelDetails]: ...
def begin_compose_model(
body: Union[ComposeDocumentModelRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentModelDetails]: ...
def begin_copy_model_to(
model_id: str,
body: Union[ModelCopyAuthorization, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentModelDetails]: ...
def authorize_model_copy(
body: Union[AuthorizeCopyRequest, JSON, IO[bytes]],
**kwargs
) -> ModelCopyAuthorization: ...
def get_resource_details(**kwargs) -> DocumentIntelligenceResourceDetails: ...
def list_operations(**kwargs) -> Iterable[DocumentIntelligenceOperationDetails]: ...

Document classifier lifecycle management for automated document type classification. Includes building, copying, and managing custom classifiers with support for multi-class document routing and comprehensive classifier management.
def begin_build_classifier(
body: Union[BuildDocumentClassifierRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentClassifierDetails]: ...
def begin_copy_classifier_to(
classifier_id: str,
body: Union[ClassifierCopyAuthorization, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentClassifierDetails]: ...
def authorize_classifier_copy(
body: Union[AuthorizeClassifierCopyRequest, JSON, IO[bytes]],
**kwargs
) -> ClassifierCopyAuthorization: ...
def get_classifier(classifier_id: str, **kwargs) -> DocumentClassifierDetails: ...
def list_classifiers(**kwargs) -> Iterable[DocumentClassifierDetails]: ...

Classifier Management Operations
Full asynchronous implementations of both DocumentIntelligenceClient and DocumentIntelligenceAdministrationClient with identical functionality and enhanced performance for concurrent operations.
async def begin_analyze_document(
model_id: str,
body: Union[AnalyzeDocumentRequest, JSON, IO[bytes]],
**kwargs
) -> AnalyzeDocumentLROPoller[AnalyzeResult]: ...
async def begin_build_document_model(
body: Union[BuildDocumentModelRequest, JSON, IO[bytes]],
**kwargs
) -> LROPoller[DocumentModelDetails]: ...

Comprehensive data models, enums, and type definitions covering analysis results, document structures, configuration options, and service responses. Includes 57 model classes and 19 enums providing complete type safety.
class AnalyzeResult:
api_version: Optional[str]
model_id: str
content: Optional[str]
pages: Optional[List[DocumentPage]]
tables: Optional[List[DocumentTable]]
documents: Optional[List[AnalyzedDocument]]
# ... additional properties
class DocumentField:
type: Optional[DocumentFieldType]
content: Optional[str]
confidence: Optional[float]
# ... type-specific value properties