Microsoft Azure Document Intelligence client library for analyzing text and structured data from documents with machine learning
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced document analysis capabilities using the modern Document Intelligence API (2022-08-31, 2023-07-31). This API provides enhanced AI models, improved accuracy, and advanced features like document classification, high-resolution OCR, and enhanced content detection.
Analyzes documents using prebuilt or custom models to extract comprehensive information including text, layout, tables, key-value pairs, and entities.
def begin_analyze_document(model_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> LROPoller[AnalyzeResult]:
    """
    Analyze document using specified model.

    Parameters:
    - model_id: Model identifier (prebuilt models: "prebuilt-receipt", "prebuilt-invoice",
      "prebuilt-businessCard", "prebuilt-idDocument", "prebuilt-layout",
      "prebuilt-document", "prebuilt-read", "prebuilt-tax.us.w2")
    - document: Document as bytes or file stream

    Keyword arguments (passed through **kwargs):
    - pages: Comma-separated page numbers to analyze (e.g., "1,3-5")
    - locale: Locale hint for better recognition (e.g., "en-US")
    - features: List of AnalysisFeature values for enhanced processing
    - output_content_format: Content output format ("text" or "markdown")

    Returns:
    LROPoller that yields AnalyzeResult with comprehensive document analysis
    """
def begin_analyze_document_from_url(model_id: str, document_url: str, **kwargs) -> LROPoller[AnalyzeResult]:
    """
    Analyze document from URL using specified model.

    Parameters:
    - model_id: Model identifier
    - document_url: Publicly accessible URL to document

    Keyword arguments (passed through **kwargs):
    - pages: Comma-separated page numbers to analyze
    - locale: Locale hint for better recognition
    - features: List of AnalysisFeature values for enhanced processing
    - output_content_format: Content output format

    Returns:
    LROPoller that yields AnalyzeResult with comprehensive document analysis
    """


from azure.ai.formrecognizer import DocumentAnalysisClient, AnalysisFeature
from azure.core.credentials import AzureKeyCredential

# `endpoint` is the Cognitive Services endpoint URL of your resource and must
# be defined before this snippet runs.
client = DocumentAnalysisClient(endpoint, AzureKeyCredential("key"))

# Analyze with prebuilt receipt model
with open("receipt.jpg", "rb") as document:
    poller = client.begin_analyze_document(
        "prebuilt-receipt",
        document,
        locale="en-US",
    )
    # Resolve the operation while the file is still open.
    result = poller.result()

# Access extracted data
print(f"Model ID: {result.model_id}")
print(f"Content: {result.content[:100]}...")

# A distinct loop name avoids shadowing the (now closed) file handle above.
for analyzed_document in result.documents:
    print(f"Document type: {analyzed_document.doc_type}")
    print(f"Confidence: {analyzed_document.confidence}")
    for field_name, field in analyzed_document.fields.items():
        # Compare against None so legitimate falsy values (0, 0.0, "") are
        # still reported instead of being silently skipped.
        if field.value is not None:
            print(f"{field_name}: {field.value} (confidence: {field.confidence})")

# Access page-level information
for page in result.pages:
    print(f"Page {page.page_number}: {page.width}x{page.height} {page.unit}")
    print(f"Text angle: {page.angle} degrees")
    # Access words and lines
    print(f"Words: {len(page.words)}")
    print(f"Lines: {len(page.lines)}")

# Tables are reported on the analysis result, not on individual pages.
if result.tables:
    print(f"Tables: {len(result.tables)}")

# Enhanced capabilities available in API version 2023-07-31 for improved content extraction and analysis.
class AnalysisFeature(str, Enum):
    """Enhanced analysis features for document processing.

    Each member is also a ``str``, so it compares equal to the literal
    feature name shown as its value.
    """

    OCR_HIGH_RESOLUTION = "ocrHighResolution"  # High-resolution OCR processing
    LANGUAGES = "languages"  # Language detection and processing
    BARCODES = "barcodes"  # Barcode and QR code extraction
    FORMULAS = "formulas"  # Mathematical formula recognition
    KEY_VALUE_PAIRS = "keyValuePairs"  # Enhanced key-value pair detection
    STYLE_FONT = "styleFont"  # Font style and appearance analysis


# Enable multiple advanced features
features = [
    AnalysisFeature.OCR_HIGH_RESOLUTION,
    AnalysisFeature.BARCODES,
    AnalysisFeature.FORMULAS,
    AnalysisFeature.LANGUAGES,
    AnalysisFeature.KEY_VALUE_PAIRS,
    AnalysisFeature.STYLE_FONT,
]

with open("complex_document.pdf", "rb") as document:
    poller = client.begin_analyze_document(
        "prebuilt-layout",
        document,
        features=features,
        output_content_format="markdown",
    )
    # Resolve the operation while the file is still open.
    result = poller.result()

# Access barcode information (if BARCODES feature enabled)
for page in result.pages:
    if page.barcodes:
        for barcode in page.barcodes:
            print(f"Barcode: {barcode.kind} - {barcode.value}")
            print(f"Confidence: {barcode.confidence}")

# Access formula information (if FORMULAS feature enabled)
for page in result.pages:
    if page.formulas:
        for formula in page.formulas:
            print(f"Formula: {formula.value}")
            print(f"Kind: {formula.kind}")

# Access language information (if LANGUAGES feature enabled)
if result.languages:
    for language in result.languages:
        print(f"Language: {language.locale} (confidence: {language.confidence})")

# Classifies documents into predefined categories using custom-trained classifiers to automatically determine document types.
def begin_classify_document(classifier_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> LROPoller[AnalyzeResult]:
    """
    Classify document using custom classifier.

    Parameters:
    - classifier_id: ID of custom document classifier
    - document: Document to classify as bytes or file stream

    Keyword arguments (passed through **kwargs):
    - pages: Comma-separated page numbers to analyze

    Returns:
    LROPoller that yields AnalyzeResult with classification results
    """
def begin_classify_document_from_url(classifier_id: str, document_url: str, **kwargs) -> LROPoller[AnalyzeResult]:
    """
    Classify document from URL using custom classifier.

    Parameters:
    - classifier_id: ID of custom document classifier
    - document_url: Publicly accessible URL to document

    Keyword arguments (passed through **kwargs):
    - pages: Comma-separated page numbers to analyze

    Returns:
    LROPoller that yields AnalyzeResult with classification results
    """


# Classify document type
classifier_id = "your-custom-classifier-id"

# Read the file once so the same bytes can be classified and then re-submitted
# to the model that matches the detected document type.
with open("unknown_document.pdf", "rb") as document:
    document_bytes = document.read()

poller = client.begin_classify_document(classifier_id, document_bytes)
result = poller.result()

# Check classification results
for classified in result.documents:
    print(f"Classified as: {classified.doc_type}")
    print(f"Confidence: {classified.confidence}")
    # Use appropriate model based on classification
    if classified.doc_type == "invoice":
        # Process as invoice
        invoice_poller = client.begin_analyze_document("prebuilt-invoice", document_bytes)
        invoice_result = invoice_poller.result()
    elif classified.doc_type == "receipt":
        # Process as receipt
        receipt_poller = client.begin_analyze_document("prebuilt-receipt", document_bytes)
        receipt_result = receipt_poller.result()


class DocumentAnalysisClient:
    """
    Client for analyzing documents using Document Intelligence API 2022-08-31 and later.
    """

    def __init__(
        self,
        endpoint: str,
        credential: Union[AzureKeyCredential, TokenCredential],
        **kwargs
    ):
        """
        Initialize DocumentAnalysisClient.

        Parameters:
        - endpoint: Cognitive Services endpoint URL
        - credential: Authentication credential

        Keyword arguments (passed through **kwargs):
        - api_version: API version (default: DocumentAnalysisApiVersion.V2023_07_31)
        """

    def close(self) -> None:
        """Close client and release resources."""
# Async version
class AsyncDocumentAnalysisClient:
    """
    Async client for analyzing documents using Document Intelligence API 2022-08-31 and later.

    Provides the same methods as DocumentAnalysisClient but with async/await support.
    All methods return async pollers that can be awaited.
    """

    def __init__(
        self,
        endpoint: str,
        credential: Union[AzureKeyCredential, AsyncTokenCredential],
        **kwargs
    ):
        """
        Initialize AsyncDocumentAnalysisClient.

        Parameters:
        - endpoint: Cognitive Services endpoint URL
        - credential: Authentication credential (must support async operations)

        Keyword arguments (passed through **kwargs):
        - api_version: API version (default: DocumentAnalysisApiVersion.V2023_07_31)
        """

    async def begin_analyze_document(self, model_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...

    async def begin_analyze_document_from_url(self, model_id: str, document_url: str, **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...

    async def begin_classify_document(self, classifier_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...

    async def begin_classify_document_from_url(self, classifier_id: str, document_url: str, **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...

    async def close(self) -> None:
        """Close client and release resources."""


# Layout analysis
"prebuilt-layout" # Extract text, tables, selection marks, and document structure
"prebuilt-document" # General document analysis with entities and key-value pairs
"prebuilt-read" # OCR text extraction only
# Specific document types
"prebuilt-receipt" # Receipts and transaction records
"prebuilt-invoice" # Invoices and billing documents
"prebuilt-businessCard" # Business cards and contact information
"prebuilt-idDocument" # Identity documents (driver's license, passport)
"prebuilt-tax.us.w2" # US W-2 tax forms
# Additional specialized models (vary by region)
"prebuilt-healthInsuranceCard.us" # US health insurance cards
"prebuilt-contract" # Contracts and agreements

| Model | Text | Tables | Key-Value | Entities | Forms | Classification |
|---|---|---|---|---|---|---|
| prebuilt-read | ✓ | | | | | |
| prebuilt-layout | ✓ | ✓ | | | | |
| prebuilt-document | ✓ | ✓ | ✓ | ✓ | | |
| prebuilt-receipt | ✓ | ✓ | ✓ | ✓ | ✓ | |
| prebuilt-invoice | ✓ | ✓ | ✓ | ✓ | ✓ | |
| prebuilt-businessCard | ✓ | ✓ | ✓ | ✓ | | |
| prebuilt-idDocument | ✓ | ✓ | ✓ | ✓ | | |
# Text format (default)
output_content_format = "text"
# Markdown format with enhanced structure preservation
output_content_format = "markdown"

# Example with markdown output
with open("structured_document.pdf", "rb") as document:
    poller = client.begin_analyze_document(
        "prebuilt-layout",
        document,
        output_content_format="markdown",
    )
    # Resolve the operation while the file is still open.
    result = poller.result()

# Content includes markdown formatting
print(result.content)  # Output includes ## Headers, **bold**, tables, etc.

# Service failures surface as azure-core's HttpResponseError.
# DocumentAnalysisError is a plain data model, not an exception class, so it
# cannot be used in an `except` clause.
from azure.core.exceptions import HttpResponseError

try:
    poller = client.begin_analyze_document("prebuilt-receipt", document)
    result = poller.result()
except HttpResponseError as e:
    error = e.error  # parsed service error payload; None when the body is not parseable
    if error is None:
        print(f"Analysis failed: {e.status_code} - {e.message}")
    else:
        print(f"Analysis failed: {error.code} - {error.message}")
        for detail in error.details or []:
            print(f"Detail: {detail}")
        inner = error.innererror  # mapping with the nested error, possibly empty
        if inner:
            print(f"Inner error: {inner.get('code')} - {inner.get('message')}")

# Configure polling behavior
import time

from azure.core.exceptions import HttpResponseError

poller = client.begin_analyze_document("prebuilt-invoice", document)

# Check status
print(f"Status: {poller.status()}")
# NOTE(review): `details` appears to exist only on model-administration
# pollers, so look it up defensively instead of assuming it — confirm against
# the SDK reference.
details = getattr(poller, "details", None)
if details:
    print(f"Operation ID: {details['operation_id']}")

# Wait with timeout
try:
    # wait() returns once the timeout elapses even if the operation is still
    # running, so completion must be checked explicitly afterwards.
    poller.wait(timeout=600)  # 10 minute timeout
except HttpResponseError as e:
    print(f"Operation failed: {e}")
else:
    if not poller.done():
        print("Operation timed out; still running")

# Manual polling
while not poller.done():
    time.sleep(5)
    print(f"Status: {poller.status()}")
result = poller.result()

Install with Tessl CLI:

npx tessl i tessl/pypi-azure-ai-formrecognizer