0
# Document Analysis (Modern API)
1
2
Advanced document analysis capabilities using the modern Document Intelligence API (service API versions 2022-08-31 and 2023-07-31). This API provides enhanced AI models, improved accuracy, and advanced features such as document classification, high-resolution OCR, and enhanced content detection.
3
4
## Capabilities
5
6
### General Document Analysis
7
8
Analyzes documents using prebuilt or custom models to extract comprehensive information including text, layout, tables, key-value pairs, and entities.
9
10
```python { .api }
11
def begin_analyze_document(model_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> LROPoller[AnalyzeResult]:
12
"""
13
Analyze document using specified model.
14
15
Parameters:
16
- model_id: Model identifier (prebuilt models: "prebuilt-receipt", "prebuilt-invoice",
17
"prebuilt-businessCard", "prebuilt-idDocument", "prebuilt-layout",
18
"prebuilt-document", "prebuilt-read", "prebuilt-tax.us.w2")
19
- document: Document as bytes or file stream
20
- pages: Comma-separated page numbers to analyze (e.g., "1,3-5")
21
- locale: Locale hint for better recognition (e.g., "en-US")
22
- features: List of AnalysisFeature values for enhanced processing
23
- output_content_format: Content output format ("text" or "markdown")
24
25
Returns:
26
LROPoller that yields AnalyzeResult with comprehensive document analysis
27
"""
28
29
def begin_analyze_document_from_url(model_id: str, document_url: str, **kwargs) -> LROPoller[AnalyzeResult]:
30
"""
31
Analyze document from URL using specified model.
32
33
Parameters:
34
- model_id: Model identifier
35
- document_url: Publicly accessible URL to document
36
- pages: Comma-separated page numbers to analyze
37
- locale: Locale hint for better recognition
38
- features: List of AnalysisFeature values for enhanced processing
39
- output_content_format: Content output format
40
41
Returns:
42
LROPoller that yields AnalyzeResult with comprehensive document analysis
43
"""
44
```
45
46
#### Usage Example
47
48
```python
49
from azure.ai.formrecognizer import DocumentAnalysisClient, AnalysisFeature
50
from azure.core.credentials import AzureKeyCredential
51
52
client = DocumentAnalysisClient(endpoint, AzureKeyCredential("key"))
53
54
# Analyze with prebuilt receipt model
55
with open("receipt.jpg", "rb") as document:
56
poller = client.begin_analyze_document(
57
"prebuilt-receipt",
58
document,
59
locale="en-US"
60
)
61
result = poller.result()
62
63
# Access extracted data
64
print(f"Model ID: {result.model_id}")
65
print(f"Content: {result.content[:100]}...")
66
67
for document in result.documents:
68
print(f"Document type: {document.doc_type}")
69
print(f"Confidence: {document.confidence}")
70
71
for field_name, field in document.fields.items():
72
if field.value:
73
print(f"{field_name}: {field.value} (confidence: {field.confidence})")
74
75
# Access page-level information
76
for page in result.pages:
77
print(f"Page {page.page_number}: {page.width}x{page.height} {page.unit}")
78
print(f"Text angle: {page.angle} degrees")
79
80
# Access words, lines, tables
81
print(f"Words: {len(page.words)}")
82
print(f"Lines: {len(page.lines)}")
83
if page.tables:
84
print(f"Tables: {len(page.tables)}")
85
```
86
87
### Advanced Analysis Features
88
89
Enhanced capabilities available in API version 2023-07-31 for improved content extraction and analysis.
90
91
```python { .api }
92
class AnalysisFeature(str, Enum):
93
"""Enhanced analysis features for document processing."""
94
OCR_HIGH_RESOLUTION = "ocrHighResolution" # High-resolution OCR processing
95
LANGUAGES = "languages" # Language detection and processing
96
BARCODES = "barcodes" # Barcode and QR code extraction
97
FORMULAS = "formulas" # Mathematical formula recognition
98
KEY_VALUE_PAIRS = "keyValuePairs" # Enhanced key-value pair detection
99
STYLE_FONT = "styleFont" # Font style and appearance analysis
100
```
101
102
#### Enhanced Features Usage
103
104
```python
105
# Enable multiple advanced features
106
features = [
107
AnalysisFeature.OCR_HIGH_RESOLUTION,
108
AnalysisFeature.BARCODES,
109
AnalysisFeature.FORMULAS,
110
AnalysisFeature.LANGUAGES,
111
AnalysisFeature.KEY_VALUE_PAIRS,
112
AnalysisFeature.STYLE_FONT
113
]
114
115
with open("complex_document.pdf", "rb") as document:
116
poller = client.begin_analyze_document(
117
"prebuilt-layout",
118
document,
119
features=features,
120
output_content_format="markdown"
121
)
122
result = poller.result()
123
124
# Access barcode information (if BARCODES feature enabled)
125
for page in result.pages:
126
if page.barcodes:
127
for barcode in page.barcodes:
128
print(f"Barcode: {barcode.kind} - {barcode.value}")
129
print(f"Confidence: {barcode.confidence}")
130
131
# Access formula information (if FORMULAS feature enabled)
132
for page in result.pages:
133
if page.formulas:
134
for formula in page.formulas:
135
print(f"Formula: {formula.value}")
136
print(f"Kind: {formula.kind}")
137
138
# Access language information (if LANGUAGES feature enabled)
139
if result.languages:
140
for language in result.languages:
141
print(f"Language: {language.locale} (confidence: {language.confidence})")
142
```
143
144
### Document Classification
145
146
Classifies documents into predefined categories using custom-trained classifiers to automatically determine document types.
147
148
```python { .api }
149
def begin_classify_document(classifier_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> LROPoller[AnalyzeResult]:
150
"""
151
Classify document using custom classifier.
152
153
Parameters:
154
- classifier_id: ID of custom document classifier
155
- document: Document to classify as bytes or file stream
156
- pages: Comma-separated page numbers to analyze
157
158
Returns:
159
LROPoller that yields AnalyzeResult with classification results
160
"""
161
162
def begin_classify_document_from_url(classifier_id: str, document_url: str, **kwargs) -> LROPoller[AnalyzeResult]:
163
"""
164
Classify document from URL using custom classifier.
165
166
Parameters:
167
- classifier_id: ID of custom document classifier
168
- document_url: Publicly accessible URL to document
169
- pages: Comma-separated page numbers to analyze
170
171
Returns:
172
LROPoller that yields AnalyzeResult with classification results
173
"""
174
```
175
176
#### Classification Usage Example
177
178
```python
179
# Classify document type
180
classifier_id = "your-custom-classifier-id"
181
182
with open("unknown_document.pdf", "rb") as f:
    # Read the file once so the same bytes can be re-analyzed after classification
    document_bytes = f.read()

poller = client.begin_classify_document(classifier_id, document_bytes)
result = poller.result()
185
186
# Check classification results
187
for document in result.documents:
188
print(f"Classified as: {document.doc_type}")
189
print(f"Confidence: {document.confidence}")
190
191
# Use appropriate model based on classification
192
if document.doc_type == "invoice":
193
# Process as invoice
194
invoice_poller = client.begin_analyze_document("prebuilt-invoice", document_bytes)
195
invoice_result = invoice_poller.result()
196
elif document.doc_type == "receipt":
197
# Process as receipt
198
receipt_poller = client.begin_analyze_document("prebuilt-receipt", document_bytes)
199
receipt_result = receipt_poller.result()
200
```
201
202
## DocumentAnalysisClient
203
204
```python { .api }
205
class DocumentAnalysisClient:
206
"""
207
Client for analyzing documents using Document Intelligence API 2022-08-31 and later.
208
"""
209
210
def __init__(
211
self,
212
endpoint: str,
213
credential: Union[AzureKeyCredential, TokenCredential],
214
**kwargs
215
):
216
"""
217
Initialize DocumentAnalysisClient.
218
219
Parameters:
220
- endpoint: Cognitive Services endpoint URL
221
- credential: Authentication credential
222
- api_version: API version (default: DocumentAnalysisApiVersion.V2023_07_31)
223
"""
224
225
def close(self) -> None:
226
"""Close client and release resources."""
227
228
# Async version (exposed by the SDK as azure.ai.formrecognizer.aio.DocumentAnalysisClient)
229
class AsyncDocumentAnalysisClient:
230
"""
231
Async client for analyzing documents using Document Intelligence API 2022-08-31 and later.
232
233
Provides the same methods as DocumentAnalysisClient but with async/await support.
234
All methods return async pollers that can be awaited.
235
"""
236
237
def __init__(
238
self,
239
endpoint: str,
240
credential: Union[AzureKeyCredential, AsyncTokenCredential],
241
**kwargs
242
):
243
"""
244
Initialize AsyncDocumentAnalysisClient.
245
246
Parameters:
247
- endpoint: Cognitive Services endpoint URL
248
- credential: Authentication credential (must support async operations)
249
- api_version: API version (default: DocumentAnalysisApiVersion.V2023_07_31)
250
"""
251
252
async def begin_analyze_document(self, model_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...
253
async def begin_analyze_document_from_url(self, model_id: str, document_url: str, **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...
254
async def begin_classify_document(self, classifier_id: str, document: Union[bytes, IO[bytes]], **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...
255
async def begin_classify_document_from_url(self, classifier_id: str, document_url: str, **kwargs) -> AsyncLROPoller[AnalyzeResult]: ...
256
257
async def close(self) -> None:
258
"""Close client and release resources."""
259
```
260
261
## Prebuilt Models
262
263
### Available Prebuilt Models
264
265
```python { .api }
266
# Layout analysis
267
"prebuilt-layout" # Extract text, tables, selection marks, and document structure
268
"prebuilt-document" # General document analysis with entities and key-value pairs
269
"prebuilt-read" # OCR text extraction only
270
271
# Specific document types
272
"prebuilt-receipt" # Receipts and transaction records
273
"prebuilt-invoice" # Invoices and billing documents
274
"prebuilt-businessCard" # Business cards and contact information
275
"prebuilt-idDocument" # Identity documents (driver's license, passport)
276
"prebuilt-tax.us.w2" # US W-2 tax forms
277
278
# Additional specialized models (vary by region)
279
"prebuilt-healthInsuranceCard.us" # US health insurance cards
280
"prebuilt-contract" # Contracts and agreements
281
```
282
283
### Model Capabilities Comparison
284
285
| Model | Text | Tables | Key-Value | Entities | Forms | Classification |
286
|-------|------|--------|-----------|----------|-------|---------------|
287
| prebuilt-read | ✓ | | | | | |
288
| prebuilt-layout | ✓ | ✓ | | | | |
289
| prebuilt-document | ✓ | ✓ | ✓ | ✓ | | |
290
| prebuilt-receipt | ✓ | ✓ | ✓ | ✓ | ✓ | |
291
| prebuilt-invoice | ✓ | ✓ | ✓ | ✓ | ✓ | |
292
| prebuilt-businessCard | ✓ | | ✓ | ✓ | ✓ | |
293
| prebuilt-idDocument | ✓ | | ✓ | ✓ | ✓ | |
294
295
## Content Output Formats
296
297
```python { .api }
298
# Text format (default)
299
output_content_format="text"
300
301
# Markdown format with enhanced structure preservation
302
output_content_format="markdown"
303
```
304
305
### Markdown Output Benefits
306
307
- Preserves document structure and formatting
308
- Better representation of headers, lists, and emphasis
309
- Improved table formatting
310
- Enhanced readability for downstream processing
311
312
```python
313
# Example with markdown output
314
with open("structured_document.pdf", "rb") as document:
315
poller = client.begin_analyze_document(
316
"prebuilt-layout",
317
document,
318
output_content_format="markdown"
319
)
320
result = poller.result()
321
322
# Content includes markdown formatting
323
print(result.content) # Output includes ## Headers, **bold**, tables, etc.
324
```
325
326
## Error Handling
327
328
```python { .api }
329
from azure.core.exceptions import HttpResponseError

# Service failures are raised as azure-core exceptions. DocumentAnalysisError is a
# plain model describing an error payload, not an exception type, so it cannot be
# used in an `except` clause.
try:
    poller = client.begin_analyze_document("prebuilt-receipt", document)
    result = poller.result()
except HttpResponseError as e:
    print(f"Analysis failed: {e.status_code} - {e.message}")
    if e.error is not None:
        print(f"Error code: {e.error.code}")
        for detail in e.error.details or []:
            print(f"Detail: {detail.code} - {detail.message}")
        if e.error.innererror:
            print(f"Inner error: {e.error.innererror}")
341
```
342
343
## Polling and Timeouts
344
345
```python
346
import time

# Start the operation and keep the poller so its progress can be inspected
347
poller = client.begin_analyze_document("prebuilt-invoice", document)
348
349
# Check status
350
print(f"Status: {poller.status()}")
351
print(f"Operation ID: {poller.details['operation_id']}")
352
353
# Wait with timeout
354
try:
355
result = poller.result(timeout=600) # 10 minute timeout
356
except Exception as e:
357
print(f"Operation timed out or failed: {e}")
358
359
# Manual polling
360
while not poller.done():
361
time.sleep(5)
362
print(f"Status: {poller.status()}")
363
364
result = poller.result()
365
```