Azure AI Document Intelligence client library for Python. Azure AI Document Intelligence is a cloud service that uses machine learning to analyze text and structured data from documents.
76
Comprehensive data models, enums, and type definitions covering analysis results, document structures, configuration options, and service responses. The SDK provides 57 model classes and 19 enums offering complete type safety for all Document Intelligence operations.
from azure.ai.documentintelligence.models import (
    # Analysis results
    AnalyzeResult, AnalyzedDocument, DocumentField,
    # Document structure
    DocumentPage, DocumentTable, DocumentParagraph,
    # Request types
    AnalyzeDocumentRequest, BuildDocumentModelRequest,
    # Enums
    DocumentFieldType, DocumentAnalysisFeature, StringIndexType,
    # All other model classes...
)


class AnalyzeResult:
    """Main container for document analysis results."""

    api_version: Optional[str]
    model_id: str
    string_index_type: Optional[StringIndexType]
    content: Optional[str]
    # Collections of detected elements; each one is optional.
    pages: Optional[List[DocumentPage]]
    paragraphs: Optional[List[DocumentParagraph]]
    tables: Optional[List[DocumentTable]]
    figures: Optional[List[DocumentFigure]]
    sections: Optional[List[DocumentSection]]
    key_value_pairs: Optional[List[DocumentKeyValuePair]]
    styles: Optional[List[DocumentStyle]]
    languages: Optional[List[DocumentLanguage]]
    documents: Optional[List[AnalyzedDocument]]
    warnings: Optional[List[DocumentIntelligenceWarning]]
class AnalyzedDocument:
    """Individual document analysis result with extracted fields."""

    doc_type: str
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
    # Extracted fields, keyed by a string.
    fields: Optional[Dict[str, DocumentField]]
    confidence: Optional[float]
class DocumentField:
    """Field value with comprehensive type support."""

    type: Optional[DocumentFieldType]
    # String value and field content
    value_string: Optional[str]
    content: Optional[str]
    # Date and time values
    value_date: Optional[date]
    value_time: Optional[time]
    # Numeric values
    value_number: Optional[float]
    value_integer: Optional[int]
    value_currency: Optional[CurrencyValue]
    # Contact information
    value_phone_number: Optional[str]
    value_address: Optional[AddressValue]
    # Region, selection, signature, and boolean values
    value_country_region: Optional[str]
    value_selection_mark: Optional[DocumentSelectionMarkState]
    value_signature: Optional[DocumentSignatureType]
    value_boolean: Optional[bool]
    value_selection_group: Optional[List[str]]
    # Complex types (self-references are quoted because the class name is
    # not bound yet while its own body is being evaluated)
    value_array: Optional[List["DocumentField"]]
    value_object: Optional[Dict[str, "DocumentField"]]
    # Positioning information
    bounding_regions: Optional[List[BoundingRegion]]
    spans: Optional[List[DocumentSpan]]
    confidence: Optional[float]


class DocumentPage:
    """Represents a single document page with all detected elements."""

    page_number: int
    angle: Optional[float]
    width: Optional[float]
    height: Optional[float]
    unit: Optional[LengthUnit]
    spans: List[DocumentSpan]
    # Elements detected on the page; each collection is optional.
    words: Optional[List[DocumentWord]]
    selection_marks: Optional[List[DocumentSelectionMark]]
    lines: Optional[List[DocumentLine]]
    barcodes: Optional[List[DocumentBarcode]]
    formulas: Optional[List[DocumentFormula]]
class DocumentTable:
    """Table structure with cells and metadata."""

    row_count: int
    column_count: int
    cells: List[DocumentTableCell]
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
    caption: Optional[DocumentCaption]
    footnotes: Optional[List[DocumentFootnote]]
class DocumentTableCell:
    """Individual table cell with positioning and content."""

    kind: Optional[DocumentTableCellKind]
    row_index: int
    column_index: int
    row_span: Optional[int]
    column_span: Optional[int]
    content: str
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
    elements: Optional[List[str]]
class DocumentParagraph:
    """Paragraph-level content with role classification."""

    spans: List[DocumentSpan]
    bounding_regions: Optional[List[BoundingRegion]]
    role: Optional[ParagraphRole]
    content: str
class DocumentLine:
    """Text line with content and positioning."""

    content: str
    polygon: Optional[List[float]]
    spans: List[DocumentSpan]
class DocumentWord:
    """Individual word with confidence and positioning."""

    content: str
    polygon: Optional[List[float]]
    confidence: Optional[float]
    span: DocumentSpan


class DocumentKeyValuePair:
    """Key-value pair extraction result."""

    key: DocumentKeyValueElement
    # The value side is optional; the key side is not.
    value: Optional[DocumentKeyValueElement]
    confidence: Optional[float]
class DocumentKeyValueElement:
    """Key or value element in a key-value pair."""

    content: str
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
class DocumentSelectionMark:
    """Selection mark (checkbox, radio button) detection."""

    state: DocumentSelectionMarkState
    polygon: Optional[List[float]]
    confidence: Optional[float]
    span: DocumentSpan
class DocumentBarcode:
    """Barcode detection result."""

    kind: DocumentBarcodeKind
    value: str
    polygon: Optional[List[float]]
    confidence: Optional[float]
    span: DocumentSpan
class DocumentFormula:
    """Mathematical formula detection."""

    kind: DocumentFormulaKind
    value: str
    polygon: Optional[List[float]]
    confidence: Optional[float]
    span: DocumentSpan
class DocumentFigure:
    """Figure or image detection within document."""

    id: str
    bounding_regions: List[BoundingRegion]
    spans: List[DocumentSpan]
    elements: Optional[List[str]]
    caption: Optional[DocumentCaption]
    footnotes: Optional[List[DocumentFootnote]]
class DocumentSection:
    """Document section with hierarchical structure."""

    spans: List[DocumentSpan]
    elements: Optional[List[str]]
class DocumentCaption:
    """Caption text associated with tables and figures."""

    content: str
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
    elements: Optional[List[str]]
class DocumentFootnote:
    """Footnote content associated with tables and figures."""

    content: str
    bounding_regions: Optional[List[BoundingRegion]]
    spans: List[DocumentSpan]
    elements: Optional[List[str]]


class BoundingRegion:
    """Geometric bounding region on a page."""

    page_number: int
    polygon: List[float]
class DocumentSpan:
    """Text span with offset and length."""

    offset: int
    length: int
class DocumentStyle:
    """Text styling information."""

    is_handwritten: Optional[bool]
    similar_font_family: Optional[str]
    font_style: Optional[DocumentFontStyle]
    font_weight: Optional[DocumentFontWeight]
    color: Optional[str]
    background_color: Optional[str]
    spans: List[DocumentSpan]
    confidence: Optional[float]
class DocumentLanguage:
    """Language detection result."""

    locale: str
    spans: List[DocumentSpan]
    confidence: Optional[float]


class AddressValue:
    """Structured address information.

    Every component is an optional string.
    """

    house_number: Optional[str]
    po_box: Optional[str]
    road: Optional[str]
    city: Optional[str]
    state: Optional[str]
    postal_code: Optional[str]
    country_region: Optional[str]
    street_address: Optional[str]
    unit: Optional[str]
    city_district: Optional[str]
    state_district: Optional[str]
    suburb: Optional[str]
    house: Optional[str]
    level: Optional[str]
class CurrencyValue:
    """Currency amount with symbol and code."""

    amount: float
    currency_symbol: Optional[str]
    currency_code: Optional[str]


class DocumentModelDetails:
    """Comprehensive model information."""

    model_id: str
    description: Optional[str]
    created_date_time: datetime
    expiration_date_time: Optional[datetime]
    api_version: str
    tags: Optional[Dict[str, str]]
    build_mode: Optional[DocumentBuildMode]
    azure_blob_source: Optional[AzureBlobContentSource]
    azure_blob_file_list_source: Optional[AzureBlobFileListContentSource]
    doc_types: Optional[Dict[str, DocumentTypeDetails]]
    warnings: Optional[List[DocumentIntelligenceWarning]]
    training_hours: Optional[int]
    base_model_id: Optional[str]
class DocumentTypeDetails:
    """Document type configuration within a model."""

    description: Optional[str]
    build_mode: Optional[DocumentBuildMode]
    # Both mappings are keyed by a string.
    field_schema: Optional[Dict[str, DocumentFieldSchema]]
    field_confidence: Optional[Dict[str, float]]
class DocumentFieldSchema:
    """Schema definition for document fields."""

    type: DocumentFieldType
    description: Optional[str]
    example: Optional[str]
    # Recursive references to this class are quoted.
    items: Optional["DocumentFieldSchema"]
    properties: Optional[Dict[str, "DocumentFieldSchema"]]
class ComponentDocumentModelDetails:
    """Component model reference for composition."""

    model_id: str


class DocumentClassifierDetails:
    """Document classifier information and configuration."""

    classifier_id: str
    description: Optional[str]
    created_date_time: datetime
    expiration_date_time: Optional[datetime]
    api_version: str
    base_classifier_id: Optional[str]
    doc_types: Dict[str, ClassifierDocumentTypeDetails]
    warnings: Optional[List[DocumentIntelligenceWarning]]
class ClassifierDocumentTypeDetails:
    """Document type configuration for classifier."""

    azure_blob_source: Optional[AzureBlobContentSource]
    azure_blob_file_list_source: Optional[AzureBlobFileListContentSource]


class AnalyzeDocumentRequest:
    """Request for single document analysis."""

    url_source: Optional[str]
    base64_source: Optional[str]
    pages: Optional[str]
    locale: Optional[str]
    string_index_type: Optional[StringIndexType]
    features: Optional[List[DocumentAnalysisFeature]]
    query_fields: Optional[List[str]]
    output_content_format: Optional[DocumentContentFormat]
    output: Optional[List[AnalyzeOutputOption]]
class AnalyzeBatchDocumentsRequest:
    """Request for batch document processing."""

    azure_blob_source: Optional[AzureBlobContentSource]
    azure_blob_file_list_source: Optional[AzureBlobFileListContentSource]
    # The only non-optional attribute of this request.
    result_container_url: str
    result_prefix: Optional[str]
    overwrite_existing: Optional[bool]
    pages: Optional[str]
    locale: Optional[str]
    string_index_type: Optional[StringIndexType]
    features: Optional[List[DocumentAnalysisFeature]]
    query_fields: Optional[List[str]]
    output_content_format: Optional[DocumentContentFormat]
    output: Optional[List[AnalyzeOutputOption]]
class ClassifyDocumentRequest:
    """Request for document classification."""

    url_source: Optional[str]
    base64_source: Optional[str]
    pages: Optional[str]
    string_index_type: Optional[StringIndexType]
    split_mode: Optional[SplitMode]


class BuildDocumentModelRequest:
    """Request to build custom document model."""

    model_id: str
    description: Optional[str]
    build_mode: DocumentBuildMode
    # Either of the two Azure Blob source types is accepted.
    training_data_source: Union[AzureBlobContentSource, AzureBlobFileListContentSource]
    test_data_source: Optional[Union[AzureBlobContentSource, AzureBlobFileListContentSource]]
    tags: Optional[Dict[str, str]]
class ComposeDocumentModelRequest:
    """Request to compose multiple models."""

    model_id: str
    description: Optional[str]
    component_models: List[ComponentDocumentModelDetails]
    tags: Optional[Dict[str, str]]
class BuildDocumentClassifierRequest:
    """Request to build document classifier."""

    classifier_id: str
    description: Optional[str]
    doc_types: Dict[str, ClassifierDocumentTypeDetails]
    base_classifier_id: Optional[str]


class AuthorizeCopyRequest:
    """Request to authorize model copying."""

    model_id: str
    description: Optional[str]
    tags: Optional[Dict[str, str]]
class AuthorizeClassifierCopyRequest:
    """Request to authorize classifier copying."""

    classifier_id: str
    description: Optional[str]
    tags: Optional[Dict[str, str]]
class ModelCopyAuthorization:
    """Model copy authorization token."""

    target_resource_id: str
    target_resource_region: str
    target_model_id: str
    target_model_location: str
    access_token: str
    expiration_date_time: datetime
class ClassifierCopyAuthorization:
    """Classifier copy authorization token."""

    target_resource_id: str
    target_resource_region: str
    target_classifier_id: str
    target_classifier_location: str
    access_token: str
    expiration_date_time: datetime


class AzureBlobContentSource:
    """Azure Blob Storage content source."""

    container_url: str
    prefix: Optional[str]
class AzureBlobFileListContentSource:
    """Azure Blob Storage file list source."""

    container_url: str
    file_list: str


class DocumentIntelligenceOperationDetails:
    """Base operation details with polymorphic support."""

    operation_id: str
    status: DocumentIntelligenceOperationStatus
    percent_completed: Optional[int]
    created_date_time: datetime
    last_updated_date_time: datetime
    kind: OperationKind
    resource_location: str
    api_version: Optional[str]
    tags: Optional[Dict[str, str]]
    error: Optional[DocumentIntelligenceError]
class DocumentModelBuildOperationDetails(DocumentIntelligenceOperationDetails):
    """Model build operation details."""

    result: Optional[DocumentModelDetails]
class DocumentModelComposeOperationDetails(DocumentIntelligenceOperationDetails):
    """Model compose operation details."""

    result: Optional[DocumentModelDetails]
class DocumentModelCopyToOperationDetails(DocumentIntelligenceOperationDetails):
    """Model copy operation details."""

    result: Optional[DocumentModelDetails]
class DocumentClassifierBuildOperationDetails(DocumentIntelligenceOperationDetails):
    """Classifier build operation details."""

    result: Optional[DocumentClassifierDetails]
class DocumentClassifierCopyToOperationDetails(DocumentIntelligenceOperationDetails):
    """Classifier copy operation details."""

    result: Optional[DocumentClassifierDetails]


class AnalyzeBatchResult:
    """Batch analysis summary results."""

    succeeded_count: int
    failed_count: int
    skipped_count: int
    details: List[AnalyzeBatchOperationDetail]
class AnalyzeBatchOperation:
    """Batch operation metadata and results."""

    operation_id: str
    status: DocumentIntelligenceOperationStatus
    created_date_time: datetime
    last_updated_date_time: datetime
    percent_completed: Optional[int]
    result: Optional[AnalyzeBatchResult]
    error: Optional[DocumentIntelligenceError]
class AnalyzeBatchOperationDetail:
    """Individual document result within batch operation."""

    status: DocumentIntelligenceOperationStatus
    source_url: Optional[str]
    result_url: Optional[str]
    error: Optional[DocumentIntelligenceError]


class DocumentIntelligenceResourceDetails:
    """Service resource information and limits."""

    custom_document_models: CustomDocumentModelsDetails
    custom_neural_document_model_builds: CustomDocumentModelsDetails
class CustomDocumentModelsDetails:
    """Model quota and usage information."""

    count: int
    limit: int


class DocumentIntelligenceError:
    """Service error information."""

    code: str
    message: str
    target: Optional[str]
    # Nested errors of the same type (quoted self-reference).
    details: Optional[List["DocumentIntelligenceError"]]
    # Quoted because the referenced class is declared after this one.
    innererror: Optional["DocumentIntelligenceInnerError"]
class DocumentIntelligenceErrorResponse:
    """Error response wrapper."""

    error: DocumentIntelligenceError
class DocumentIntelligenceInnerError:
    """Detailed inner error information."""

    code: Optional[str]
    message: Optional[str]
    # Inner errors can nest (quoted self-reference).
    innererror: Optional["DocumentIntelligenceInnerError"]
class DocumentIntelligenceWarning:
    """Service warning information."""

    code: str
    message: str
    target: Optional[str]


class DocumentAnalysisFeature(str, Enum):
    """Document analysis feature options."""

    OCR_HIGH_RESOLUTION = "ocrHighResolution"
    LANGUAGES = "languages"
    BARCODES = "barcodes"
    FORMULAS = "formulas"
    KEY_VALUE_PAIRS = "keyValuePairs"
    STYLE_FONT = "styleFont"
    QUERY_FIELDS = "queryFields"
class AnalyzeOutputOption(str, Enum):
    """Additional output format options."""

    PDF = "pdf"
    FIGURES = "figures"
class DocumentContentFormat(str, Enum):
    """Content format options."""

    TEXT = "text"
    MARKDOWN = "markdown"
class StringIndexType(str, Enum):
    """Character indexing schemes."""

    TEXT_ELEMENTS = "textElements"
    UNICODE_CODE_POINT = "unicodeCodePoint"
    UTF16_CODE_UNIT = "utf16CodeUnit"


class DocumentFieldType(str, Enum):
    """Document field value types."""

    STRING = "string"
    DATE = "date"
    TIME = "time"
    PHONE_NUMBER = "phoneNumber"
    NUMBER = "number"
    INTEGER = "integer"
    SELECTION_MARK = "selectionMark"
    COUNTRY_REGION = "countryRegion"
    SIGNATURE = "signature"
    ARRAY = "array"
    OBJECT = "object"
    CURRENCY = "currency"
    ADDRESS = "address"
    BOOLEAN = "boolean"
    SELECTION_GROUP = "selectionGroup"
class DocumentBarcodeKind(str, Enum):
    """Barcode type classifications."""

    QR_CODE = "QRCode"
    PDF417 = "PDF417"
    UPCA = "UPCA"
    UPCE = "UPCE"
    CODE39 = "Code39"
    CODE128 = "Code128"
    EAN8 = "EAN8"
    EAN13 = "EAN13"
    DATA_BAR = "DataBar"
    CODE93 = "Code93"
    CODABAR = "Codabar"
    DATA_BAR_EXPANDED = "DataBarExpanded"
    ITF = "ITF"
    MICRO_QR_CODE = "MicroQRCode"
    AZTEC = "Aztec"
    DATA_MATRIX = "DataMatrix"
    MAXI_CODE = "MaxiCode"


class DocumentFontStyle(str, Enum):
    """Font style classifications."""

    NORMAL = "normal"
    ITALIC = "italic"
class DocumentFontWeight(str, Enum):
    """Font weight classifications."""

    NORMAL = "normal"
    BOLD = "bold"
class DocumentSelectionMarkState(str, Enum):
    """Selection mark states."""

    SELECTED = "selected"
    UNSELECTED = "unselected"
class DocumentSignatureType(str, Enum):
    """Signature detection results."""

    SIGNED = "signed"
    UNSIGNED = "unsigned"
class DocumentTableCellKind(str, Enum):
    """Table cell type classifications."""

    CONTENT = "content"
    ROW_HEADER = "rowHeader"
    COLUMN_HEADER = "columnHeader"
    STUB_HEAD = "stubHead"
    DESCRIPTION = "description"


class DocumentIntelligenceOperationStatus(str, Enum):
    """Operation status values."""

    NOT_STARTED = "notStarted"
    RUNNING = "running"
    FAILED = "failed"
    SUCCEEDED = "succeeded"
    CANCELED = "canceled"
    SKIPPED = "skipped"
class DocumentBuildMode(str, Enum):
    """Model building approaches."""

    TEMPLATE = "template"
    NEURAL = "neural"
class OperationKind(str, Enum):
    """Operation type classifications."""

    DOCUMENT_MODEL_BUILD = "documentModelBuild"
    DOCUMENT_MODEL_COMPOSE = "documentModelCompose"
    DOCUMENT_MODEL_COPY_TO = "documentModelCopyTo"
    DOCUMENT_CLASSIFIER_COPY_TO = "documentClassifierCopyTo"
    DOCUMENT_CLASSIFIER_BUILD = "documentClassifierBuild"
class ContentSourceKind(str, Enum):
    """Content source type classifications."""

    URL = "url"
    BASE64 = "base64"
    AZURE_BLOB = "azureBlob"
    AZURE_BLOB_FILE_LIST = "azureBlobFileList"
class SplitMode(str, Enum):
    """Document splitting behavior."""

    AUTO = "auto"
    NONE = "none"
    PER_PAGE = "perPage"


class LengthUnit(str, Enum):
    """Measurement units for dimensions."""

    PIXEL = "pixel"
    INCH = "inch"
class ParagraphRole(str, Enum):
    """Paragraph role classifications."""

    PAGE_HEADER = "pageHeader"
    PAGE_FOOTER = "pageFooter"
    PAGE_NUMBER = "pageNumber"
    TITLE = "title"
    SECTION_HEADING = "sectionHeading"
    FOOTNOTE = "footnote"
    FORMULA_BLOCK = "formulaBlock"
class DocumentFormulaKind(str, Enum):
    """Mathematical formula types."""

    INLINE = "inline"
    DISPLAY = "display"


# Install with Tessl CLI
npx tessl i tessl/pypi-azure-ai-documentintelligence
docs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10