Python Client SDK for the Mistral AI API with chat completions, embeddings, fine-tuning, and agent capabilities.
—
Process documents and images to extract text and structured data using optical character recognition. The OCR API can analyze various document formats and extract text with position information.
Process documents and images to extract text and structural information.
def process(
    model: str,
    document: Document,
    pages: Optional[List[int]] = None,
    **kwargs
) -> OCRResponse:
    """
    Process a document with OCR.

    Parameters:
    - model: OCR model identifier
    - document: Document to process (image or PDF)
    - pages: Optional list of page numbers to process
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    OCRResponse with extracted text and structure information
    """


from mistralai import Mistral
from mistralai.models import Document

client = Mistral(api_key="your-api-key")

# Process a PDF document
with open("document.pdf", "rb") as f:
    document = Document(
        type="application/pdf",
        data=f.read(),
    )

response = client.ocr.process(
    model="ocr-model",
    document=document,
    pages=[1, 2, 3],  # Process first 3 pages
)

# Extract text from all pages
for page in response.pages:
    print(f"Page {page.page_number}:")
    print(f"Text: {page.text}")
    print(f"Dimensions: {page.dimensions.width}x{page.dimensions.height}")
    print()  # blank line between pages

# Process document and analyze structure
response = client.ocr.process(
    model="ocr-model",
    document=document,
)

# Access structured information
for page in response.pages:
    print(f"Page {page.page_number}:")

    # Extract images if present
    for image in page.images:
        print(f" Image: {image.width}x{image.height} at ({image.x}, {image.y})")

    # Get text content
    print(f" Text content: {len(page.text)} characters")
    print(f" Preview: {page.text[:200]}...")


class OCRRequest:
    """Request fields for an OCR call; they mirror the parameters of `process`."""

    model: str  # OCR model identifier
    document: Document  # Document to process
    pages: Optional[List[int]]  # Page numbers to process, if restricted
class Document:
    """Input document passed to the OCR endpoint."""

    type: str  # MIME type (e.g., "application/pdf", "image/jpeg")
    data: bytes  # Document content as bytes


class OCRResponse:
    """Result of an OCR call: one entry per processed page plus optional usage."""

    id: str
    object: str
    model: str
    pages: List[OCRPageObject]  # Per-page extraction results
    usage: Optional[OCRUsageInfo]  # May be absent
class OCRPageObject:
    """OCR output for a single page of the processed document."""

    page_number: int
    text: str  # Text extracted from the page
    dimensions: OCRPageDimensions  # Page width and height
    images: List[OCRImageObject]  # Images found on the page
class OCRPageDimensions:
    """Width and height of a page (units are not stated in this reference)."""

    width: float
    height: float
class OCRImageObject:
    """Position and size of an image detected on a page.

    NOTE(review): which corner (x, y) refers to, and the units, are not
    stated in this reference - confirm against the API documentation.
    """

    x: float
    y: float
    width: float
    height: float
class OCRUsageInfo:
prompt_tokens: int
completion_tokens: int
total_tokens: intInstall with Tessl CLI
npx tessl i tessl/pypi-mistralai