Microsoft Azure Cognitive Services Computer Vision Client Library for Python providing state-of-the-art algorithms to process images and return information including mature content detection, face detection, color analysis, image categorization, description generation, and thumbnail creation.
npx @tessl/cli install tessl/pypi-azure-cognitiveservices-vision-computervision@0.9.0

Microsoft Azure Cognitive Services Computer Vision Client Library for Python provides state-of-the-art algorithms to process images and return information. The library enables developers to analyze images for mature content detection, face detection, color analysis, image categorization, description generation, and intelligent thumbnail creation.
Note: This package has been deprecated as of November 1, 2024, and is replaced by azure-ai-vision-imageanalysis.
pip install azure-cognitiveservices-vision-computervision

Dependencies: msrest>=0.6.21, azure-common~=1.1

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision import ComputerVisionClientConfiguration
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes

from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
# Initialize client
credentials = CognitiveServicesCredentials("your-api-key")
client = ComputerVisionClient("https://your-endpoint.cognitiveservices.azure.com/", credentials)
# Analyze an image
image_url = "https://example.com/image.jpg"
visual_features = [VisualFeatureTypes.categories, VisualFeatureTypes.description, VisualFeatureTypes.faces]
analysis = client.analyze_image(image_url, visual_features=visual_features)
# Access results
print(f"Description: {analysis.description.captions[0].text}")
print(f"Categories: {[cat.name for cat in analysis.categories]}")
print(f"Faces detected: {len(analysis.faces)}")

The Computer Vision API provides a comprehensive set of image analysis capabilities through a single client interface:
Comprehensive image analysis including content categorization, description generation, face detection, color analysis, and object recognition. Supports multiple visual features in a single API call.
def analyze_image(url, visual_features=None, details=None, language="en", description_exclude=None, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Extract rich visual features from image content.
Args:
url (str): Publicly reachable URL of an image
visual_features (list[VisualFeatureTypes], optional): Visual feature types to return
details (list[Details], optional): Domain-specific details (Celebrities, Landmarks)
language (str, optional): Output language ("en", "es", "ja", "pt", "zh")
description_exclude (list[DescriptionExclude], optional): Domain models to exclude
model_version (str, optional): AI model version ("latest", "2021-04-01")
Returns:
ImageAnalysis: Complete analysis results
"""
def analyze_image_in_stream(image, visual_features=None, details=None, language="en", description_exclude=None, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Analyze image from binary stream.
Args:
image (Generator): Binary image data stream
Returns:
ImageAnalysis: Complete analysis results
"""

Detect and locate objects within images, providing bounding boxes and confidence scores for identified objects.
def detect_objects(url, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Detect objects within an image.
Args:
url (str): Publicly reachable URL of an image
model_version (str, optional): AI model version
Returns:
DetectResult: Object detection results with bounding boxes
"""
def detect_objects_in_stream(image, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Detect objects from binary stream.
Returns:
DetectResult: Object detection results
"""

Extract text from images using both synchronous OCR for printed text and the asynchronous Read API for handwritten and printed text recognition.
def recognize_printed_text(detect_orientation, url, language=None, custom_headers=None, raw=False, **operation_config):
"""
Perform OCR on printed text in images.
Args:
detect_orientation (bool): Whether to detect text orientation
url (str): Publicly reachable URL of an image
language (str, optional): OCR language code
Returns:
OcrResult: OCR results with text regions and words
"""
def read(url, language=None, pages=None, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Read text from image (asynchronous operation).
Args:
url (str): Publicly reachable URL of an image
language (str, optional): Text language for recognition
pages (list[int], optional): Page numbers to process
Returns:
str: Operation location URL for polling status
"""
def get_read_result(operation_id, custom_headers=None, raw=False, **operation_config):
"""
Get result of read operation.
Args:
operation_id (str): Operation ID from read operation
Returns:
ReadOperationResult: Text recognition results
"""

Generate human-readable descriptions of image content in complete English sentences.
def describe_image(url, max_candidates=None, language="en", description_exclude=None, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Generate description of image content.
Args:
url (str): Publicly reachable URL of an image
max_candidates (int, optional): Maximum description candidates to return
language (str, optional): Output language
Returns:
ImageDescription: Generated descriptions with confidence scores
"""

Generate detailed tags for image content with confidence scores.
def tag_image(url, language="en", model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Generate tags for image content.
Args:
url (str): Publicly reachable URL of an image
language (str, optional): Output language
Returns:
TagResult: Generated tags with confidence scores
"""

Generate intelligent thumbnails with smart cropping to preserve important image content.
def generate_thumbnail(width, height, url, smart_cropping=None, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Generate thumbnail image with smart cropping.
Args:
width (int): Thumbnail width in pixels
height (int): Thumbnail height in pixels
url (str): Source image URL
smart_cropping (bool, optional): Enable smart cropping algorithm
Returns:
Generator: Binary image data stream
"""

Specialized analysis using domain-specific models for celebrity and landmark recognition.
def analyze_image_by_domain(model, url, language="en", custom_headers=None, raw=False, **operation_config):
"""
Analyze image using domain-specific model.
Args:
model (str): Domain model name ("celebrities" or "landmarks")
url (str): Publicly reachable URL of an image
language (str, optional): Output language
Returns:
DomainModelResults: Domain-specific analysis results
"""
def list_models(custom_headers=None, raw=False, **operation_config):
"""
List available domain models.
Returns:
ListModelsResult: Available domain models
"""

Identify the most important rectangular area within an image for cropping or focus.
def get_area_of_interest(url, model_version="latest", custom_headers=None, raw=False, **operation_config):
"""
Get area of interest in image for optimal cropping.
Args:
url (str): Publicly reachable URL of an image
Returns:
AreaOfInterestResult: Bounding rectangle of interest area
"""

class ComputerVisionClient:
"""
Main client for Computer Vision API operations.
Attributes:
config (ComputerVisionClientConfiguration): Client configuration
api_version (str): API version ("3.2")
"""
def __init__(self, endpoint, credentials):
"""
Initialize Computer Vision client.
Args:
endpoint (str): Cognitive Services endpoint URL
credentials: Subscription credentials
"""

class ComputerVisionClientConfiguration:
"""
Configuration for ComputerVisionClient.
Attributes:
endpoint (str): Service endpoint URL
credentials: Authentication credentials
keep_alive (bool): Connection pool setting
"""
def __init__(self, endpoint, credentials):
"""
Initialize client configuration.
Args:
endpoint (str): Cognitive Services endpoint URL
credentials: Subscription credentials
"""

class ImageAnalysis:
"""
Complete image analysis results.
Attributes:
categories (list[Category]): Image categories with confidence scores
adult (AdultInfo): Adult content detection results
tags (list[ImageTag]): Generated tags with confidence
description (ImageDescription): Generated descriptions
faces (list[FaceDescription]): Detected faces with demographics
color (ColorInfo): Color analysis results
image_type (ImageType): Image type classification
objects (list[DetectedObject]): Detected objects with locations
brands (list[DetectedBrand]): Detected brands with locations
request_id (str): Request identifier
metadata (ImageMetadata): Image metadata
model_version (str): Model version used
"""

class VisualFeatureTypes(str, Enum):
"""Visual features available for image analysis."""
image_type = "ImageType"
faces = "Faces"
adult = "Adult"
categories = "Categories"
color = "Color"
tags = "Tags"
description = "Description"
objects = "Objects"
brands = "Brands"

class Details(str, Enum):
"""Domain-specific detail types."""
celebrities = "Celebrities"
landmarks = "Landmarks"

class DescriptionExclude(str, Enum):
"""Domain models to exclude from descriptions."""
celebrities = "Celebrities"
landmarks = "Landmarks"

class OcrDetectionLanguage(str, Enum):
"""Languages supported for OCR detection."""
zh_hans = "zh-Hans"
zh_hant = "zh-Hant"
cs = "cs"
da = "da"
nl = "nl"
en = "en"
fi = "fi"
fr = "fr"
de = "de"
el = "el"
hu = "hu"
it = "it"
ja = "ja"
ko = "ko"
nb = "nb"
pl = "pl"
pt = "pt"
ru = "ru"
es = "es"
sv = "sv"
tr = "tr"
ar = "ar"
ro = "ro"
sr_cyrl = "sr-Cyrl"
sr_latn = "sr-Latn"
sk = "sk"
class OperationStatusCodes(str, Enum):
"""Status codes for asynchronous operations."""
not_started = "notStarted"
running = "running"
succeeded = "succeeded"
failed = "failed"
class TextStyle(str, Enum):
"""Text style types for text recognition."""
handwriting = "handwriting"
print = "print"
class TextRecognitionResultDimensionUnit(str, Enum):
"""Dimension units for text recognition results."""
pixel = "pixel"
inch = "inch"

class ImageUrl:
"""
Image URL wrapper for API requests.
Attributes:
url (str): Publicly reachable URL of an image
"""
def __init__(self, url):
"""
Initialize with image URL.
Args:
url (str): Image URL
"""
class ImageMetadata:
"""
Image metadata information.
Attributes:
height (int): Image height in pixels
width (int): Image width in pixels
format (str): Image format (e.g., 'Jpeg', 'Png')
"""
class BoundingRect:
"""
Rectangular bounding box coordinates.
Attributes:
x (int): Left coordinate (pixels from left edge)
y (int): Top coordinate (pixels from top edge)
w (int): Rectangle width in pixels
h (int): Rectangle height in pixels
"""

class CategoryDetail:
"""Additional details for image categories."""
pass
class CelebritiesModel:
"""Celebrity recognition model information."""
pass
class LandmarksModel:
"""Landmark recognition model information."""
pass
class ComputerVisionError:
"""
Computer Vision API error information.
Attributes:
code (str): Error code
message (str): Error message
inner_error (ComputerVisionInnerError): Detailed error information
"""
class ComputerVisionInnerError:
"""
Detailed error information.
Attributes:
code (str): Specific error code
message (str): Detailed error message
"""

class ComputerVisionErrorResponseException(Exception):
"""Exception for Computer Vision API errors."""
pass
class ComputerVisionOcrErrorException(Exception):
"""Exception for OCR operation errors."""
pass