Microsoft Azure Cognitive Services Computer Vision Client Library for Python providing state-of-the-art algorithms to process images and return information including mature content detection, face detection, color analysis, image categorization, description generation, and thumbnail creation.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Detect and locate objects within images, providing bounding boxes, confidence scores, and hierarchical object relationships. The service can identify a wide range of common objects and provides spatial location information for each detection.
Identify objects within images and provide their locations using bounding rectangles.
def detect_objects(url, model_version="latest", custom_headers=None, raw=False, **operation_config):
    """
    Detect objects within an image.

    Args:
        url (str): Publicly reachable URL of an image
        model_version (str, optional): AI model version to use. Default: "latest"
        custom_headers (dict, optional): Custom HTTP headers
        raw (bool, optional): Return raw response. Default: False
        **operation_config: Additional keyword arguments passed through as
            per-operation configuration overrides

    Returns:
        DetectResult: Object detection results with bounding boxes and confidence scores.
            When ``raw`` is True the raw response is returned instead.

    Raises:
        ComputerVisionErrorResponseException: API error occurred
    """
def detect_objects_in_stream(image, model_version="latest", custom_headers=None, raw=False, **operation_config):
    """
    Detect objects from binary image stream.

    Args:
        image (Generator): Binary image data stream (for example, a file opened in "rb" mode)
        model_version (str, optional): AI model version to use. Default: "latest"
        custom_headers (dict, optional): Custom HTTP headers
        raw (bool, optional): Return raw response. Default: False
        **operation_config: Additional keyword arguments passed through as
            per-operation configuration overrides

    Returns:
        DetectResult: Object detection results.
            When ``raw`` is True the raw response is returned instead.

    Raises:
        ComputerVisionErrorResponseException: API error occurred
    """


from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
# Initialize client
credentials = CognitiveServicesCredentials("your-api-key")
client = ComputerVisionClient("https://your-endpoint.cognitiveservices.azure.com/", credentials)

# Detect objects in image
image_url = "https://example.com/street-scene.jpg"
detection_result = client.detect_objects(image_url)

print(f"Detected {len(detection_result.objects)} objects:")
for obj in detection_result.objects:
    print(f"\nObject: {obj.object_property}")
    print(f"Confidence: {obj.confidence:.3f}")

    # Bounding rectangle
    rect = obj.rectangle
    print(f"Location: x={rect.x}, y={rect.y}, width={rect.w}, height={rect.h}")

    # Parent object (if part of hierarchy). The parent is a more generic
    # label for the same detection (e.g. "bulldog" -> "dog"); it carries a
    # label and a confidence but no bounding rectangle of its own.
    if obj.parent:
        print(f"Parent object: {obj.parent.object_property}")
        print(f"Parent confidence: {obj.parent.confidence:.3f}")

# Detect objects from local image file
# Send the image as a binary stream; the file only needs to stay open for
# the duration of the request.
with open("local_image.jpg", "rb") as image_stream:
    detection_result = client.detect_objects_in_stream(image_stream)

# Group objects by type
object_counts = {}
for obj in detection_result.objects:
    obj_type = obj.object_property
    object_counts[obj_type] = object_counts.get(obj_type, 0) + 1

print("Object summary:")
for obj_type, count in object_counts.items():
    print(f" {obj_type}: {count}")

# Filter objects by confidence threshold
image_url = "https://example.com/busy-scene.jpg"
detection_result = client.detect_objects(image_url)

# Keep only detections the service is reasonably sure about
confidence_threshold = 0.7
high_confidence_objects = [
    obj for obj in detection_result.objects
    if obj.confidence >= confidence_threshold
]

print(f"High confidence objects (≥{confidence_threshold}):")
for obj in high_confidence_objects:
    print(f" {obj.object_property}: {obj.confidence:.3f}")

# Analyze object spatial relationships
detection_result = client.detect_objects(image_url)
# Find largest object by area
largest_object = max(
detection_result.objects,
key=lambda obj: obj.rectangle.w * obj.rectangle.h
)
print(f"Largest object: {largest_object.object_property}")
print(f"Area: {largest_object.rectangle.w * largest_object.rectangle.h} pixels")
# Find objects in the left half of the image
image_width = detection_result.metadata.width if detection_result.metadata else 1000 # fallback
left_half_objects = [
obj for obj in detection_result.objects
if obj.rectangle.x + obj.rectangle.w / 2 < image_width / 2
]
print(f"\nObjects in left half: {len(left_half_objects)}")
for obj in left_half_objects:
print(f" {obj.object_property}")class DetectResult:
"""
Object detection operation result.
Attributes:
objects (list[DetectedObject]): List of detected objects with locations
request_id (str): Request identifier
metadata (ImageMetadata): Image metadata (dimensions, format)
model_version (str): Model version used for detection
"""class DetectedObject:
"""
Individual detected object with location and hierarchy information.
Attributes:
rectangle (BoundingRect): Object bounding rectangle
object_property (str): Object name/type (e.g., "person", "car", "bicycle")
confidence (float): Detection confidence score (0.0 to 1.0)
parent (ObjectHierarchy, optional): Parent object in hierarchy
"""class BoundingRect:
"""
Rectangular bounding box for detected objects.
Attributes:
x (int): Left coordinate (pixels from left edge)
y (int): Top coordinate (pixels from top edge)
w (int): Rectangle width in pixels
h (int): Rectangle height in pixels
"""class ObjectHierarchy:
"""
Parent object information in object hierarchy.
Attributes:
object_property (str): Parent object name/type
confidence (float): Parent object confidence score
rectangle (BoundingRect): Parent object bounding rectangle
"""class ImageMetadata:
"""
Image metadata information.
Attributes:
height (int): Image height in pixels
width (int): Image width in pixels
format (str): Image format (e.g., "Jpeg", "Png")
"""The object detection service can identify many common objects including:
The service continues to expand its object recognition capabilities, and confidence scores help determine the reliability of each detection.
Install with Tessl CLI
npx tessl i tessl/pypi-azure-cognitiveservices-vision-computervision