A Python client for the Google Cloud Video Intelligence API that enables developers to make videos searchable and discoverable by extracting metadata through machine learning.
```
npx @tessl/cli install tessl/pypi-google-cloud-videointelligence@2.16.0
```

A comprehensive Python client library for the Google Cloud Video Intelligence API that enables developers to make videos searchable and discoverable by extracting metadata through machine learning. The library provides capabilities for analyzing videos with features like label detection, face detection, explicit content detection, speech transcription, text detection, object tracking, logo recognition, person detection, and celebrity recognition (beta).
```
pip install google-cloud-videointelligence
```

```python
from google.cloud import videointelligence
```

For specific API versions:

```python
from google.cloud import videointelligence_v1
from google.cloud import videointelligence_v1p3beta1  # For streaming features
```

```python
from google.cloud import videointelligence

# Create a client
client = videointelligence.VideoIntelligenceServiceClient()

# Annotate a video with label detection
features = [videointelligence.Feature.LABEL_DETECTION]
operation = client.annotate_video(
    request={
        "features": features,
        "input_uri": "gs://your-bucket/your-video.mp4",
    }
)

# Wait for the operation to complete
print("Processing video for label detection...")
result = operation.result(timeout=300)

# Process results
for annotation_result in result.annotation_results:
    for label in annotation_result.segment_label_annotations:
        print(f"Label: {label.entity.description}")
        for segment in label.segments:
            start_time = segment.segment.start_time_offset.total_seconds()
            end_time = segment.segment.end_time_offset.total_seconds()
            print(f"  Segment: {start_time}s to {end_time}s (confidence: {segment.confidence})")
```
The Google Cloud Video Intelligence client library follows Google's client library design patterns:

Core client functionality for analyzing videos with Google's AI capabilities. Supports both synchronous and asynchronous operations, multiple transport protocols, and comprehensive error handling.
```python
class VideoIntelligenceServiceClient:
    def __init__(self, *, credentials=None, transport=None, client_options=None, client_info=None): ...
    def annotate_video(self, request=None, *, input_uri=None, features=None, retry=None, timeout=None, metadata=()) -> operation.Operation: ...

    @classmethod
    def from_service_account_file(cls, filename: str, *args, **kwargs) -> VideoIntelligenceServiceClient: ...

    @classmethod
    def from_service_account_info(cls, info: dict, *args, **kwargs) -> VideoIntelligenceServiceClient: ...
```
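Clients pick up application default credentials automatically; to authenticate from an explicit key file instead, a short sketch (assuming `key.json` is a downloaded service account key):

```python
from google.cloud import videointelligence

# Build a client from a service account key file instead of ADC.
client = videointelligence.VideoIntelligenceServiceClient.from_service_account_file(
    "key.json"
)
```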
```python
class VideoIntelligenceServiceAsyncClient:
    def __init__(self, *, credentials=None, transport=None, client_options=None, client_info=None): ...
    async def annotate_video(self, request=None, *, input_uri=None, features=None, retry=None, timeout=None, metadata=()) -> operation_async.AsyncOperation: ...

    @classmethod
    def from_service_account_file(cls, filename: str, *args, **kwargs) -> VideoIntelligenceServiceAsyncClient: ...

    @classmethod
    def from_service_account_info(cls, info: dict, *args, **kwargs) -> VideoIntelligenceServiceAsyncClient: ...
```
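A minimal sketch of the async client; `annotate_video` returns an `AsyncOperation` whose result is awaited rather than polled synchronously:

```python
import asyncio

from google.cloud import videointelligence


async def main():
    client = videointelligence.VideoIntelligenceServiceAsyncClient()
    operation = await client.annotate_video(
        request={
            "features": [videointelligence.Feature.LABEL_DETECTION],
            "input_uri": "gs://your-bucket/your-video.mp4",
        }
    )
    # AsyncOperation.result is a coroutine; await it to get the response.
    result = await operation.result(timeout=300)
    for label in result.annotation_results[0].segment_label_annotations:
        print(label.entity.description)


asyncio.run(main())
```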
Real-time video analysis capabilities for processing video streams. Available in the v1p3beta1 API version for applications requiring immediate feedback on video content.

```python
class StreamingVideoIntelligenceServiceClient:
    def __init__(self, *, credentials=None, transport=None, client_options=None, client_info=None): ...
    def streaming_annotate_video(self, requests, retry=None, timeout=None, metadata=()) -> Iterable[StreamingAnnotateVideoResponse]: ...

class StreamingVideoIntelligenceServiceAsyncClient:
    def __init__(self, *, credentials=None, transport=None, client_options=None, client_info=None): ...
    async def streaming_annotate_video(self, requests, retry=None, timeout=None, metadata=()) -> AsyncIterable[StreamingAnnotateVideoResponse]: ...
```
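A hedged sketch of the streaming flow: the first request carries only the configuration, and subsequent requests carry chunks of encoded video bytes. The `StreamingVideoConfig` and `StreamingFeature` names follow the v1p3beta1 protos; treat them as assumptions if your installed version differs:

```python
from google.cloud import videointelligence_v1p3beta1 as videointelligence

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# First request: configuration only, no video bytes.
config = videointelligence.StreamingVideoConfig(
    feature=videointelligence.StreamingFeature.STREAMING_LABEL_DETECTION,
)
config_request = videointelligence.StreamingAnnotateVideoRequest(video_config=config)


def request_generator(stream, chunk_size=1024 * 1024):
    yield config_request
    # Subsequent requests: raw video bytes in chunks.
    while chunk := stream.read(chunk_size):
        yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)


with open("sample.mp4", "rb") as stream:
    responses = client.streaming_annotate_video(requests=request_generator(stream))
    for response in responses:
        for annotation in response.annotation_results.label_annotations:
            print(annotation.entity.description)
```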
Comprehensive configuration options for different AI detection capabilities. Each feature can be fine-tuned with specific parameters and thresholds to optimize results for different use cases.

```python
class Feature(Enum):
    FEATURE_UNSPECIFIED = 0
    LABEL_DETECTION = 1
    SHOT_CHANGE_DETECTION = 2
    EXPLICIT_CONTENT_DETECTION = 3
    FACE_DETECTION = 4
    SPEECH_TRANSCRIPTION = 6
    TEXT_DETECTION = 7
    OBJECT_TRACKING = 9
    LOGO_RECOGNITION = 12
    PERSON_DETECTION = 14
```
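Several features can be requested in one call by listing them together; a short sketch combining label and text detection:

```python
from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
operation = client.annotate_video(
    request={
        "features": [
            videointelligence.Feature.LABEL_DETECTION,
            videointelligence.Feature.TEXT_DETECTION,
        ],
        "input_uri": "gs://your-bucket/your-video.mp4",
    }
)
result = operation.result(timeout=300)
```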
```python
class VideoContext:
    segments: MutableSequence[VideoSegment]
    label_detection_config: LabelDetectionConfig
    shot_change_detection_config: ShotChangeDetectionConfig
    explicit_content_detection_config: ExplicitContentDetectionConfig
    face_detection_config: FaceDetectionConfig
    speech_transcription_config: SpeechTranscriptionConfig
    text_detection_config: TextDetectionConfig
    object_tracking_config: ObjectTrackingConfig
    person_detection_config: PersonDetectionConfig
```
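A `VideoContext` scopes the analysis and carries per-feature configs; a sketch that restricts speech transcription to the first 30 seconds (`language_code` and `enable_automatic_punctuation` are standard `SpeechTranscriptionConfig` fields):

```python
from google.cloud import videointelligence
from google.protobuf import duration_pb2

client = videointelligence.VideoIntelligenceServiceClient()

context = videointelligence.VideoContext(
    # Only analyze the first 30 seconds of the video.
    segments=[
        videointelligence.VideoSegment(
            start_time_offset=duration_pb2.Duration(seconds=0),
            end_time_offset=duration_pb2.Duration(seconds=30),
        )
    ],
    speech_transcription_config=videointelligence.SpeechTranscriptionConfig(
        language_code="en-US",
        enable_automatic_punctuation=True,
    ),
)

operation = client.annotate_video(
    request={
        "features": [videointelligence.Feature.SPEECH_TRANSCRIPTION],
        "input_uri": "gs://your-bucket/your-video.mp4",
        "video_context": context,
    }
)
```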
Structured data types for representing video analysis results. Includes annotations for detected objects, faces, text, speech, and other content with timestamps and confidence scores.

```python
class AnnotateVideoResponse:
    annotation_results: MutableSequence[VideoAnnotationResults]

class VideoAnnotationResults:
    segment_label_annotations: MutableSequence[LabelAnnotation]
    shot_label_annotations: MutableSequence[LabelAnnotation]
    frame_label_annotations: MutableSequence[LabelAnnotation]
    face_annotations: MutableSequence[FaceAnnotation]
    shot_annotations: MutableSequence[VideoSegment]
    explicit_annotation: ExplicitContentAnnotation
    speech_transcriptions: MutableSequence[SpeechTranscription]
    text_annotations: MutableSequence[TextAnnotation]
    object_annotations: MutableSequence[ObjectTrackingAnnotation]
    logo_recognition_annotations: MutableSequence[LogoRecognitionAnnotation]
    person_detection_annotations: MutableSequence[PersonDetectionAnnotation]

class AnnotateVideoRequest:
    input_uri: str
    input_content: bytes
    features: MutableSequence[Feature]
    video_context: VideoContext
    output_uri: str
    location_id: str
```
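Setting `output_uri` writes the full annotation results as JSON to Cloud Storage, which is useful for long videos; a sketch:

```python
from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
operation = client.annotate_video(
    request={
        "features": [videointelligence.Feature.OBJECT_TRACKING],
        "input_uri": "gs://your-bucket/your-video.mp4",
        # Results are also written here as JSON when the operation completes.
        "output_uri": "gs://your-bucket/results/your-video.json",
    }
)
operation.result(timeout=600)
```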
```python
class VideoSegment:
    start_time_offset: duration_pb2.Duration
    end_time_offset: duration_pb2.Duration

class Entity:
    entity_id: str
    description: str
    language_code: str

class NormalizedBoundingBox:
    left: float
    top: float
    right: float
    bottom: float

class Likelihood(Enum):
    LIKELIHOOD_UNSPECIFIED = 0
    VERY_UNLIKELY = 1
    UNLIKELY = 2
    POSSIBLE = 3
    LIKELY = 4
    VERY_LIKELY = 5
```
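To show how these types fit together, a sketch that reads explicit content results and maps each frame's `pornography_likelihood` to its `Likelihood` name:

```python
from google.cloud import videointelligence

client = videointelligence.VideoIntelligenceServiceClient()
operation = client.annotate_video(
    request={
        "features": [videointelligence.Feature.EXPLICIT_CONTENT_DETECTION],
        "input_uri": "gs://your-bucket/your-video.mp4",
    }
)
result = operation.result(timeout=300)

# explicit_annotation holds one Likelihood value per sampled frame.
for frame in result.annotation_results[0].explicit_annotation.frames:
    time = frame.time_offset.total_seconds()
    likelihood = videointelligence.Likelihood(frame.pornography_likelihood)
    print(f"{time:.2f}s: {likelihood.name}")
```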