Comprehensive Python client library for Google Cloud Vertex AI, offering machine learning tools, generative AI models, and MLOps capabilities.

Comprehensive vision AI capabilities including image generation, analysis, and multimodal understanding through specialized models like Imagen for generation and vision models for understanding tasks.
Generate high-quality images from text prompts using Imagen models with advanced editing capabilities.
class ImageGenerationModel:
@classmethod
def from_pretrained(cls, model_name: str) -> 'ImageGenerationModel': ...
def generate_images(
self,
prompt: str,
negative_prompt: Optional[str] = None,
number_of_images: int = 1,
aspect_ratio: Optional[str] = None,
safety_filter_level: Optional[str] = None,
person_generation: Optional[str] = None,
**kwargs
) -> ImageGenerationResponse: ...
def edit_image(
self,
prompt: str,
base_image: Optional[Image] = None,
mask: Optional[Image] = None,
edit_mode: Optional[str] = None,
**kwargs
) -> ImageGenerationResponse: ...
def upscale_image(
self,
image: Image,
new_size: int = 2048,
upscale_factor: Optional[int] = None,
**kwargs
) -> ImageGenerationResponse: ...Analyze and understand image content with captioning, question answering, and classification capabilities.
class ImageCaptioningModel:
    """Model that produces natural-language captions for an image."""

    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageCaptioningModel':
        """Load a published captioning model by name, e.g. ``"imagetext@001"``."""
        ...

    def get_captions(
        self,
        image: Image,
        number_of_results: int = 1,
        language: str = "en",
        output_gcs_uri: Optional[str] = None,
        **kwargs
    ) -> List[str]:
        """Return up to ``number_of_results`` captions for ``image``.

        ``language`` selects the caption language (default English).
        ``output_gcs_uri``, when given, presumably also writes results to
        Cloud Storage — confirm against the implementation.
        """
        ...
class ImageQnAModel:
    """Model that answers free-form questions about an image (visual Q&A)."""

    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageQnAModel':
        """Load a published visual-Q&A model by name."""
        ...

    def ask_question(
        self,
        image: Image,
        question: str,
        number_of_results: int = 1,
        **kwargs
    ) -> List[str]:
        """Return up to ``number_of_results`` answers to ``question`` about ``image``."""
        ...
class ImageTextModel(ImageCaptioningModel, ImageQnAModel):
"""Combined image understanding model with both captioning and Q&A capabilities."""
passGenerate vector embeddings from images, videos, and text for similarity search and multimodal applications.
class MultiModalEmbeddingModel:
@classmethod
def from_pretrained(cls, model_name: str) -> 'MultiModalEmbeddingModel': ...
def get_embeddings(
self,
image: Optional[Image] = None,
video: Optional[Video] = None,
contextual_text: Optional[str] = None,
dimension: Optional[int] = None,
video_segment_config: Optional[VideoSegmentConfig] = None,
**kwargs
) -> MultiModalEmbeddingResponse: ...Comprehensive media handling for images and videos with flexible loading and processing options.
class Image:
    """Wrapper around image data held either as raw bytes or a Cloud Storage URI."""

    def __init__(self, image_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None):
        # Presumably exactly one of image_bytes / gcs_uri should be given —
        # TODO confirm against the implementation.
        ...

    @staticmethod
    def load_from_file(location: str) -> 'Image':
        """Load an image from ``location`` (local path; possibly also a GCS URI — confirm)."""
        ...

    def show(self, figsize: Tuple[int, int] = (10, 10)) -> None:
        """Display the image, e.g. inline in a notebook, at the given figure size."""
        ...

    def save(self, location: str) -> None:
        """Write the image data to ``location``."""
        ...

    @property
    def _size(self) -> Optional[Tuple[int, int]]:
        # (width, height) when it can be determined, else None. Non-public.
        ...

    @property
    def _mime_type(self) -> Optional[str]:
        # Detected MIME type (e.g. "image/png") when available. Non-public.
        ...
class Video:
    """Wrapper around video data held either as raw bytes or a Cloud Storage URI."""

    def __init__(self, video_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None):
        # Presumably exactly one of video_bytes / gcs_uri should be given —
        # TODO confirm against the implementation.
        ...

    @staticmethod
    def load_from_file(location: str) -> 'Video':
        """Load a video from ``location`` (local path; possibly also a GCS URI — confirm)."""
        ...

    def save(self, location: str) -> None:
        """Write the video data to ``location``."""
        ...
class GeneratedImage(Image):
"""Image generated by ImageGenerationModel with metadata."""
@property
def generation_parameters(self) -> Optional[Dict[str, Any]]: ...
def save(self, location: str, include_generation_parameters: bool = True) -> None: ...Structured responses containing generated images and analysis results with comprehensive metadata.
class ImageGenerationResponse:
    """Container for generated images."""

    def __init__(self, images: List[GeneratedImage]):
        ...

    @property
    def images(self) -> List[GeneratedImage]:
        # The generated images held by this response.
        ...

    def __iter__(self) -> Iterator[GeneratedImage]:
        # Allows: for img in response: ...
        ...

    def __getitem__(self, index: int) -> GeneratedImage:
        # Allows: response[0]
        ...
class MultiModalEmbeddingResponse:
    """Container for multimodal embedding results.

    Each property is None when the corresponding modality was not supplied
    to ``MultiModalEmbeddingModel.get_embeddings``.
    """

    @property
    def image_embedding(self) -> Optional[List[float]]:
        # Embedding vector for the image input, if any.
        ...

    @property
    def video_embeddings(self) -> Optional[List[VideoEmbedding]]:
        # One embedding per processed video segment, if a video was given.
        ...

    @property
    def text_embedding(self) -> Optional[List[float]]:
        # Embedding vector for the contextual text, if any.
        ...
class VideoEmbedding:
    """Embedding for a video segment."""

    @property
    def start_offset_sec(self) -> float:
        # Segment start time, in seconds from the beginning of the video.
        ...

    @property
    def end_offset_sec(self) -> float:
        # Segment end time, in seconds from the beginning of the video.
        ...

    @property
    def embedding(self) -> List[float]:
        # Embedding vector for this segment.
        ...
class VideoSegmentConfig:
"""Configuration for video segment processing."""
def __init__(
self,
start_offset_sec: int = 0,
end_offset_sec: int = 120,
interval_sec: int = 16
): ...Generate images:
from vertexai.vision_models import ImageGenerationModel
model = ImageGenerationModel.from_pretrained("imagegeneration@006")
response = model.generate_images(
prompt="A serene mountain landscape at sunset",
number_of_images=2,
aspect_ratio="16:9"
)
for i, image in enumerate(response.images):
image.save(f"generated_image_{i}.png")Image understanding:
from vertexai.vision_models import ImageTextModel, Image
model = ImageTextModel.from_pretrained("imagetext@001")
image = Image.load_from_file("photo.jpg")
# Get captions
captions = model.get_captions(image, number_of_results=3)
print("Captions:", captions)
# Ask questions
answer = model.ask_question(image, "What objects are in this image?")
print("Answer:", answer)Multimodal embeddings:
from vertexai.vision_models import MultiModalEmbeddingModel, Image
model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
image = Image.load_from_file("image.jpg")
response = model.get_embeddings(
image=image,
contextual_text="A beautiful landscape",
dimension=512
)
print(f"Image embedding dimension: {len(response.image_embedding)}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-aiplatform