Comprehensive Python client library for Google Cloud Vertex AI, offering machine learning tools, generative AI models, and MLOps capabilities.

Comprehensive vision AI capabilities including image generation, analysis, and multimodal understanding through specialized models like Imagen for generation and vision models for understanding tasks.
Generate high-quality images from text prompts using Imagen models with advanced editing capabilities.
class ImageGenerationModel:
@classmethod
def from_pretrained(cls, model_name: str) -> 'ImageGenerationModel': ...
def generate_images(
self,
prompt: str,
negative_prompt: Optional[str] = None,
number_of_images: int = 1,
aspect_ratio: Optional[str] = None,
safety_filter_level: Optional[str] = None,
person_generation: Optional[str] = None,
**kwargs
) -> ImageGenerationResponse: ...
def edit_image(
self,
prompt: str,
base_image: Optional[Image] = None,
mask: Optional[Image] = None,
edit_mode: Optional[str] = None,
**kwargs
) -> ImageGenerationResponse: ...
def upscale_image(
self,
image: Image,
new_size: int = 2048,
upscale_factor: Optional[int] = None,
**kwargs
) -> ImageGenerationResponse: ...Analyze and understand image content with captioning, question answering, and classification capabilities.
class ImageCaptioningModel:
    """Model that produces natural-language captions for an image."""

    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageCaptioningModel':
        """Load a published captioning model by name, e.g. ``"imagetext@001"``."""
        ...

    def get_captions(
        self,
        image: Image,
        number_of_results: int = 1,
        language: str = "en",
        output_gcs_uri: Optional[str] = None,
        **kwargs
    ) -> List[str]:
        """Return up to ``number_of_results`` captions for ``image``.

        ``language`` selects the caption language (default English).
        ``output_gcs_uri``, when given, presumably also writes results to
        Cloud Storage — confirm against the implementation.
        """
        ...
class ImageQnAModel:
    """Model that answers free-form questions about an image (visual Q&A)."""

    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageQnAModel':
        """Load a published visual-Q&A model by name."""
        ...

    def ask_question(
        self,
        image: Image,
        question: str,
        number_of_results: int = 1,
        **kwargs
    ) -> List[str]:
        """Return up to ``number_of_results`` answers to ``question`` about ``image``."""
        ...
class ImageTextModel(ImageCaptioningModel, ImageQnAModel):
"""Combined image understanding model with both captioning and Q&A capabilities."""
passGenerate vector embeddings from images, videos, and text for similarity search and multimodal applications.
class MultiModalEmbeddingModel:
@classmethod
def from_pretrained(cls, model_name: str) -> 'MultiModalEmbeddingModel': ...
def get_embeddings(
self,
image: Optional[Image] = None,
video: Optional[Video] = None,
contextual_text: Optional[str] = None,
dimension: Optional[int] = None,
video_segment_config: Optional[VideoSegmentConfig] = None,
**kwargs
) -> MultiModalEmbeddingResponse: ...Comprehensive media handling for images and videos with flexible loading and processing options.
class Image:
    """Wrapper around image data held either as raw bytes or a Cloud Storage URI."""

    def __init__(self, image_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None):
        # Presumably exactly one of image_bytes / gcs_uri should be given —
        # TODO confirm against the implementation.
        ...

    @staticmethod
    def load_from_file(location: str) -> 'Image':
        """Load an image from ``location`` (local path; possibly also a GCS URI — confirm)."""
        ...

    def show(self, figsize: Tuple[int, int] = (10, 10)) -> None:
        """Display the image, e.g. inline in a notebook, at the given figure size."""
        ...

    def save(self, location: str) -> None:
        """Write the image data to ``location``."""
        ...

    @property
    def _size(self) -> Optional[Tuple[int, int]]:
        # (width, height) when it can be determined, else None. Non-public.
        ...

    @property
    def _mime_type(self) -> Optional[str]:
        # Detected MIME type (e.g. "image/png") when available. Non-public.
        ...
class Video:
    """Wrapper around video data held either as raw bytes or a Cloud Storage URI."""

    def __init__(self, video_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None):
        # Presumably exactly one of video_bytes / gcs_uri should be given —
        # TODO confirm against the implementation.
        ...

    @staticmethod
    def load_from_file(location: str) -> 'Video':
        """Load a video from ``location`` (local path; possibly also a GCS URI — confirm)."""
        ...

    def save(self, location: str) -> None:
        """Write the video data to ``location``."""
        ...
class GeneratedImage(Image):
"""Image generated by ImageGenerationModel with metadata."""
@property
def generation_parameters(self) -> Optional[Dict[str, Any]]: ...
def save(self, location: str, include_generation_parameters: bool = True) -> None: ...Structured responses containing generated images and analysis results with comprehensive metadata.
class ImageGenerationResponse:
    """Container for generated images."""

    def __init__(self, images: List[GeneratedImage]):
        ...

    @property
    def images(self) -> List[GeneratedImage]:
        # The generated images held by this response.
        ...

    def __iter__(self) -> Iterator[GeneratedImage]:
        # Allows: for img in response: ...
        ...

    def __getitem__(self, index: int) -> GeneratedImage:
        # Allows: response[0]
        ...
class MultiModalEmbeddingResponse:
    """Container for multimodal embedding results.

    Each property is None when the corresponding modality was not supplied
    to ``MultiModalEmbeddingModel.get_embeddings``.
    """

    @property
    def image_embedding(self) -> Optional[List[float]]:
        # Embedding vector for the image input, if any.
        ...

    @property
    def video_embeddings(self) -> Optional[List[VideoEmbedding]]:
        # One embedding per processed video segment, if a video was given.
        ...

    @property
    def text_embedding(self) -> Optional[List[float]]:
        # Embedding vector for the contextual text, if any.
        ...
class VideoEmbedding:
    """Embedding for a video segment."""

    @property
    def start_offset_sec(self) -> float:
        # Segment start time, in seconds from the beginning of the video.
        ...

    @property
    def end_offset_sec(self) -> float:
        # Segment end time, in seconds from the beginning of the video.
        ...

    @property
    def embedding(self) -> List[float]:
        # Embedding vector for this segment.
        ...
class VideoSegmentConfig:
"""Configuration for video segment processing."""
def __init__(
self,
start_offset_sec: int = 0,
end_offset_sec: int = 120,
interval_sec: int = 16
): ...Generate images:
from vertexai.vision_models import ImageGenerationModel
model = ImageGenerationModel.from_pretrained("imagegeneration@006")
response = model.generate_images(
prompt="A serene mountain landscape at sunset",
number_of_images=2,
aspect_ratio="16:9"
)
for i, image in enumerate(response.images):
image.save(f"generated_image_{i}.png")Image understanding:
from vertexai.vision_models import ImageTextModel, Image
model = ImageTextModel.from_pretrained("imagetext@001")
image = Image.load_from_file("photo.jpg")
# Get captions
captions = model.get_captions(image, number_of_results=3)
print("Captions:", captions)
# Ask questions
answer = model.ask_question(image, "What objects are in this image?")
print("Answer:", answer)Multimodal embeddings:
from vertexai.vision_models import MultiModalEmbeddingModel, Image
model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
image = Image.load_from_file("image.jpg")
response = model.get_embeddings(
image=image,
contextual_text="A beautiful landscape",
dimension=512
)
print(f"Image embedding dimension: {len(response.image_embedding)}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-aiplatform