CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-google-cloud-aiplatform

Comprehensive Python client library for Google Cloud Vertex AI, offering machine learning tools, generative AI models, and MLOps capabilities

Pending
Overview
Eval results
Files

docs/vision.md

Computer Vision

Comprehensive vision AI capabilities including image generation, analysis, and multimodal understanding through specialized models like Imagen for generation and vision models for understanding tasks.

Capabilities

Image Generation

Generate high-quality images from text prompts using Imagen models with advanced editing capabilities.

class ImageGenerationModel:
    """Imagen model wrapper: text-to-image generation, mask-based editing, and upscaling."""

    # Alternate constructor: load a published Imagen model by its versioned
    # resource name, e.g. "imagegeneration@006".
    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageGenerationModel': ...

    # Generate `number_of_images` images from a text prompt.
    # `negative_prompt` describes content to steer away from; `aspect_ratio`
    # is a string such as "16:9". `safety_filter_level` and
    # `person_generation` are service-defined string enums — valid values
    # are not visible here; confirm against the Vertex AI documentation.
    def generate_images(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        number_of_images: int = 1,
        aspect_ratio: Optional[str] = None,
        safety_filter_level: Optional[str] = None,
        person_generation: Optional[str] = None,
        **kwargs
    ) -> ImageGenerationResponse: ...

    # Edit an image guided by a text prompt. An optional `mask` image
    # restricts the region to edit; `edit_mode` selects the kind of edit
    # (string enum — see Vertex AI docs for valid values).
    def edit_image(
        self,
        prompt: str,
        base_image: Optional[Image] = None,
        mask: Optional[Image] = None,
        edit_mode: Optional[str] = None,
        **kwargs
    ) -> ImageGenerationResponse: ...

    # Upscale an image, either to a target size in pixels (`new_size`,
    # default 2048 — exact dimension semantics not visible here; confirm)
    # or by a relative `upscale_factor`.
    def upscale_image(
        self,
        image: Image,
        new_size: int = 2048,
        upscale_factor: Optional[int] = None,
        **kwargs
    ) -> ImageGenerationResponse: ...

Image Understanding

Analyze and understand image content with captioning, question answering, and classification capabilities.

class ImageCaptioningModel:
    """Vision model that produces natural-language captions for an image."""

    # Alternate constructor: load a published captioning model by name,
    # e.g. "imagetext@001".
    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageCaptioningModel': ...

    # Return up to `number_of_results` caption strings for `image`.
    # `language` is a language code (default "en"); results may optionally
    # also be written to `output_gcs_uri`.
    def get_captions(
        self,
        image: Image,
        number_of_results: int = 1,
        language: str = "en",
        output_gcs_uri: Optional[str] = None,
        **kwargs
    ) -> List[str]: ...

class ImageQnAModel:
    """Visual question-answering model: answers free-form questions about an image."""

    # Alternate constructor: load a published VQA model by name.
    @classmethod
    def from_pretrained(cls, model_name: str) -> 'ImageQnAModel': ...

    # Answer `question` about `image`; returns a list of up to
    # `number_of_results` candidate answer strings (a list, not a single
    # string, even when number_of_results is 1).
    def ask_question(
        self,
        image: Image,
        question: str,
        number_of_results: int = 1,
        **kwargs
    ) -> List[str]: ...

class ImageTextModel(ImageCaptioningModel, ImageQnAModel):
    """Combined image understanding model with both captioning and Q&A capabilities.

    Adds no methods of its own: `get_captions` comes from
    ImageCaptioningModel and `ask_question` from ImageQnAModel.
    """
    pass

Multimodal Embeddings

Generate vector embeddings from images, videos, and text for similarity search and multimodal applications.

class MultiModalEmbeddingModel:
    """Embedding model mapping images, videos, and text into a shared vector space."""

    # Alternate constructor: load a published embedding model by name,
    # e.g. "multimodalembedding@001".
    @classmethod
    def from_pretrained(cls, model_name: str) -> 'MultiModalEmbeddingModel': ...

    # Compute embeddings for any combination of `image`, `video`, and
    # `contextual_text` (at least one presumably required — not enforced in
    # this signature; confirm). `dimension` requests a specific embedding
    # size; `video_segment_config` controls how the video is split into
    # segments, each embedded separately.
    def get_embeddings(
        self,
        image: Optional[Image] = None,
        video: Optional[Video] = None,
        contextual_text: Optional[str] = None,
        dimension: Optional[int] = None,
        video_segment_config: Optional[VideoSegmentConfig] = None,
        **kwargs
    ) -> MultiModalEmbeddingResponse: ...

Media Data Types

Comprehensive media handling for images and videos with flexible loading and processing options.

class Image:
    """Image backed either by in-memory bytes or a GCS URI.

    NOTE(review): whether `image_bytes` and `gcs_uri` are mutually
    exclusive is not visible from this signature — confirm before relying
    on passing both.
    """

    def __init__(self, image_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None): ...

    # Alternate constructor: read the image from a local file path.
    @staticmethod
    def load_from_file(location: str) -> 'Image': ...

    # Display the image (matplotlib-style figsize tuple — TODO confirm backend).
    def show(self, figsize: Tuple[int, int] = (10, 10)) -> None: ...
    # Write the image to a local file path.
    def save(self, location: str) -> None: ...

    # (width, height) in pixels if determinable, else None. Leading
    # underscore marks these as non-public.
    @property
    def _size(self) -> Optional[Tuple[int, int]]: ...
    # MIME type (e.g. "image/png") if determinable, else None.
    @property
    def _mime_type(self) -> Optional[str]: ...

class Video:
    """Video backed either by in-memory bytes or a GCS URI.

    NOTE(review): as with Image, mutual exclusivity of the two sources is
    not visible here — confirm.
    """

    def __init__(self, video_bytes: Optional[bytes] = None, gcs_uri: Optional[str] = None): ...

    # Alternate constructor: read the video from a local file path.
    @staticmethod
    def load_from_file(location: str) -> 'Video': ...

    # Write the video to a local file path.
    def save(self, location: str) -> None: ...

class GeneratedImage(Image):
    """Image generated by ImageGenerationModel with metadata."""

    # Parameters used to generate this image (prompt etc. — exact keys are
    # service-defined and not visible here), or None if unavailable.
    @property
    def generation_parameters(self) -> Optional[Dict[str, Any]]: ...

    # Save to a local file; by default the generation parameters are
    # embedded alongside the pixels (widens Image.save with a defaulted
    # keyword, so callers of the base signature still work).
    def save(self, location: str, include_generation_parameters: bool = True) -> None: ...

Response Types

Structured responses containing generated images and analysis results with comprehensive metadata.

class ImageGenerationResponse:
    """Container for generated images.

    Supports iteration and integer indexing, both yielding GeneratedImage
    instances in generation order.
    """
    def __init__(self, images: List[GeneratedImage]): ...

    # The full list of generated images.
    @property
    def images(self) -> List[GeneratedImage]: ...

    def __iter__(self) -> Iterator[GeneratedImage]: ...
    def __getitem__(self, index: int) -> GeneratedImage: ...

class MultiModalEmbeddingResponse:
    """Container for multimodal embedding results.

    Each property is None when the corresponding input (image, video,
    contextual text) was not supplied to get_embeddings.
    """
    # Embedding vector for the input image, if one was provided.
    @property
    def image_embedding(self) -> Optional[List[float]]: ...
    # Per-segment embeddings for the input video, if one was provided.
    @property
    def video_embeddings(self) -> Optional[List[VideoEmbedding]]: ...
    # Embedding vector for the contextual text, if provided.
    @property
    def text_embedding(self) -> Optional[List[float]]: ...

class VideoEmbedding:
    """Embedding for a single video segment, delimited by start/end offsets in seconds."""
    # Segment start, in seconds from the beginning of the video.
    @property
    def start_offset_sec(self) -> float: ...
    # Segment end, in seconds from the beginning of the video.
    @property
    def end_offset_sec(self) -> float: ...
    # The embedding vector for this segment.
    @property
    def embedding(self) -> List[float]: ...

class VideoSegmentConfig:
    """Configuration for video segment processing.

    Embeds the video between `start_offset_sec` and `end_offset_sec`
    (defaults: first 120 seconds), sampling one segment every
    `interval_sec` seconds (default 16).
    """
    def __init__(
        self,
        start_offset_sec: int = 0,
        end_offset_sec: int = 120,
        interval_sec: int = 16
    ): ...

Usage Examples

Generate images:

from vertexai.vision_models import ImageGenerationModel

# Load a published Imagen model and generate two 16:9 images from a prompt.
model = ImageGenerationModel.from_pretrained("imagegeneration@006")
response = model.generate_images(
    prompt="A serene mountain landscape at sunset",
    number_of_images=2,
    aspect_ratio="16:9"
)

# The response iterates over GeneratedImage objects; save each to disk.
for i, image in enumerate(response.images):
    image.save(f"generated_image_{i}.png")

Image understanding:

from vertexai.vision_models import ImageTextModel, Image

# ImageTextModel combines captioning and visual question answering.
model = ImageTextModel.from_pretrained("imagetext@001")
image = Image.load_from_file("photo.jpg")

# Get captions — returns a list of up to number_of_results strings
captions = model.get_captions(image, number_of_results=3)
print("Captions:", captions)

# Ask questions — note: returns a list of answer strings, not a single string
answer = model.ask_question(image, "What objects are in this image?")
print("Answer:", answer)

Multimodal embeddings:

from vertexai.vision_models import MultiModalEmbeddingModel, Image

# Embed an image together with contextual text into a shared vector space.
model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")
image = Image.load_from_file("image.jpg")

# Request 512-dimensional embeddings for both the image and the text.
response = model.get_embeddings(
    image=image,
    contextual_text="A beautiful landscape",
    dimension=512
)

# image_embedding is a plain list of floats (None if no image was supplied).
print(f"Image embedding dimension: {len(response.image_embedding)}")

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-aiplatform

docs

batch.md

datasets.md

experiments.md

feature-store.md

generative-ai.md

index.md

models.md

pipelines.md

training.md

vector-search.md

vision.md

tile.json