Comprehensive Python client library for Google Cloud Vertex AI, offering machine learning tools, generative AI models, and MLOps capabilities
Comprehensive model lifecycle management including uploading, versioning, deployment, and serving with enterprise-grade features for production ML systems. The traditional Vertex AI SDK provides resource-based APIs for managing models as cloud resources with fine-grained control over deployment configurations.
Upload and register trained models with comprehensive metadata and version management.
class Model:
@classmethod
def upload(
cls,
display_name: str,
artifact_uri: str,
serving_container_image_uri: str,
serving_container_predict_route: Optional[str] = None,
serving_container_health_route: Optional[str] = None,
description: Optional[str] = None,
serving_container_command: Optional[Sequence[str]] = None,
serving_container_args: Optional[Sequence[str]] = None,
serving_container_environment_variables: Optional[Dict[str, str]] = None,
serving_container_ports: Optional[Sequence[int]] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[explain.ExplanationMetadata] = None,
explanation_parameters: Optional[explain.ExplanationParameters] = None,
project: Optional[str] = None,
location: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
training_job: Optional[training_jobs._TrainingJob] = None,
parent_model: Optional[str] = None,
is_default_version: bool = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
model_id: Optional[str] = None,
**kwargs
) -> 'Model': ...
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
**kwargs
) -> 'Model': ...
@property
def resource_name(self) -> str: ...
@property
def display_name(self) -> str: ...
@property
def description(self) -> str: ...
@property
def labels(self) -> Dict[str, str]: ...
@property
def version_id(self) -> str: ...
@property
def version_aliases(self) -> Sequence[str]: ...
@property
    def artifact_uri(self) -> str: ...
Upload a custom model:
import google.cloud.aiplatform as aiplatform
aiplatform.init(project='my-project', location='us-central1')
model = aiplatform.Model.upload(
display_name='my-custom-model',
artifact_uri='gs://my-bucket/model-artifacts/',
serving_container_image_uri='gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-8:latest',
description='Custom TensorFlow model for classification',
labels={'environment': 'production', 'version': '1.0'}
)
print(f"Model uploaded: {model.resource_name}")Deploy models to endpoints for online serving with configurable resources and traffic management.
class Model:
def deploy(
self,
endpoint: Optional[Endpoint] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: int = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: str = 'n1-standard-4',
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[explain.ExplanationMetadata] = None,
explanation_parameters: Optional[explain.ExplanationParameters] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None,
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync: bool = True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
**kwargs
) -> Endpoint: ...
def undeploy_all(self, sync: bool = True) -> None: ...
def get_model_evaluation(self, evaluation_id: str) -> ModelEvaluation: ...
    def list_model_evaluations(self) -> List[ModelEvaluation]: ...
Deploy a model to a new endpoint:
# Deploy to a new endpoint
endpoint = model.deploy(
deployed_model_display_name='my-model-v1',
machine_type='n1-standard-4',
min_replica_count=1,
max_replica_count=5,
traffic_percentage=100
)
print(f"Model deployed to endpoint: {endpoint.resource_name}")Deploy to existing endpoint with traffic split:
# Deploy to existing endpoint with traffic split
existing_endpoint = aiplatform.Endpoint('projects/my-project/locations/us-central1/endpoints/123')
model.deploy(
endpoint=existing_endpoint,
deployed_model_display_name='my-model-v2',
traffic_percentage=20 # 20% traffic to new version
)
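Deployment calls accept sync=False to return immediately while provisioning continues in the background; a sketch assuming the SDK's future-style resources, where wait() blocks until the operation completes:
# Non-blocking deployment; the endpoint object is returned immediately
endpoint = model.deploy(
    deployed_model_display_name='my-model-async',
    machine_type='n1-standard-4',
    sync=False
)
endpoint.wait()  # assumed future-style helper; blocks until deployment finishes
Create and manage serving endpoints with comprehensive traffic management and monitoring capabilities.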
class Endpoint:
@classmethod
def create(
cls,
display_name: str,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = None,
project: Optional[str] = None,
location: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync: bool = True,
create_request_timeout: Optional[float] = None,
**kwargs
) -> 'Endpoint': ...
def predict(
self,
instances: List[Dict],
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
use_dedicated_endpoint: bool = False,
**kwargs
) -> Prediction: ...
def explain(
self,
instances: List[Dict],
parameters: Optional[Dict] = None,
deployed_model_id: Optional[str] = None,
timeout: Optional[float] = None,
**kwargs
) -> Prediction: ...
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
traffic_split: Optional[Dict[str, int]] = None,
**kwargs
) -> 'Endpoint': ...
@property
def deployed_models(self) -> List[DeployedModel]: ...
@property
    def traffic_split(self) -> Dict[str, int]: ...
Create an endpoint and make predictions:
# Create a new endpoint
endpoint = aiplatform.Endpoint.create(
display_name='prediction-endpoint',
description='Endpoint for model predictions'
)
# Make predictions
instances = [
{'feature1': 1.0, 'feature2': 2.0, 'feature3': 3.0},
{'feature1': 4.0, 'feature2': 5.0, 'feature3': 6.0}
]
predictions = endpoint.predict(instances=instances)
print(f"Predictions: {predictions.predictions}")Traffic splitting between model versions:
# Update traffic split between deployed models
endpoint.update(traffic_split={
'deployed-model-id-1': 80, # 80% traffic
'deployed-model-id-2': 20 # 20% traffic
})
Deploy models on private networks for enhanced security and compliance.
class PrivateEndpoint:
@classmethod
def create(
cls,
display_name: str,
network: str,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
project: Optional[str] = None,
location: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
sync: bool = True,
**kwargs
) -> 'PrivateEndpoint': ...
def predict(
self,
instances: List[Dict],
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
**kwargs
    ) -> Prediction: ...
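Create a private endpoint on a VPC network and serve predictions over it; a minimal sketch (the VPC path is a placeholder, and passing a PrivateEndpoint to Model.deploy is assumed to behave like a regular Endpoint):
# Create an endpoint reachable only from the given VPC network
private_endpoint = aiplatform.PrivateEndpoint.create(
    display_name='private-prediction-endpoint',
    network='projects/my-project/global/networks/my-vpc'  # placeholder VPC
)
# Deploy and predict over the private network (assumed Endpoint-compatible)
model.deploy(endpoint=private_endpoint, machine_type='n1-standard-4')
predictions = private_endpoint.predict(instances=[{'feature1': 1.0}])
Centralized model versioning and lifecycle management with lineage tracking.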
class ModelRegistry:
@classmethod
def create_model(
cls,
model_id: str,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
**kwargs
) -> Model: ...
@classmethod
def get_model(cls, model_id: str, **kwargs) -> Model: ...
@classmethod
def list_models(cls, filter: Optional[str] = None, **kwargs) -> List[Model]: ...
@classmethod
def create_model_version(
cls,
model_id: str,
artifact_uri: str,
serving_container_image_uri: str,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
is_default_version: bool = False,
**kwargs
) -> Model: ...
@classmethod
def get_model_version(cls, model_id: str, version_id: str, **kwargs) -> Model: ...
@classmethod
def list_model_versions(cls, model_id: str, **kwargs) -> List[Model]: ...
@classmethod
    def delete_model_version(cls, model_id: str, version_id: str, **kwargs) -> None: ...
Model versioning:
# Create a model in the registry
model = aiplatform.ModelRegistry.create_model(
model_id='my-classification-model',
display_name='Customer Classification Model',
description='Model for customer segmentation'
)
# Add versions
v1 = aiplatform.ModelRegistry.create_model_version(
model_id='my-classification-model',
artifact_uri='gs://my-bucket/model-v1/',
serving_container_image_uri='gcr.io/project/image:v1',
version_aliases=['stable'],
is_default_version=True
)
# Add a new version
v2 = aiplatform.ModelRegistry.create_model_version(
model_id='my-classification-model',
artifact_uri='gs://my-bucket/model-v2/',
serving_container_image_uri='gcr.io/project/image:v2',
version_aliases=['experimental']
)
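Versions can then be retrieved, listed, and pruned through the same registry API; a short sketch using the methods above:
# Look up one version, enumerate all versions, and delete an obsolete one
v1 = aiplatform.ModelRegistry.get_model_version(
    model_id='my-classification-model',
    version_id='1'
)
for version in aiplatform.ModelRegistry.list_model_versions('my-classification-model'):
    print(version.version_id, version.version_aliases)
aiplatform.ModelRegistry.delete_model_version(
    model_id='my-classification-model',
    version_id='2'
)
Shared compute resources for cost optimization and resource management across multiple model deployments.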
class DeploymentResourcePool:
@classmethod
def create(
cls,
deployment_resource_pool_id: str,
machine_spec: MachineSpec,
min_replica_count: int = 1,
max_replica_count: int = 1,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
**kwargs
) -> 'DeploymentResourcePool': ...
def update(
self,
min_replica_count: Optional[int] = None,
max_replica_count: Optional[int] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
**kwargs
) -> 'DeploymentResourcePool': ...
@property
    def dedicated_resources(self) -> DedicatedResources: ...
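Create a pool once and share it across model deployments; a minimal sketch (constructing MachineSpec directly is an assumption based on the resource types defined later in this section):
# A shared pool lets several deployed models co-locate on the same replicas
pool = aiplatform.DeploymentResourcePool.create(
    deployment_resource_pool_id='shared-serving-pool',
    machine_spec=MachineSpec(machine_type='n1-standard-8'),  # assumed constructor
    min_replica_count=1,
    max_replica_count=4,
    autoscaling_target_cpu_utilization=60
)
Comprehensive model evaluation and performance analysis with automated metrics computation.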
class ModelEvaluation:
@classmethod
def create(
cls,
model: Model,
dataset: Dataset,
prediction_type: str,
class_labels: Optional[List[str]] = None,
prediction_label_column: Optional[str] = None,
prediction_score_column: Optional[str] = None,
ground_truth_column: Optional[str] = None,
**kwargs
) -> 'ModelEvaluation': ...
@property
def metrics(self) -> Dict[str, float]: ...
@property
def metrics_schema_uri(self) -> str: ...
@property
    def slice_dimensions(self) -> List[str]: ...
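Evaluations attached to a model can be retrieved and their metrics inspected; a short sketch using the evaluation accessors shown on Model earlier:
# Print the computed metrics for every evaluation attached to the model
for evaluation in model.list_model_evaluations():
    for name, value in evaluation.metrics.items():
        print(f"{name}: {value}")
# Prediction response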
class Prediction:
predictions: List[Dict]
deployed_model_id: str
model_version_id: str
model_resource_name: str
explanations: Optional[List[Explanation]]
# Deployed model information
class DeployedModel:
id: str
display_name: str
model: str
model_version_id: str
create_time: datetime
dedicated_resources: Optional[DedicatedResources]
automatic_resources: Optional[AutomaticResources]
private_endpoints: Optional[PrivateServiceConnectConfig]
# Resource specifications
class MachineSpec:
machine_type: str
accelerator_type: Optional[str]
accelerator_count: Optional[int]
class DedicatedResources:
machine_spec: MachineSpec
min_replica_count: int
max_replica_count: int
autoscaling_target_cpu_utilization: Optional[int]
autoscaling_target_accelerator_duty_cycle: Optional[int]
class AutomaticResources:
min_replica_count: int
max_replica_count: int
# Model serving container specification
class ModelContainerSpec:
image_uri: str
command: Optional[List[str]]
args: Optional[List[str]]
env: Optional[List[EnvVar]]
ports: Optional[List[Port]]
predict_route: Optional[str]
health_route: Optional[str]
# Environment variable
class EnvVar:
name: str
value: str
# Container port
class Port:
container_port: int
    protocol: str
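These types surface on responses and endpoint state; for example, a prediction can be traced back to the deployed model and version that served it (a short sketch reusing the endpoint and instances from above):
# Trace a prediction to the deployed model and version that served it
prediction = endpoint.predict(instances=instances)
print(prediction.deployed_model_id, prediction.model_version_id)
# Inspect current deployments and the traffic split across them
for deployed in endpoint.deployed_models:
    print(deployed.id, deployed.display_name, deployed.model_version_id)
print(endpoint.traffic_split)
Built-in model explanation capabilities for understanding model predictions.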
# Model explanations are configured during deployment
explanation_metadata = explain.ExplanationMetadata(
inputs={
'feature_name': explain.ExplanationMetadata.InputMetadata(
input_tensor_name='input_tensor_name'
)
},
outputs={
'output_name': explain.ExplanationMetadata.OutputMetadata(
output_tensor_name='output_tensor_name'
)
}
)
explanation_parameters = explain.ExplanationParameters(
sampled_shapley_attribution=explain.SampledShapleyAttribution(
path_count=10
)
)
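The metadata and parameters above are supplied to Model.upload or Model.deploy; per-instance attributions are then available through Endpoint.explain (a sketch with placeholder feature names):
# Deploy with explanation support, then request attributions
endpoint = model.deploy(
    deployed_model_display_name='my-model-explained',
    explanation_metadata=explanation_metadata,
    explanation_parameters=explanation_parameters
)
response = endpoint.explain(instances=[{'feature1': 1.0, 'feature2': 2.0}])
for explanation in response.explanations:
    print(explanation)
Built-in support for gradual rollouts and model comparison through traffic splitting.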
# Canary deployment with 5% traffic
model.deploy(
endpoint=existing_endpoint,
deployed_model_display_name='model-v2-canary',
traffic_percentage=5
)
# Gradually increase traffic; traffic_split keys are deployed model IDs,
# not display names
endpoint.update(traffic_split={
    'deployed-model-id-v1': 80,
    'deployed-model-id-canary': 20
})
Deploy models across multiple regions for global availability and disaster recovery.
# Models are regional resources, so upload the artifacts in each region
# before deploying there
regions = ['us-central1', 'europe-west1', 'asia-southeast1']
endpoints = {}
for region in regions:
    aiplatform.init(project='my-project', location=region)
    regional_model = aiplatform.Model.upload(
        display_name=f'my-model-{region}',
        artifact_uri='gs://my-bucket/model-artifacts/',
        serving_container_image_uri='gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-8:latest'
    )
    endpoints[region] = regional_model.deploy(
        deployed_model_display_name=f'model-{region}',
        machine_type='n1-standard-4'
    )
This model management system provides enterprise-grade capabilities for deploying, serving, and managing ML models at scale with fine-grained control over resources, traffic, and performance.