# Model Management

Comprehensive model lifecycle management: uploading, versioning, deployment, and serving, with enterprise-grade features for production ML systems. The traditional Vertex AI SDK provides resource-based APIs for managing models as cloud resources, with fine-grained control over deployment configurations.

## Capabilities

### Model Registration and Upload

Upload and register trained models with comprehensive metadata and version management.
```python { .api }
class Model:
    @classmethod
    def upload(
        cls,
        display_name: str,
        artifact_uri: str,
        serving_container_image_uri: str,
        serving_container_predict_route: Optional[str] = None,
        serving_container_health_route: Optional[str] = None,
        description: Optional[str] = None,
        serving_container_command: Optional[Sequence[str]] = None,
        serving_container_args: Optional[Sequence[str]] = None,
        serving_container_environment_variables: Optional[Dict[str, str]] = None,
        serving_container_ports: Optional[Sequence[int]] = None,
        instance_schema_uri: Optional[str] = None,
        parameters_schema_uri: Optional[str] = None,
        prediction_schema_uri: Optional[str] = None,
        explanation_metadata: Optional[explain.ExplanationMetadata] = None,
        explanation_parameters: Optional[explain.ExplanationParameters] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        training_job: Optional[training_jobs._TrainingJob] = None,
        parent_model: Optional[str] = None,
        is_default_version: bool = True,
        version_aliases: Optional[Sequence[str]] = None,
        version_description: Optional[str] = None,
        model_id: Optional[str] = None,
        **kwargs
    ) -> 'Model': ...

    def update(
        self,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        **kwargs
    ) -> 'Model': ...

    @property
    def resource_name(self) -> str: ...
    @property
    def display_name(self) -> str: ...
    @property
    def description(self) -> str: ...
    @property
    def labels(self) -> Dict[str, str]: ...
    @property
    def version_id(self) -> str: ...
    @property
    def version_aliases(self) -> Sequence[str]: ...
    @property
    def artifact_uri(self) -> str: ...
```

#### Usage Examples

**Upload a custom model:**

```python
from google.cloud import aiplatform

aiplatform.init(project='my-project', location='us-central1')

model = aiplatform.Model.upload(
    display_name='my-custom-model',
    artifact_uri='gs://my-bucket/model-artifacts/',
    serving_container_image_uri='gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-8:latest',
    description='Custom TensorFlow model for classification',
    labels={'environment': 'production', 'version': '1.0'}
)

print(f"Model uploaded: {model.resource_name}")
```
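
**Upload a new version of an existing model:**

The `parent_model`, `is_default_version`, and `version_aliases` parameters in the `upload` signature above register an upload as a new version of an existing model rather than a new model. A minimal sketch, continuing from the previous example (the v2 artifact URI is illustrative):

```python
# Register new artifacts as a new, non-default version of the model above
model_v2 = aiplatform.Model.upload(
    display_name='my-custom-model',
    artifact_uri='gs://my-bucket/model-artifacts-v2/',
    serving_container_image_uri='gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-8:latest',
    parent_model=model.resource_name,
    is_default_version=False,
    version_aliases=['candidate'],
    version_description='Retrained model candidate'
)

print(f"New version: {model_v2.version_id}")
```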

### Model Deployment

Deploy models to endpoints for online serving with configurable resources and traffic management.
```python { .api }
class Model:
    def deploy(
        self,
        endpoint: Optional[Endpoint] = None,
        deployed_model_display_name: Optional[str] = None,
        traffic_percentage: int = 0,
        traffic_split: Optional[Dict[str, int]] = None,
        machine_type: str = 'n1-standard-4',
        min_replica_count: int = 1,
        max_replica_count: int = 1,
        accelerator_type: Optional[str] = None,
        accelerator_count: Optional[int] = None,
        service_account: Optional[str] = None,
        explanation_metadata: Optional[explain.ExplanationMetadata] = None,
        explanation_parameters: Optional[explain.ExplanationParameters] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None,
        encryption_spec_key_name: Optional[str] = None,
        network: Optional[str] = None,
        sync: bool = True,
        deploy_request_timeout: Optional[float] = None,
        autoscaling_target_cpu_utilization: Optional[int] = None,
        autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
        **kwargs
    ) -> Endpoint: ...

    def undeploy_all(self, sync: bool = True) -> None: ...

    def get_model_evaluation(self, evaluation_id: str) -> ModelEvaluation: ...
    def list_model_evaluations(self) -> List[ModelEvaluation]: ...
```

#### Usage Examples

**Deploy model to new endpoint:**

```python
# Deploy to a new endpoint
endpoint = model.deploy(
    deployed_model_display_name='my-model-v1',
    machine_type='n1-standard-4',
    min_replica_count=1,
    max_replica_count=5,
    traffic_percentage=100
)

print(f"Model deployed to endpoint: {endpoint.resource_name}")
```

**Deploy to existing endpoint with traffic split:**

```python
# Deploy to an existing endpoint with a traffic split
existing_endpoint = aiplatform.Endpoint('projects/my-project/locations/us-central1/endpoints/123')
model.deploy(
    endpoint=existing_endpoint,
    deployed_model_display_name='my-model-v2',
    traffic_percentage=20  # 20% traffic to new version
)
```
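
**Undeploy a retired model:**

When a model is retired, `undeploy_all` (shown in the API above) removes it from every endpoint it is deployed to. A minimal teardown sketch, assuming the SDK's standard `delete()` method on the model resource:

```python
# Remove this model from all endpoints it is deployed to,
# then delete the model resource itself (irreversible)
model.undeploy_all(sync=True)
model.delete()
```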

### Endpoint Management

Create and manage serving endpoints with comprehensive traffic management and monitoring capabilities.
```python { .api }
class Endpoint:
    @classmethod
    def create(
        cls,
        display_name: str,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        encryption_spec_key_name: Optional[str] = None,
        network: Optional[str] = None,
        sync: bool = True,
        create_request_timeout: Optional[float] = None,
        **kwargs
    ) -> 'Endpoint': ...

    def predict(
        self,
        instances: List[Dict],
        parameters: Optional[Dict] = None,
        timeout: Optional[float] = None,
        use_dedicated_endpoint: bool = False,
        **kwargs
    ) -> Prediction: ...

    def explain(
        self,
        instances: List[Dict],
        parameters: Optional[Dict] = None,
        deployed_model_id: Optional[str] = None,
        timeout: Optional[float] = None,
        **kwargs
    ) -> Prediction: ...

    def update(
        self,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        traffic_split: Optional[Dict[str, int]] = None,
        **kwargs
    ) -> 'Endpoint': ...

    @property
    def deployed_models(self) -> List[DeployedModel]: ...
    @property
    def traffic_split(self) -> Dict[str, int]: ...
```

#### Usage Examples

**Create endpoint and make predictions:**

```python
# Create a new endpoint
endpoint = aiplatform.Endpoint.create(
    display_name='prediction-endpoint',
    description='Endpoint for model predictions'
)

# Make predictions
instances = [
    {'feature1': 1.0, 'feature2': 2.0, 'feature3': 3.0},
    {'feature1': 4.0, 'feature2': 5.0, 'feature3': 6.0}
]

predictions = endpoint.predict(instances=instances)
print(f"Predictions: {predictions.predictions}")
```
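
**Request explanations:**

For deployed models configured with explanation metadata (see Explanation and Interpretability below), `explain` returns feature attributions alongside predictions. A sketch reusing the `instances` from above:

```python
# Request predictions with feature attributions; requires the deployed
# model to have been configured with explanation metadata and parameters
response = endpoint.explain(instances=instances)
for explanation in response.explanations:
    print(explanation)
```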

**Traffic splitting between model versions:**

```python
# Update the traffic split between deployed models; keys are
# deployed model IDs, not display names
endpoint.update(traffic_split={
    'deployed-model-id-1': 80,  # 80% traffic
    'deployed-model-id-2': 20   # 20% traffic
})
```

### Private Endpoints

Deploy models on private networks for enhanced security and compliance.
```python { .api }
class PrivateEndpoint:
    @classmethod
    def create(
        cls,
        display_name: str,
        network: str,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        encryption_spec_key_name: Optional[str] = None,
        sync: bool = True,
        **kwargs
    ) -> 'PrivateEndpoint': ...

    def predict(
        self,
        instances: List[Dict],
        parameters: Optional[Dict] = None,
        timeout: Optional[float] = None,
        **kwargs
    ) -> Prediction: ...
```
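
#### Usage Examples

**Serve predictions on a VPC network:**

A minimal sketch following the signature above; the project and network names are placeholders, and passing a `PrivateEndpoint` as the `endpoint` argument of `Model.deploy` is assumed here:

```python
# Create an endpoint reachable only from the given VPC network
private_endpoint = aiplatform.PrivateEndpoint.create(
    display_name='private-prediction-endpoint',
    network='projects/my-project/global/networks/my-vpc'
)

# Deploy into the private endpoint, then predict from inside the network
model.deploy(endpoint=private_endpoint, machine_type='n1-standard-4')
prediction = private_endpoint.predict(instances=[{'feature1': 1.0}])
```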

### Model Registry

Centralized model versioning and lifecycle management with lineage tracking.
```python { .api }
class ModelRegistry:
    @classmethod
    def create_model(
        cls,
        model_id: str,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        **kwargs
    ) -> Model: ...

    @classmethod
    def get_model(cls, model_id: str, **kwargs) -> Model: ...

    @classmethod
    def list_models(cls, filter: Optional[str] = None, **kwargs) -> List[Model]: ...

    @classmethod
    def create_model_version(
        cls,
        model_id: str,
        artifact_uri: str,
        serving_container_image_uri: str,
        version_aliases: Optional[Sequence[str]] = None,
        version_description: Optional[str] = None,
        is_default_version: bool = False,
        **kwargs
    ) -> Model: ...

    @classmethod
    def get_model_version(cls, model_id: str, version_id: str, **kwargs) -> Model: ...

    @classmethod
    def list_model_versions(cls, model_id: str, **kwargs) -> List[Model]: ...

    @classmethod
    def delete_model_version(cls, model_id: str, version_id: str, **kwargs) -> None: ...
```

#### Usage Examples

**Model versioning:**

```python
# Create a model in the registry
model = aiplatform.ModelRegistry.create_model(
    model_id='my-classification-model',
    display_name='Customer Classification Model',
    description='Model for customer segmentation'
)

# Add versions
v1 = aiplatform.ModelRegistry.create_model_version(
    model_id='my-classification-model',
    artifact_uri='gs://my-bucket/model-v1/',
    serving_container_image_uri='gcr.io/project/image:v1',
    version_aliases=['stable'],
    is_default_version=True
)

# Add a new version
v2 = aiplatform.ModelRegistry.create_model_version(
    model_id='my-classification-model',
    artifact_uri='gs://my-bucket/model-v2/',
    serving_container_image_uri='gcr.io/project/image:v2',
    version_aliases=['experimental']
)
```
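
**Inspect and retire versions:**

Registered versions can be listed, fetched by ID, and deleted with the registry methods shown above. A short sketch continuing the example; the version ID `'1'` is illustrative:

```python
# Enumerate all versions of the model
for version in aiplatform.ModelRegistry.list_model_versions(
        model_id='my-classification-model'):
    print(version.version_id, version.version_aliases)

# Fetch a specific version, then delete it once it is retired
v1 = aiplatform.ModelRegistry.get_model_version(
    model_id='my-classification-model', version_id='1')
aiplatform.ModelRegistry.delete_model_version(
    model_id='my-classification-model', version_id='1')
```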

### Deployment Resource Pools

Shared compute resources for cost optimization and resource management across multiple model deployments.
```python { .api }
class DeploymentResourcePool:
    @classmethod
    def create(
        cls,
        deployment_resource_pool_id: str,
        machine_spec: MachineSpec,
        min_replica_count: int = 1,
        max_replica_count: int = 1,
        autoscaling_target_cpu_utilization: Optional[int] = None,
        autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
        **kwargs
    ) -> 'DeploymentResourcePool': ...

    def update(
        self,
        min_replica_count: Optional[int] = None,
        max_replica_count: Optional[int] = None,
        autoscaling_target_cpu_utilization: Optional[int] = None,
        **kwargs
    ) -> 'DeploymentResourcePool': ...

    @property
    def dedicated_resources(self) -> DedicatedResources: ...
```
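
#### Usage Examples

**Create a shared pool:**

A sketch following the `create` signature above, constructing the `MachineSpec` type from the Types section with keyword arguments (an assumption of this sketch):

```python
# Create a shared pool that several deployed models can draw from
pool = aiplatform.DeploymentResourcePool.create(
    deployment_resource_pool_id='shared-cpu-pool',
    machine_spec=MachineSpec(machine_type='n1-standard-8'),
    min_replica_count=1,
    max_replica_count=4,
    autoscaling_target_cpu_utilization=60
)

print(pool.dedicated_resources)
```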

### Model Evaluation

Comprehensive model evaluation and performance analysis with automated metrics computation.
```python { .api }
class ModelEvaluation:
    @classmethod
    def create(
        cls,
        model: Model,
        dataset: Dataset,
        prediction_type: str,
        class_labels: Optional[List[str]] = None,
        prediction_label_column: Optional[str] = None,
        prediction_score_column: Optional[str] = None,
        ground_truth_column: Optional[str] = None,
        **kwargs
    ) -> 'ModelEvaluation': ...

    @property
    def metrics(self) -> Dict[str, float]: ...
    @property
    def metrics_schema_uri(self) -> str: ...
    @property
    def slice_dimensions(self) -> List[str]: ...
```
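
#### Usage Examples

**Inspect evaluation metrics:**

Evaluations attached to a model are retrieved through the `Model` methods shown earlier (`list_model_evaluations`, `get_model_evaluation`). A sketch printing the computed metrics:

```python
# List all evaluations recorded for the model and print their metrics
for evaluation in model.list_model_evaluations():
    for name, value in evaluation.metrics.items():
        print(f"{name}: {value}")
```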

## Types
```python { .api }
# Prediction response
class Prediction:
    predictions: List[Dict]
    deployed_model_id: str
    model_version_id: str
    model_resource_name: str
    explanations: Optional[List[Explanation]]

# Deployed model information
class DeployedModel:
    id: str
    display_name: str
    model: str
    model_version_id: str
    create_time: datetime
    dedicated_resources: Optional[DedicatedResources]
    automatic_resources: Optional[AutomaticResources]
    private_endpoints: Optional[PrivateServiceConnectConfig]

# Resource specifications
class MachineSpec:
    machine_type: str
    accelerator_type: Optional[str]
    accelerator_count: Optional[int]

class DedicatedResources:
    machine_spec: MachineSpec
    min_replica_count: int
    max_replica_count: int
    autoscaling_target_cpu_utilization: Optional[int]
    autoscaling_target_accelerator_duty_cycle: Optional[int]

class AutomaticResources:
    min_replica_count: int
    max_replica_count: int

# Model serving container specification
class ModelContainerSpec:
    image_uri: str
    command: Optional[List[str]]
    args: Optional[List[str]]
    env: Optional[List[EnvVar]]
    ports: Optional[List[Port]]
    predict_route: Optional[str]
    health_route: Optional[str]

# Environment variable
class EnvVar:
    name: str
    value: str

# Container port
class Port:
    container_port: int
    protocol: str
```

## Advanced Features

### Explanation and Interpretability

Built-in model explanation capabilities for understanding model predictions.
```python
# Model explanations are configured during deployment
explanation_metadata = explain.ExplanationMetadata(
    inputs={
        'feature_name': explain.ExplanationMetadata.InputMetadata(
            input_tensor_name='input_tensor_name'
        )
    },
    outputs={
        'output_name': explain.ExplanationMetadata.OutputMetadata(
            output_tensor_name='output_tensor_name'
        )
    }
)

explanation_parameters = explain.ExplanationParameters(
    sampled_shapley_attribution=explain.SampledShapleyAttribution(
        path_count=10
    )
)
```
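
These objects are then passed to `Model.deploy` (or `Model.upload`), after which `Endpoint.explain` returns attributions with each prediction. A sketch, assuming the `model` from the earlier examples:

```python
# Deploy with explanation support enabled
endpoint = model.deploy(
    deployed_model_display_name='my-model-explained',
    machine_type='n1-standard-4',
    explanation_metadata=explanation_metadata,
    explanation_parameters=explanation_parameters,
    traffic_percentage=100
)
```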

### A/B Testing and Canary Deployments

Built-in support for gradual rollouts and model comparison through traffic splitting.
```python
# Canary deployment with 5% traffic
model.deploy(
    endpoint=existing_endpoint,
    deployed_model_display_name='model-v2-canary',
    traffic_percentage=5
)

# Gradually increase traffic; traffic_split keys are deployed
# model IDs (see endpoint.traffic_split), not display names
endpoint.update(traffic_split={
    'deployed-model-id-v1': 80,
    'deployed-model-id-v2-canary': 20
})
```

### Multi-region Deployment

Deploy models across multiple regions for global availability and disaster recovery.
```python
# A Model is a regional resource, so artifacts must be uploaded in
# each target region before they can be deployed there
regions = ['us-central1', 'europe-west1', 'asia-southeast1']
endpoints = {}

for region in regions:
    aiplatform.init(location=region)
    regional_model = aiplatform.Model.upload(
        display_name=f'model-{region}',
        artifact_uri='gs://my-bucket/model-artifacts/',
        serving_container_image_uri='gcr.io/project/image:v1'
    )
    endpoints[region] = regional_model.deploy(
        deployed_model_display_name=f'model-{region}',
        machine_type='n1-standard-4'
    )
```

This comprehensive model management system provides enterprise-grade capabilities for deploying, serving, and managing ML models at scale, with fine-grained control over resources, traffic, and performance.