# Experiment Tracking

Comprehensive experiment management, metrics logging, and artifact tracking, with integrations with popular ML frameworks. Vertex AI Experiments provides centralized tracking for model development and comparison.

## Capabilities

### Experiment Management

Create and manage experiments for organizing related model training runs and comparisons.
```python { .api }
class Experiment:
    @classmethod
    def create(
        cls,
        experiment_id: str,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        **kwargs
    ) -> 'Experiment': ...

    @classmethod
    def get(cls, experiment_id: str, **kwargs) -> 'Experiment': ...

    @classmethod
    def list(cls, **kwargs) -> List['Experiment']: ...

    def get_data_frame(self, **kwargs) -> 'pandas.DataFrame': ...

    @property
    def resource_name(self) -> str: ...

    @property
    def experiment_id(self) -> str: ...
```
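
For example, an existing experiment can be fetched by ID (or all experiments listed) and its runs compared as a DataFrame. A minimal sketch against the API above; the project and experiment ID (`customer-segmentation`) are purely illustrative:

```python
import google.cloud.aiplatform as aiplatform

aiplatform.init(project='my-project', location='us-central1')

# Fetch one experiment by ID, or enumerate all experiments in the project
experiment = aiplatform.Experiment.get('customer-segmentation')  # hypothetical experiment ID
for exp in aiplatform.Experiment.list():
    print(exp.experiment_id, exp.resource_name)

# One row per run, with parameter and metric columns
df = experiment.get_data_frame()
print(df.head())
```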

### Experiment Runs

Individual training runs within an experiment, each with comprehensive metadata and artifact tracking.

```python { .api }
class ExperimentRun:
    @classmethod
    def create(
        cls,
        run_id: str,
        experiment: Union[str, Experiment],
        display_name: Optional[str] = None,
        **kwargs
    ) -> 'ExperimentRun': ...

    def log_params(self, params: Dict[str, Union[str, int, float]]) -> None: ...
    def log_metrics(self, metrics: Dict[str, Union[int, float]]) -> None: ...

    def log_classification_metrics(
        self,
        labels: List[str],
        matrix: List[List[int]],
        fpr: Optional[List[float]] = None,
        tpr: Optional[List[float]] = None,
        threshold: Optional[List[float]] = None,
        display_name: Optional[str] = None
    ) -> None: ...

    def log_model(
        self,
        model: Any,
        artifact_id: Optional[str] = None,
        uri: Optional[str] = None,
        input_example: Optional[Any] = None,
        display_name: Optional[str] = None,
        metadata: Optional[Dict] = None,
        **kwargs
    ) -> None: ...

    def log_time_series_metrics(
        self,
        metrics: Dict[str, List[Union[int, float]]],
        step: Optional[List[int]] = None,
        wall_time: Optional[List[Union[int, float]]] = None
    ) -> None: ...

    def end_run(self, state: Optional[str] = None) -> None: ...

    @property
    def resource_name(self) -> str: ...

    @property
    def run_id(self) -> str: ...
```
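
Beyond scalar metrics, a run can record richer evaluation artifacts such as confusion matrices and ROC curves via `log_classification_metrics`. A minimal sketch against the API above; the run ID, experiment name, labels, and matrix values are all illustrative:

```python
import google.cloud.aiplatform as aiplatform

# Assumes aiplatform.init(...) has already been called for the project/location
run = aiplatform.ExperimentRun.create(
    run_id='eval-run-001',                 # hypothetical run ID
    experiment='customer-segmentation',    # hypothetical experiment
)

run.log_params({'decision_threshold': 0.5})

# Rows/columns of the confusion matrix follow the order of `labels`
run.log_classification_metrics(
    labels=['churn', 'no-churn'],
    matrix=[[38, 4], [7, 51]],
    fpr=[0.0, 0.12, 1.0],
    tpr=[0.0, 0.78, 1.0],
    threshold=[1.0, 0.5, 0.0],
    display_name='holdout-confusion-matrix',
)

run.end_run()
```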

### Global Experiment Functions

Convenience functions for experiment tracking without explicit experiment/run management.

```python { .api }
def init(
    project: str,
    location: str,
    experiment: Optional[str] = None,
    experiment_description: Optional[str] = None,
    staging_bucket: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    encryption_spec_key_name: Optional[str] = None,
    **kwargs
) -> None: ...

def start_run(
    run: str,
    resume: bool = False,
    experiment: Optional[str] = None,
    **kwargs
) -> ExperimentRun: ...

def end_run(state: Optional[str] = None) -> None: ...

def log_params(params: Dict[str, Union[str, int, float]]) -> None: ...

def log_metrics(metrics: Dict[str, Union[int, float]]) -> None: ...

def log_classification_metrics(
    labels: List[str],
    matrix: List[List[int]],
    fpr: Optional[List[float]] = None,
    tpr: Optional[List[float]] = None,
    threshold: Optional[List[float]] = None,
    display_name: Optional[str] = None
) -> None: ...

def log_model(
    model: Any,
    artifact_id: Optional[str] = None,
    uri: Optional[str] = None,
    input_example: Optional[Any] = None,
    display_name: Optional[str] = None,
    metadata: Optional[Dict] = None,
    **kwargs
) -> None: ...

def log_time_series_metrics(
    metrics: Dict[str, List[Union[int, float]]],
    step: Optional[List[int]] = None,
    wall_time: Optional[List[Union[int, float]]] = None
) -> None: ...

def get_experiment_df(experiment: Optional[str] = None) -> 'pandas.DataFrame': ...

def get_pipeline_df(pipeline: str) -> 'pandas.DataFrame': ...

def autolog(framework: Optional[str] = None, **kwargs) -> None: ...

def start_execution(
    schema_title: str,
    display_name: Optional[str] = None,
    **kwargs
) -> 'Execution': ...

def log(data: Dict[str, Any]) -> None: ...
```
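
The global `log_time_series_metrics` function is intended for step-wise logging inside a training loop, in contrast to the one-shot summary logged by `log_metrics`. A minimal sketch against the signatures above; the run name is hypothetical and the loss values stand in for a real training step:

```python
import google.cloud.aiplatform as aiplatform

aiplatform.init(project='my-project', location='us-central1', experiment='customer-segmentation')
aiplatform.start_run('timeseries-run-001')  # hypothetical run name

for epoch in range(5):
    # loss, val_loss = train_one_epoch(...)  # hypothetical training step
    loss, val_loss = 1.0 / (epoch + 1), 1.2 / (epoch + 1)
    aiplatform.log_time_series_metrics(
        metrics={'loss': [loss], 'val_loss': [val_loss]},
        step=[epoch],
    )

aiplatform.end_run()
```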

### Metadata Resources

Structured metadata resources for tracking ML artifacts and their relationships.

```python { .api }
class Artifact:
    @classmethod
    def create(
        cls,
        schema_title: str,
        uri: Optional[str] = None,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict] = None,
        **kwargs
    ) -> 'Artifact': ...

    def log_params(self, params: Dict[str, Union[str, int, float]]) -> None: ...
    def log_metrics(self, metrics: Dict[str, Union[int, float]]) -> None: ...

    @property
    def resource_name(self) -> str: ...

    @property
    def artifact_id(self) -> str: ...

    @property
    def uri(self) -> Optional[str]: ...

    @property
    def metadata(self) -> Dict: ...


class Execution:
    @classmethod
    def create(
        cls,
        schema_title: str,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict] = None,
        **kwargs
    ) -> 'Execution': ...

    def assign_input_artifacts(self, artifacts: List[Artifact]) -> None: ...
    def assign_output_artifacts(self, artifacts: List[Artifact]) -> None: ...

    @property
    def resource_name(self) -> str: ...

    @property
    def execution_id(self) -> str: ...


class Context:
    @classmethod
    def create(
        cls,
        schema_title: str,
        display_name: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict] = None,
        **kwargs
    ) -> 'Context': ...

    def add_artifacts_and_executions(
        self,
        artifact_resource_names: Optional[List[str]] = None,
        execution_resource_names: Optional[List[str]] = None
    ) -> None: ...

    @property
    def resource_name(self) -> str: ...

    @property
    def context_id(self) -> str: ...
```
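
These resources are typically combined to record lineage: `Artifact`s describe inputs and outputs, an `Execution` represents the processing step that consumed and produced them, and a `Context` groups related artifacts and executions. A minimal sketch; the schema titles, URIs, and display names are illustrative only:

```python
import google.cloud.aiplatform as aiplatform

# Assumes aiplatform.init(...) has already been called for the project/location
raw_data = aiplatform.Artifact.create(
    schema_title='system.Dataset',
    uri='gs://my-bucket/raw/customers.csv',          # hypothetical URI
    display_name='raw-customer-data',
)
features = aiplatform.Artifact.create(
    schema_title='system.Dataset',
    uri='gs://my-bucket/features/customers.parquet',  # hypothetical URI
    display_name='customer-features',
)

# Execution representing the preprocessing step, with its lineage
preprocess = aiplatform.Execution.create(
    schema_title='system.ContainerExecution',
    display_name='feature-engineering',
)
preprocess.assign_input_artifacts([raw_data])
preprocess.assign_output_artifacts([features])

# Context grouping the artifacts and execution for later querying
context = aiplatform.Context.create(
    schema_title='system.Experiment',                 # hypothetical schema choice
    display_name='feature-engineering-lineage',
)
context.add_artifacts_and_executions(
    artifact_resource_names=[raw_data.resource_name, features.resource_name],
    execution_resource_names=[preprocess.resource_name],
)
```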

### Model Saving

Comprehensive model artifact management with framework integration.

```python { .api }
def save_model(
    model: Any,
    artifact_id: str,
    uri: Optional[str] = None,
    input_example: Optional[Any] = None,
    display_name: Optional[str] = None,
    metadata: Optional[Dict] = None,
    **kwargs
) -> Artifact: ...

def get_experiment_model(
    artifact_id: str,
    experiment: Optional[str] = None,
    **kwargs
) -> Any: ...
```
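
A minimal save-and-reload sketch against the declarations above, using a small scikit-learn model; the artifact ID and URI are illustrative, and it assumes `get_experiment_model` returns the deserialized model as declared:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
import google.cloud.aiplatform as aiplatform

# Tiny toy dataset so the snippet is self-contained
X = np.random.rand(20, 3)
y = (X[:, 0] > 0.5).astype(int)
model = LogisticRegression().fit(X, y)

# Persist the trained model as an experiment model artifact
artifact = aiplatform.save_model(
    model=model,
    artifact_id='customer-segmentation-v1',                 # hypothetical artifact ID
    uri='gs://my-bucket/models/customer-segmentation-v1',   # hypothetical URI
    display_name='customer-segmentation-v1',
)
print(artifact.resource_name)

# Later, retrieve the saved model by its artifact ID
restored = aiplatform.get_experiment_model('customer-segmentation-v1')
```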

## Usage Examples

**Basic experiment tracking:**
```python
import google.cloud.aiplatform as aiplatform

# Initialize with experiment
aiplatform.init(
    project='my-project',
    location='us-central1',
    experiment='customer-segmentation',
    experiment_description='Customer segmentation model experiments'
)

# Start a run
aiplatform.start_run('run-001', resume=False)

# Log parameters
aiplatform.log_params({
    'learning_rate': 0.01,
    'batch_size': 32,
    'epochs': 100,
    'model_type': 'xgboost'
})

# Train model (your training code here)
# model = train_model(...)

# Log metrics
aiplatform.log_metrics({
    'accuracy': 0.95,
    'precision': 0.92,
    'recall': 0.89,
    'f1_score': 0.90
})

# Log model
aiplatform.log_model(model, artifact_id='customer-segmentation-v1')

# End run
aiplatform.end_run()
```

**Advanced experiment management:**
```python
# Create explicit experiment
experiment = aiplatform.Experiment.create(
    experiment_id='hyperparameter-tuning',
    display_name='Hyperparameter Tuning Experiment',
    description='Testing different hyperparameter combinations'
)

# Create multiple runs
for lr in [0.001, 0.01, 0.1]:
    run = aiplatform.ExperimentRun.create(
        run_id=f'lr-{lr}',
        experiment=experiment
    )

    run.log_params({'learning_rate': lr})

    # Train and evaluate model
    # accuracy = train_and_evaluate(lr)

    run.log_metrics({'accuracy': accuracy})
    run.end_run()

# Get experiment results as DataFrame
df = experiment.get_data_frame()
print(df[['run_name', 'param.learning_rate', 'metric.accuracy']])
```

**Framework integration with autolog:**
```python
import tensorflow as tf

# Enable automatic logging for supported frameworks
aiplatform.autolog(framework='tensorflow')

# Or enable for multiple frameworks
aiplatform.autolog()  # Auto-detects framework

# Your training code - metrics/parameters logged automatically
model = tf.keras.Sequential([...])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_val, y_val))
```

This experiment tracking system provides full lifecycle management for ML experiments, with automatic integration with popular frameworks and detailed metadata tracking.