# Experiments and Tracking

Experiment management and tracking capabilities for organizing ML workflows, comparing runs, and tracking metrics across training jobs to enable reproducible machine learning research and development.

## Capabilities

### Experiment Management

Core classes for creating and managing experiments that group related ML runs and enable systematic comparison of results.
```python { .api }
class Experiment:
    """
    SageMaker experiment for organizing and tracking ML workflows.

    Parameters:
    - experiment_name (str): Name of the experiment
    - description (str, optional): Description of the experiment
    - display_name (str, optional): Display name for the experiment
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, experiment_name: str, description: str = None, **kwargs): ...

    def create(self, description: str = None) -> dict: ...

    def load(self) -> dict: ...

    def delete(self): ...

    def describe(self) -> dict: ...

    def list_trials(self, created_before: datetime = None,
                    created_after: datetime = None, sort_by: str = None,
                    sort_order: str = None, max_results: int = None) -> List[dict]: ...

    @classmethod
    def list(cls, created_before: datetime = None, created_after: datetime = None,
             sort_by: str = None, sort_order: str = None,
             max_results: int = None, **kwargs) -> List['Experiment']: ...
```
### Run Management

Classes for individual experiment runs with comprehensive tracking of parameters, metrics, and artifacts.

```python { .api }
class Run:
    """
    Individual experiment run for tracking parameters, metrics, and artifacts.

    Parameters:
    - experiment_name (str, optional): Name of parent experiment
    - run_name (str, optional): Name of the run
    - sagemaker_session (Session, optional): SageMaker session
    """
    def __init__(self, experiment_name: str = None, run_name: str = None,
                 sagemaker_session: Session = None): ...

    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...

    def log_parameter(self, name: str, value, step: int = None): ...

    def log_parameters(self, parameters: dict, step: int = None): ...

    def log_metric(self, name: str, value: float, step: int = None,
                   timestamp: datetime = None): ...

    def log_metrics(self, metrics: dict, step: int = None,
                    timestamp: datetime = None): ...

    def log_artifact(self, name: str, value: str, media_type: str = None,
                     step: int = None, timestamp: datetime = None): ...

    def log_artifacts(self, artifacts: dict, step: int = None,
                      timestamp: datetime = None): ...

    def log_file(self, file_path: str, name: str = None, step: int = None,
                 timestamp: datetime = None): ...

    def log_files(self, file_paths: List[str], step: int = None,
                  timestamp: datetime = None): ...

    def log_precision_recall(self, y_true, y_pred, title: str = None,
                             is_output: bool = True, step: int = None): ...

    def log_confusion_matrix(self, y_true, y_pred, title: str = None,
                             is_output: bool = True, step: int = None): ...

    def log_roc_curve(self, y_true, y_score, title: str = None,
                      is_output: bool = True, step: int = None): ...

    def wait(self, logs: bool = True): ...

    def list_metrics(self) -> List[dict]: ...

    def list_parameters(self) -> List[dict]: ...

    def list_artifacts(self) -> List[dict]: ...

    def delete(self): ...

def load_run(sagemaker_session: Session = None, **kwargs) -> Run:
    """
    Load an existing experiment run.

    Parameters:
    - sagemaker_session (Session, optional): SageMaker session
    - experiment_name (str, optional): Experiment name
    - run_name (str, optional): Run name

    Returns:
    - Run: Loaded run object
    """

def list_runs(experiment_name: str = None, created_before: datetime = None,
              created_after: datetime = None, sort_by: str = None,
              sort_order: str = None, max_results: int = None,
              sagemaker_session: Session = None) -> List[dict]:
    """
    List experiment runs with optional filtering.

    Parameters:
    - experiment_name (str, optional): Filter by experiment name
    - created_before (datetime, optional): Filter by creation time
    - created_after (datetime, optional): Filter by creation time
    - sort_by (str, optional): Sort criterion
    - sort_order (str, optional): Sort order ("Ascending" or "Descending")
    - max_results (int, optional): Maximum number of results
    - sagemaker_session (Session, optional): SageMaker session

    Returns:
    - List[dict]: List of run summaries
    """
```
### Tracking Integration

Classes for integrating experiment tracking with SageMaker training jobs and estimators.

```python { .api }
class RunFileLoader:
    """
    Utility for loading metrics and parameters from run files.

    Parameters:
    - run (Run): The run object to load data from
    """
    def __init__(self, run: Run): ...

    def load_metrics(self) -> 'DataFrame': ...

    def load_parameters(self) -> 'DataFrame': ...

    def load_artifacts(self) -> 'DataFrame': ...

class ExperimentConfig:
    """
    Configuration for associating training jobs with experiments.

    Parameters:
    - experiment_name (str): Name of the experiment
    - trial_name (str, optional): Name of the trial/run
    - trial_component_display_name (str, optional): Display name for trial component
    """
    def __init__(self, experiment_name: str, trial_name: str = None,
                 trial_component_display_name: str = None): ...
```
### Trial and Trial Component Management

Lower-level classes for fine-grained experiment tracking at the trial component level.

```python { .api }
class Trial:
    """
    SageMaker trial that groups related trial components within an experiment.

    Parameters:
    - trial_name (str): Name of the trial
    - experiment_name (str): Name of parent experiment
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, trial_name: str, experiment_name: str, **kwargs): ...

    def create(self) -> dict: ...

    def load(self) -> dict: ...

    def delete(self): ...

    def add_trial_component(self, trial_component): ...

    def remove_trial_component(self, trial_component_name: str): ...

    def list_trial_components(self) -> List[dict]: ...

class TrialComponent:
    """
    Individual trial component representing a single step in an ML workflow.

    Parameters:
    - trial_component_name (str): Name of the trial component
    - display_name (str, optional): Display name
    - status (dict, optional): Status information
    - start_time (datetime, optional): Start time
    - end_time (datetime, optional): End time
    - parameters (dict, optional): Input parameters
    - input_artifacts (dict, optional): Input artifacts
    - output_artifacts (dict, optional): Output artifacts
    - metrics (dict, optional): Metrics
    - source (dict, optional): Source information
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, trial_component_name: str, **kwargs): ...

    def create(self) -> dict: ...

    def load(self) -> dict: ...

    def save(self): ...

    def delete(self): ...
```
### Search and Analytics

Classes for searching and analyzing experiment results across multiple runs and experiments.

```python { .api }
class Search:
    """
    Search across SageMaker resources including experiments, trials, and trial components.

    Parameters:
    - resource (str): Resource type to search ("Experiment", "Trial", "TrialComponent", "TrainingJob")
    - search_expression (dict, optional): Search criteria
    - sort_by (str, optional): Sort criterion
    - sort_order (str, optional): Sort order
    - max_results (int, optional): Maximum results to return
    - sagemaker_session (Session, optional): SageMaker session
    """
    def __init__(self, resource: str, **kwargs): ...

    def search(self, search_expression: dict = None) -> List[dict]: ...

# Enums for sorting and filtering
class SortByType:
    """Sort criteria for experiment searches."""
    Name = "Name"
    CreationTime = "CreationTime"

class SortOrderType:
    """Sort order for experiment searches."""
    Ascending = "Ascending"
    Descending = "Descending"
```
## Usage Examples

### Basic Experiment Tracking

```python
from sagemaker.experiments import Experiment, Run

# Create experiment
experiment = Experiment(
    experiment_name="recommendation-model-experiment",
    description="Comparing different recommendation algorithms"
)
experiment.create()

# Create and use a run for tracking
with Run(experiment_name="recommendation-model-experiment",
         run_name="xgboost-baseline") as run:

    # Log hyperparameters
    run.log_parameter("learning_rate", 0.1)
    run.log_parameter("max_depth", 6)
    run.log_parameter("n_estimators", 100)

    # Train model (your training code here)
    # model = train_model(...)

    # Log metrics during training
    for epoch in range(10):
        # Your training loop
        train_loss = 0.5 - epoch * 0.02 # Example
        val_accuracy = 0.7 + epoch * 0.02 # Example

        run.log_metric("train_loss", train_loss, step=epoch)
        run.log_metric("validation_accuracy", val_accuracy, step=epoch)

    # Log final results
    run.log_metric("final_accuracy", 0.89)
    run.log_metric("final_f1_score", 0.87)

    # Log model artifacts
    run.log_file("model.pkl", name="trained_model")
    run.log_file("feature_importance.png", name="feature_plot")
```
### Integration with SageMaker Training

```python
from sagemaker.experiments import ExperimentConfig
from sagemaker.xgboost import XGBoost

# Create experiment
experiment = Experiment(
    experiment_name="hyperparameter-optimization",
    description="XGBoost hyperparameter optimization"
)
experiment.create()

# Configure experiment tracking for estimator
experiment_config = ExperimentConfig(
    experiment_name="hyperparameter-optimization",
    trial_name="xgboost-trial-1"
)

# Create estimator with experiment tracking
xgb_estimator = XGBoost(
    entry_point="train.py",
    framework_version="1.5-1",
    instance_type="ml.m5.large",
    role=role,
    hyperparameters={
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'learning_rate': 0.1,
        'max_depth': 5
    }
)

# Train with experiment tracking
xgb_estimator.fit(
    inputs={"train": "s3://bucket/train", "validation": "s3://bucket/val"},
    experiment_config=experiment_config
)
```
### Comparing Multiple Runs

```python
from sagemaker.experiments import list_runs, load_run

# List all runs in an experiment
runs = list_runs(experiment_name="recommendation-model-experiment")

print("Experiment Results:")
print("-" * 50)
for run_summary in runs:
    run_name = run_summary['TrialName']

    # Load individual run
    run = load_run(experiment_name="recommendation-model-experiment",
                   run_name=run_name)

    # Get metrics
    metrics = run.list_metrics()
    accuracy_metrics = [m for m in metrics if m['MetricName'] == 'final_accuracy']

    if accuracy_metrics:
        accuracy = accuracy_metrics[0]['Value']
        print(f"Run: {run_name} - Accuracy: {accuracy:.3f}")

# Find best performing run
best_run = max(runs, key=lambda x: x.get('final_accuracy', 0))
print(f"\nBest run: {best_run['TrialName']}")
```
### Advanced Search and Analytics

```python
from sagemaker.experiments import Search

# Search for high-performing runs
search = Search(
    resource="TrialComponent",
    search_expression={
        "Filters": [{
            "Name": "Metrics.final_accuracy.Value",
            "Operator": "GreaterThan",
            "Value": "0.85"
        }]
    },
    sort_by="Metrics.final_accuracy.Value",
    sort_order="Descending",
    max_results=10
)

results = search.search()
print("Top performing models:")
for result in results:
    name = result.get('TrialComponentName', 'Unknown')
    metrics = result.get('Metrics', {})
    accuracy = metrics.get('final_accuracy', {}).get('Value', 'N/A')
    print(f"- {name}: {accuracy}")
```