or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

amazon-algorithms.md, automl.md, core-training.md, data-processing.md, debugging-profiling.md, experiments.md, framework-training.md, hyperparameter-tuning.md, index.md, model-monitoring.md, model-serving.md, remote-functions.md

docs/experiments.md

# Experiments and Tracking

Experiment management and tracking capabilities for organizing ML workflows, comparing runs, and tracking metrics across training jobs to enable reproducible machine learning research and development.

## Capabilities

### Experiment Management

Core classes for creating and managing experiments that group related ML runs and enable systematic comparison of results.

```python { .api }
class Experiment:
    """
    SageMaker experiment for organizing and tracking ML workflows.

    Parameters:
    - experiment_name (str): Name of the experiment
    - description (str, optional): Description of the experiment
    - display_name (str, optional): Display name for the experiment
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, experiment_name: str, description: str = None, **kwargs): ...

    def create(self, description: str = None) -> dict: ...

    def load(self) -> dict: ...

    def delete(self): ...

    def describe(self) -> dict: ...

    def list_trials(self, created_before: datetime = None,
                    created_after: datetime = None, sort_by: str = None,
                    sort_order: str = None, max_results: int = None) -> List[dict]: ...

    @classmethod
    def list(cls, created_before: datetime = None, created_after: datetime = None,
             sort_by: str = None, sort_order: str = None,
             max_results: int = None, **kwargs) -> List['Experiment']: ...
```

### Run Management

Classes for individual experiment runs with comprehensive tracking of parameters, metrics, and artifacts.

```python { .api }
class Run:
    """
    Individual experiment run for tracking parameters, metrics, and artifacts.

    Parameters:
    - experiment_name (str, optional): Name of parent experiment
    - run_name (str, optional): Name of the run
    - sagemaker_session (Session, optional): SageMaker session
    """
    def __init__(self, experiment_name: str = None, run_name: str = None,
                 sagemaker_session: Session = None): ...

    def __enter__(self): ...
    def __exit__(self, exc_type, exc_val, exc_tb): ...

    def log_parameter(self, name: str, value, step: int = None): ...

    def log_parameters(self, parameters: dict, step: int = None): ...

    def log_metric(self, name: str, value: float, step: int = None,
                   timestamp: datetime = None): ...

    def log_metrics(self, metrics: dict, step: int = None,
                    timestamp: datetime = None): ...

    def log_artifact(self, name: str, value: str, media_type: str = None,
                     step: int = None, timestamp: datetime = None): ...

    def log_artifacts(self, artifacts: dict, step: int = None,
                      timestamp: datetime = None): ...

    def log_file(self, file_path: str, name: str = None, step: int = None,
                 timestamp: datetime = None): ...

    def log_files(self, file_paths: List[str], step: int = None,
                  timestamp: datetime = None): ...

    def log_precision_recall(self, y_true, y_pred, title: str = None,
                             is_output: bool = True, step: int = None): ...

    def log_confusion_matrix(self, y_true, y_pred, title: str = None,
                             is_output: bool = True, step: int = None): ...

    def log_roc_curve(self, y_true, y_score, title: str = None,
                      is_output: bool = True, step: int = None): ...

    def wait(self, logs: bool = True): ...

    def list_metrics(self) -> List[dict]: ...

    def list_parameters(self) -> List[dict]: ...

    def list_artifacts(self) -> List[dict]: ...

    def delete(self): ...

def load_run(sagemaker_session: Session = None, **kwargs) -> Run:
    """
    Load an existing experiment run.

    Parameters:
    - sagemaker_session (Session, optional): SageMaker session
    - experiment_name (str, optional): Experiment name
    - run_name (str, optional): Run name

    Returns:
    - Run: Loaded run object
    """

def list_runs(experiment_name: str = None, created_before: datetime = None,
              created_after: datetime = None, sort_by: str = None,
              sort_order: str = None, max_results: int = None,
              sagemaker_session: Session = None) -> List[dict]:
    """
    List experiment runs with optional filtering.

    Parameters:
    - experiment_name (str, optional): Filter by experiment name
    - created_before (datetime, optional): Filter by creation time
    - created_after (datetime, optional): Filter by creation time
    - sort_by (str, optional): Sort criterion
    - sort_order (str, optional): Sort order ("Ascending" or "Descending")
    - max_results (int, optional): Maximum number of results
    - sagemaker_session (Session, optional): SageMaker session

    Returns:
    - List[dict]: List of run summaries
    """
```

### Tracking Integration

Classes for integrating experiment tracking with SageMaker training jobs and estimators.

```python { .api }
class RunFileLoader:
    """
    Utility for loading metrics and parameters from run files.

    Parameters:
    - run (Run): The run object to load data from
    """
    def __init__(self, run: Run): ...

    def load_metrics(self) -> 'DataFrame': ...

    def load_parameters(self) -> 'DataFrame': ...

    def load_artifacts(self) -> 'DataFrame': ...

class ExperimentConfig:
    """
    Configuration for associating training jobs with experiments.

    Parameters:
    - experiment_name (str): Name of the experiment
    - trial_name (str, optional): Name of the trial/run
    - trial_component_display_name (str, optional): Display name for trial component
    """
    def __init__(self, experiment_name: str, trial_name: str = None,
                 trial_component_display_name: str = None): ...
```

### Trial and Trial Component Management

Lower-level classes for fine-grained experiment tracking at the trial component level.

```python { .api }
class Trial:
    """
    SageMaker trial that groups related trial components within an experiment.

    Parameters:
    - trial_name (str): Name of the trial
    - experiment_name (str): Name of parent experiment
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, trial_name: str, experiment_name: str, **kwargs): ...

    def create(self) -> dict: ...

    def load(self) -> dict: ...

    def delete(self): ...

    def add_trial_component(self, trial_component): ...

    def remove_trial_component(self, trial_component_name: str): ...

    def list_trial_components(self) -> List[dict]: ...

class TrialComponent:
    """
    Individual trial component representing a single step in an ML workflow.

    Parameters:
    - trial_component_name (str): Name of the trial component
    - display_name (str, optional): Display name
    - status (dict, optional): Status information
    - start_time (datetime, optional): Start time
    - end_time (datetime, optional): End time
    - parameters (dict, optional): Input parameters
    - input_artifacts (dict, optional): Input artifacts
    - output_artifacts (dict, optional): Output artifacts
    - metrics (dict, optional): Metrics
    - source (dict, optional): Source information
    - sagemaker_session (Session, optional): SageMaker session
    - tags (List[dict], optional): Resource tags
    """
    def __init__(self, trial_component_name: str, **kwargs): ...

    def create(self) -> dict: ...

    def load(self) -> dict: ...

    def save(self): ...

    def delete(self): ...
```

### Search and Analytics

Classes for searching and analyzing experiment results across multiple runs and experiments.

```python { .api }
class Search:
    """
    Search across SageMaker resources including experiments, trials, and trial components.

    Parameters:
    - resource (str): Resource type to search ("Experiment", "Trial", "TrialComponent", "TrainingJob")
    - search_expression (dict, optional): Search criteria
    - sort_by (str, optional): Sort criterion
    - sort_order (str, optional): Sort order
    - max_results (int, optional): Maximum results to return
    - sagemaker_session (Session, optional): SageMaker session
    """
    def __init__(self, resource: str, **kwargs): ...

    def search(self, search_expression: dict = None) -> List[dict]: ...

# Enums for sorting and filtering
class SortByType:
    """Sort criteria for experiment searches."""
    Name = "Name"
    CreationTime = "CreationTime"

class SortOrderType:
    """Sort order for experiment searches."""
    Ascending = "Ascending"
    Descending = "Descending"
```

## Usage Examples

### Basic Experiment Tracking

```python
from sagemaker.experiments import Experiment, Run

# Create experiment
experiment = Experiment(
    experiment_name="recommendation-model-experiment",
    description="Comparing different recommendation algorithms"
)
experiment.create()

# Create and use a run for tracking
with Run(experiment_name="recommendation-model-experiment",
         run_name="xgboost-baseline") as run:

    # Log hyperparameters
    run.log_parameter("learning_rate", 0.1)
    run.log_parameter("max_depth", 6)
    run.log_parameter("n_estimators", 100)

    # Train model (your training code here)
    # model = train_model(...)

    # Log metrics during training
    for epoch in range(10):
        # Your training loop
        train_loss = 0.5 - epoch * 0.02  # Example
        val_accuracy = 0.7 + epoch * 0.02  # Example

        run.log_metric("train_loss", train_loss, step=epoch)
        run.log_metric("validation_accuracy", val_accuracy, step=epoch)

    # Log final results
    run.log_metric("final_accuracy", 0.89)
    run.log_metric("final_f1_score", 0.87)

    # Log model artifacts
    run.log_file("model.pkl", name="trained_model")
    run.log_file("feature_importance.png", name="feature_plot")
```

### Integration with SageMaker Training

```python
from sagemaker.experiments import ExperimentConfig
from sagemaker.xgboost import XGBoost

# Create experiment
experiment = Experiment(
    experiment_name="hyperparameter-optimization",
    description="XGBoost hyperparameter optimization"
)
experiment.create()

# Configure experiment tracking for estimator
experiment_config = ExperimentConfig(
    experiment_name="hyperparameter-optimization",
    trial_name="xgboost-trial-1"
)

# Create estimator with experiment tracking
xgb_estimator = XGBoost(
    entry_point="train.py",
    framework_version="1.5-1",
    instance_type="ml.m5.large",
    role=role,
    hyperparameters={
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'learning_rate': 0.1,
        'max_depth': 5
    }
)

# Train with experiment tracking
xgb_estimator.fit(
    inputs={"train": "s3://bucket/train", "validation": "s3://bucket/val"},
    experiment_config=experiment_config
)
```

### Comparing Multiple Runs

```python
from sagemaker.experiments import list_runs, load_run

# List all runs in an experiment
runs = list_runs(experiment_name="recommendation-model-experiment")

print("Experiment Results:")
print("-" * 50)
for run_summary in runs:
    run_name = run_summary['TrialName']

    # Load individual run
    run = load_run(experiment_name="recommendation-model-experiment",
                   run_name=run_name)

    # Get metrics
    metrics = run.list_metrics()
    accuracy_metrics = [m for m in metrics if m['MetricName'] == 'final_accuracy']

    if accuracy_metrics:
        accuracy = accuracy_metrics[0]['Value']
        print(f"Run: {run_name} - Accuracy: {accuracy:.3f}")

# Find best performing run
best_run = max(runs, key=lambda x: x.get('final_accuracy', 0))
print(f"\nBest run: {best_run['TrialName']}")
```

### Advanced Search and Analytics

```python
from sagemaker.experiments import Search

# Search for high-performing runs
search = Search(
    resource="TrialComponent",
    search_expression={
        "Filters": [{
            "Name": "Metrics.final_accuracy.Value",
            "Operator": "GreaterThan",
            "Value": "0.85"
        }]
    },
    sort_by="Metrics.final_accuracy.Value",
    sort_order="Descending",
    max_results=10
)

results = search.search()
print("Top performing models:")
for result in results:
    name = result.get('TrialComponentName', 'Unknown')
    metrics = result.get('Metrics', {})
    accuracy = metrics.get('final_accuracy', {}).get('Value', 'N/A')
    print(f"- {name}: {accuracy}")
```