0
# Hyperparameter Tuning
1
2
Ray Tune provides comprehensive hyperparameter optimization with multiple search algorithms, schedulers, and experiment management. It supports all major ML frameworks and integrates seamlessly with distributed training.
3
4
## Capabilities
5
6
### Core Tuning Framework
7
8
Main tuning functionality and experiment management.
9
10
```python { .api }
11
class Tuner:
    """Entry point for running a hyperparameter tuning experiment."""

    def __init__(self, trainable, *, param_space=None, tune_config=None,
                 run_config=None):
        """
        Set up a tuner for the given trainable.

        Args:
            trainable: The function or class whose hyperparameters are tuned
            param_space (dict, optional): Search space for the parameters
            tune_config (TuneConfig, optional): Tuning-specific settings
            run_config (RunConfig, optional): Run-level settings
        """

    def fit(self):
        """
        Run the hyperparameter tuning experiment.

        Returns:
            ResultGrid: Results of the tuning run
        """

    def get_results(self):
        """
        Fetch the results of the tuning experiment.

        Returns:
            ResultGrid: Results of the tuning run
        """
41
42
class TuneConfig:
    """Configuration for hyperparameter tuning."""

    def __init__(self, *, metric=None, mode=None, search_alg=None,
                 scheduler=None, num_samples=1, max_concurrent_trials=None,
                 time_budget_s=None, **kwargs):
        """
        Initialize tune configuration.

        Args:
            metric (str, optional): Metric to optimize
            mode (str, optional): "min" or "max" for optimization
            search_alg (SearchAlgorithm, optional): Search algorithm
            scheduler (TrialScheduler, optional): Trial scheduler
            num_samples (int): Number of trials to run. Defaults to 1, so
                set it explicitly to sample the search space more than once
            max_concurrent_trials (int, optional): Max concurrent trials
            time_budget_s (float, optional): Time budget in seconds
        """
60
61
def run(trainable, *, config=None, metric=None, mode=None,
        name=None, stop=None, time_budget_s=None, num_samples=1,
        search_alg=None, scheduler=None, **kwargs):
    """
    Run hyperparameter tuning experiment (legacy API).

    Unlike ``Tuner.fit()``, which returns a ``ResultGrid``, the legacy entry
    point returns an ``ExperimentAnalysis`` object.

    Args:
        trainable: Function or class to tune
        config (dict, optional): Parameter configuration/search space
        metric (str, optional): Metric to optimize
        mode (str, optional): "min" or "max"
        name (str, optional): Experiment name
        stop (dict, optional): Stopping criteria
        time_budget_s (float, optional): Time budget in seconds
        num_samples (int): Number of trials. Defaults to 1
        search_alg (SearchAlgorithm, optional): Search algorithm
        scheduler (TrialScheduler, optional): Trial scheduler

    Returns:
        ExperimentAnalysis: Analysis object for the finished experiment
    """
82
83
class ResultGrid:
    """Holds the results produced by a tuning run."""

    def get_best_result(self, metric=None, mode=None):
        """Return the result of the best trial."""

    def get_dataframe(self):
        """Return the results as a pandas DataFrame."""

    @property
    def errors(self):
        """Failed trials."""

    def __len__(self):
        """Return the number of trials."""

    def __iter__(self):
        """Iterate over the individual results."""
101
```
102
103
### Search Algorithms
104
105
Various hyperparameter search algorithms.
106
107
```python { .api }
108
class BasicVariantGenerator:
    """Search algorithm covering grid search and random search."""

    def __init__(self, *, max_concurrent=None, random_state=None):
        """
        Set up the basic search.

        Args:
            max_concurrent (int, optional): Upper limit on concurrent trials
            random_state (int, optional): Seed for random sampling
        """
119
120
class GridSearch(BasicVariantGenerator):
    """Search algorithm that performs grid search."""
122
123
class RandomSearch(BasicVariantGenerator):
    """Search algorithm that performs random search."""
125
126
class ConcurrencyLimiter:
    """Wraps a search algorithm to cap the number of concurrent trials."""

    def __init__(self, searcher, max_concurrent):
        """
        Set up the concurrency limiter.

        Args:
            searcher: The search algorithm being wrapped
            max_concurrent (int): Upper limit on concurrent trials
        """
137
138
class BayesOptSearch:
    """Search algorithm based on Bayesian optimization with Gaussian processes."""

    def __init__(self, space=None, *, metric=None, mode="max",
                 utility_kwargs=None, random_state=None, **kwargs):
        """
        Set up Bayesian optimization.

        Args:
            space (dict, optional): The search space
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            utility_kwargs (dict, optional): Parameters for the acquisition function
            random_state (int, optional): Seed for random sampling
        """
153
154
class HyperOptSearch:
    """Search algorithms backed by HyperOpt."""

    def __init__(self, space=None, *, algo=None, metric=None, mode="max",
                 points_to_evaluate=None, random_state_seed=None, **kwargs):
        """
        Set up the HyperOpt search.

        Args:
            space (dict, optional): Search space in HyperOpt format
            algo: HyperOpt algorithm to use (tpe.suggest, random.suggest, etc.)
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            points_to_evaluate (list, optional): Initial points
            random_state_seed (int, optional): Seed for random sampling
        """
170
171
class OptunaSearch:
    """Search algorithm backed by Optuna."""

    def __init__(self, space=None, *, metric=None, mode="max",
                 sampler=None, seed=None, **kwargs):
        """
        Set up the Optuna search.

        Args:
            space (dict, optional): The search space
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            sampler: The Optuna sampler to use
            seed (int, optional): Seed for random sampling
        """
186
187
class AxSearch:
    """Search algorithm backed by Ax."""

    def __init__(self, space=None, *, metric=None, mode="max",
                 parameter_constraints=None, outcome_constraints=None, **kwargs):
        """
        Set up the Ax search.

        Args:
            space (list, optional): Search space in Ax format
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            parameter_constraints (list, optional): Constraints on parameters
            outcome_constraints (list, optional): Constraints on outcomes
        """
202
203
class DragonflySearch:
    """Search algorithm backed by Dragonfly."""

    def __init__(self, space=None, *, metric=None, mode="max",
                 domain=None, optimizer=None, **kwargs):
        """
        Set up the Dragonfly search.

        Args:
            space (list, optional): The search space
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            domain: The Dragonfly domain
            optimizer (str, optional): Type of optimizer
        """
218
```
219
220
### Trial Schedulers
221
222
Schedulers for early stopping and resource allocation.
223
224
```python { .api }
225
class FIFOScheduler:
    """Scheduler that runs trials first-in-first-out (no early stopping)."""

    def __init__(self):
        """Set up the FIFO scheduler."""
230
231
class AsyncHyperBandScheduler:
    """Asynchronous Hyperband scheduler.

    Ray Tune exposes this same scheduler under the name ``ASHAScheduler``,
    so both names share the defaults ``max_t=100`` and ``reduction_factor=4``.
    """

    def __init__(self, *, time_attr="training_iteration", metric=None,
                 mode="max", max_t=100, reduction_factor=4,
                 brackets=1, grace_period=1, **kwargs):
        """
        Initialize AsyncHyperBand scheduler.

        Args:
            time_attr (str): Time attribute for scheduling
            metric (str, optional): Metric to optimize
            mode (str): "min" or "max"
            max_t (int): Maximum time units, measured in units of time_attr
            reduction_factor (int): Reduction factor
            brackets (int): Number of brackets
            grace_period (int): Minimum time before stopping, in units of
                time_attr
        """
249
250
class ASHAScheduler:
    """Scheduler implementing the Asynchronous Successive Halving Algorithm (ASHA)."""

    def __init__(self, *, time_attr="training_iteration", metric=None,
                 mode="max", max_t=100, grace_period=1, reduction_factor=4,
                 brackets=1, **kwargs):
        """
        Set up the ASHA scheduler.

        Args:
            time_attr (str): Attribute used to measure time for scheduling
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            max_t (int): Maximum number of time units
            grace_period (int): Grace period before the first halving
            reduction_factor (int): Reduction factor used for successive halving
            brackets (int): Number of brackets
        """
268
269
class HyperBandScheduler:
    """Scheduler implementing synchronous Hyperband."""

    def __init__(self, *, time_attr="training_iteration", metric=None,
                 mode="max", max_t=81, reduction_factor=3, **kwargs):
        """
        Set up the HyperBand scheduler.

        Args:
            time_attr (str): Attribute used to measure time for scheduling
            metric (str, optional): Name of the metric to optimize
            mode (str): Either "min" or "max"
            max_t (int): Maximum number of time units
            reduction_factor (int): Reduction factor
        """
284
285
class MedianStoppingRule:
    """Stop trials below median performance."""

    def __init__(self, *, time_attr="time_total_s", metric=None,
                 mode="max", grace_period=60, min_samples_required=3, **kwargs):
        """
        Initialize median stopping rule.

        Args:
            time_attr (str): Time attribute for scheduling. Defaults to
                elapsed wall-clock seconds ("time_total_s")
            metric (str, optional): Metric to optimize
            mode (str): "min" or "max"
            grace_period (float): Grace period before stopping, in units of
                time_attr (seconds by default)
            min_samples_required (int): Minimum samples needed
        """
300
301
class PopulationBasedTraining:
    """Population-based training scheduler."""

    def __init__(self, *, time_attr="time_total_s", metric=None,
                 mode="max", perturbation_interval=60,
                 hyperparam_mutations=None, **kwargs):
        """
        Initialize PBT scheduler.

        Args:
            time_attr (str): Time attribute for scheduling. Defaults to
                elapsed wall-clock seconds ("time_total_s")
            metric (str, optional): Metric to optimize
            mode (str): "min" or "max"
            perturbation_interval (float): Interval between perturbations,
                in units of time_attr (seconds by default)
            hyperparam_mutations (dict, optional): Hyperparameter mutations
        """
317
318
class PopulationBasedTrainingReplay:
    """Scheduler that replays population-based training."""

    def __init__(self, policy_file):
        """
        Set up the PBT replay.

        Args:
            policy_file (str): Location of the PBT policy file
        """
328
```
329
330
### Search Space Definition
331
332
Define parameter search spaces.
333
334
```python { .api }
335
def choice(categories):
    """
    Pick one of several categorical options.

    Args:
        categories (list): The options to choose from

    Returns:
        Choice distribution
    """
345
346
def randint(lower, upper):
    """
    Sample a random integer from a range.

    Args:
        lower (int): Inclusive lower bound
        upper (int): Exclusive upper bound

    Returns:
        Randint distribution
    """
357
358
def uniform(lower, upper):
    """
    Sample uniformly from a range.

    Args:
        lower (float): Lower bound of the range
        upper (float): Upper bound of the range

    Returns:
        Uniform distribution
    """
369
370
def loguniform(lower, upper, base=10):
    """
    Sample from a log-uniform distribution.

    Args:
        lower (float): Lower bound of the range
        upper (float): Upper bound of the range
        base (float): Base of the logarithm

    Returns:
        Loguniform distribution
    """
382
383
def randn(mean=0, sd=1):
    """
    Sample from a normal distribution.

    Args:
        mean (float): Mean of the distribution
        sd (float): Standard deviation of the distribution

    Returns:
        Normal distribution
    """
394
395
def lograndn(mean=0, sd=1, base=10):
    """
    Sample from a log-normal distribution.

    Args:
        mean (float): Mean of the log
        sd (float): Standard deviation of the log
        base (float): Base of the logarithm

    Returns:
        Log-normal distribution
    """
407
408
def grid_search(values):
    """
    Request a grid search over the given values.

    Args:
        values (list): The values to search over

    Returns:
        Grid search specification
    """
418
419
def sample_from(func):
    """
    Draw samples from a custom function.

    Args:
        func: A function that returns a sample

    Returns:
        Sample specification
    """
429
```
430
431
### Experiment Analysis
432
433
Analyze and visualize tuning results.
434
435
```python { .api }
436
class ExperimentAnalysis:
    """Analyzes the results of a tuning experiment."""

    def get_best_trial(self, metric=None, mode=None, scope="last"):
        """Return the best trial."""

    def get_best_config(self, metric=None, mode=None, scope="last"):
        """Return the best configuration."""

    def get_best_logdir(self, metric=None, mode=None, scope="last"):
        """Return the log directory of the best trial."""

    def get_trial_dataframes(self):
        """Return the trial results as DataFrames."""

    def dataframe(self, metric=None, mode=None):
        """Return the results as a DataFrame."""

    def stats(self):
        """Return statistics about the experiment."""
456
457
def Analysis(experiment_checkpoint_path):
    """
    Build an ExperimentAnalysis from a checkpoint.

    Args:
        experiment_checkpoint_path (str): Location of the experiment checkpoint

    Returns:
        ExperimentAnalysis: Analysis object
    """
467
```
468
469
### Integration with Training
470
471
Integration with Ray Train for distributed hyperparameter tuning.
472
473
```python { .api }
474
def with_parameters(trainable, **kwargs):
    """
    Bind fixed parameters to a trainable.

    Args:
        trainable: The trainable function or class to wrap
        **kwargs: The fixed parameters

    Returns:
        Wrapped trainable
    """
485
486
def with_resources(trainable, resources):
    """
    Attach resource requirements to a trainable.

    Args:
        trainable: The trainable function or class to wrap
        resources (dict): The resources the trainable requires

    Returns:
        Wrapped trainable
    """
497
```
498
499
## Usage Examples
500
501
### Basic Hyperparameter Tuning
502
503
```python
504
import ray
505
from ray import tune
506
from ray.tune import TuneConfig, Tuner
507
508
ray.init()


def train_function(config):
    """Toy trainable: report a decaying loss and its accuracy for 10 epochs."""
    learning_rate = config["lr"]
    for epoch in range(10):
        loss = learning_rate * (0.9 ** epoch)
        accuracy = 1 - loss

        # Send this epoch's intermediate metrics to Tune
        tune.report({"loss": loss, "accuracy": accuracy, "epoch": epoch})


# Search space: one entry per hyperparameter
param_space = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([16, 32, 64, 128]),
    "hidden_size": tune.randint(32, 512)
}

# Tuning settings: maximize accuracy over 20 sampled configurations
tune_config = TuneConfig(metric="accuracy", mode="max", num_samples=20)

# Build the tuner and run the experiment
tuner = Tuner(train_function, param_space=param_space, tune_config=tune_config)
results = tuner.fit()

# Inspect the best result
best_result = results.get_best_result()
print(f"Best config: {best_result.config}")
print(f"Best accuracy: {best_result.metrics['accuracy']}")
544
```
545
546
### Advanced Tuning with Schedulers
547
548
```python
549
import numpy as np
import ray
from hyperopt import hp
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.search.hyperopt import HyperOptSearch

ray.init()

# Define search space using HyperOpt.
# hp.loguniform takes its bounds in natural-log space, hence np.log(...).
search_space = {
    "lr": hp.loguniform("lr", np.log(1e-4), np.log(1e-1)),
    "batch_size": hp.choice("batch_size", [16, 32, 64, 128]),
    "dropout": hp.uniform("dropout", 0.0, 0.5)
}

# Configure search algorithm
search_alg = HyperOptSearch(
    space=search_space,
    metric="accuracy",
    mode="max"
)

# Configure scheduler
scheduler = AsyncHyperBandScheduler(
    metric="accuracy",
    mode="max",
    grace_period=5,
    reduction_factor=2
)

# Run tuning (train_function is the trainable from the basic example)
analysis = tune.run(
    train_function,
    search_alg=search_alg,
    scheduler=scheduler,
    num_samples=50,
    resources_per_trial={"cpu": 2, "gpu": 0.5}
)

# Analyze results
best_trial = analysis.get_best_trial("accuracy", "max")
print(f"Best trial config: {best_trial.config}")
591
```
592
593
### Population-Based Training
594
595
```python
596
import ray
597
from ray import tune
598
from ray.tune.schedulers import PopulationBasedTraining
599
600
ray.init()

# Hyperparameters PBT is allowed to perturb
mutations = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": [16, 32, 64, 128]
}

# Set up the PBT scheduler
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=20,
    hyperparam_mutations=mutations
)

# Starting configuration sampled for each of the trials
initial_config = {
    "lr": tune.choice([0.001, 0.01, 0.1]),
    "batch_size": tune.choice([16, 32, 64])
}

# Launch the experiment with PBT as the scheduler
analysis = tune.run(
    train_function,
    scheduler=pbt,
    metric="accuracy",
    mode="max",
    num_samples=8,
    config=initial_config
)
624
```
625
626
### Integration with Ray Train
627
628
```python
629
import ray
import torch
from ray import tune, train
from ray.train import ScalingConfig
from ray.train.torch import TorchTrainer
from ray.tune import TuneConfig, Tuner

ray.init()


def train_loop_per_worker(config):
    """Per-worker training loop; reads hidden_size, lr and num_epochs from config."""
    # PyTorch training logic with config.
    # create_model, train_step and evaluate are placeholders for user code.
    model = create_model(config["hidden_size"])
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

    for epoch in range(config["num_epochs"]):
        # Training step
        loss = train_step(model, optimizer)
        accuracy = evaluate(model)

        # Report to both Train and Tune
        train.report({"loss": loss, "accuracy": accuracy})


# Define trainer
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    scaling_config=ScalingConfig(num_workers=4, use_gpu=True)
)

# Tune the trainer
tuner = Tuner(
    trainer,
    param_space={
        "train_loop_config": {
            "lr": tune.loguniform(1e-4, 1e-1),
            "hidden_size": tune.randint(64, 512),
            "num_epochs": 20
        }
    },
    tune_config=TuneConfig(
        metric="accuracy",
        mode="max",
        num_samples=10
    )
)

results = tuner.fit()
673
```