or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-distributed.md · data-processing.md · distributed-training.md · hyperparameter-tuning.md · index.md · model-serving.md · reinforcement-learning.md · utilities-advanced.md

docs/hyperparameter-tuning.md

0

# Hyperparameter Tuning

1

2

Ray Tune provides comprehensive hyperparameter optimization with multiple search algorithms, schedulers, and experiment management. It supports all major ML frameworks and integrates seamlessly with distributed training.

3

4

## Capabilities

5

6

### Core Tuning Framework

7

8

Main tuning functionality and experiment management.

9

10

```python { .api }

11

class Tuner:

12

"""Main class for hyperparameter tuning experiments."""

13

14

def __init__(self, trainable, *, param_space=None, tune_config=None,

15

run_config=None):

16

"""

17

Initialize tuner.

18

19

Args:

20

trainable: Function or class to tune

21

param_space (dict, optional): Parameter search space

22

tune_config (TuneConfig, optional): Tuning configuration

23

run_config (RunConfig, optional): Run configuration

24

"""

25

26

def fit(self):

27

"""

28

Execute hyperparameter tuning.

29

30

Returns:

31

ResultGrid: Tuning results

32

"""

33

34

def get_results(self):

35

"""

36

Get tuning results.

37

38

Returns:

39

ResultGrid: Tuning results

40

"""

41

42

class TuneConfig:

43

"""Configuration for hyperparameter tuning."""

44

45

def __init__(self, *, metric=None, mode=None, search_alg=None,

46

scheduler=None, num_samples=10, max_concurrent_trials=None,

47

time_budget_s=None, **kwargs):

48

"""

49

Initialize tune configuration.

50

51

Args:

52

metric (str, optional): Metric to optimize

53

mode (str, optional): "min" or "max" for optimization

54

search_alg (SearchAlgorithm, optional): Search algorithm

55

scheduler (TrialScheduler, optional): Trial scheduler

56

num_samples (int): Number of trials to run

57

max_concurrent_trials (int, optional): Max concurrent trials

58

time_budget_s (float, optional): Time budget in seconds

59

"""

60

61

def run(trainable, *, config=None, metric=None, mode=None,

62

name=None, stop=None, time_budget_s=None, num_samples=10,

63

search_alg=None, scheduler=None, **kwargs):

64

"""

65

Run hyperparameter tuning experiment (legacy API).

66

67

Args:

68

trainable: Function or class to tune

69

config (dict, optional): Parameter configuration/search space

70

metric (str, optional): Metric to optimize

71

mode (str, optional): "min" or "max"

72

name (str, optional): Experiment name

73

stop (dict, optional): Stopping criteria

74

time_budget_s (float, optional): Time budget

75

num_samples (int): Number of trials

76

search_alg (SearchAlgorithm, optional): Search algorithm

77

scheduler (TrialScheduler, optional): Trial scheduler

78

79

Returns:

80

ExperimentAnalysis: Analysis object for the experiment results

81

"""

82

83

class ResultGrid:

84

"""Container for tuning results."""

85

86

def get_best_result(self, metric=None, mode=None):

87

"""Get best trial result."""

88

89

def get_dataframe(self):

90

"""Get results as pandas DataFrame."""

91

92

@property

93

def errors(self):

94

"""Get failed trials."""

95

96

def __len__(self):

97

"""Number of trials."""

98

99

def __iter__(self):

100

"""Iterate over results."""

101

```

102

103

### Search Algorithms

104

105

Various hyperparameter search algorithms.

106

107

```python { .api }

108

class BasicVariantGenerator:

109

"""Grid search and random search."""

110

111

def __init__(self, *, max_concurrent=None, random_state=None):

112

"""

113

Initialize basic search.

114

115

Args:

116

max_concurrent (int, optional): Max concurrent trials

117

random_state (int, optional): Random seed

118

"""

119

120

class GridSearch(BasicVariantGenerator):

121

"""Grid search algorithm."""

122

123

class RandomSearch(BasicVariantGenerator):

124

"""Random search algorithm."""

125

126

class ConcurrencyLimiter:

127

"""Wrapper to limit concurrent trials."""

128

129

def __init__(self, searcher, max_concurrent):

130

"""

131

Initialize concurrency limiter.

132

133

Args:

134

searcher: Search algorithm to wrap

135

max_concurrent (int): Max concurrent trials

136

"""

137

138

class BayesOptSearch:

139

"""Bayesian optimization using Gaussian processes."""

140

141

def __init__(self, space=None, *, metric=None, mode="max",

142

utility_kwargs=None, random_state=None, **kwargs):

143

"""

144

Initialize Bayesian optimization.

145

146

Args:

147

space (dict, optional): Search space

148

metric (str, optional): Metric to optimize

149

mode (str): "min" or "max"

150

utility_kwargs (dict, optional): Acquisition function parameters

151

random_state (int, optional): Random seed

152

"""

153

154

class HyperOptSearch:

155

"""HyperOpt-based search algorithms."""

156

157

def __init__(self, space=None, *, algo=None, metric=None, mode="max",

158

points_to_evaluate=None, random_state_seed=None, **kwargs):

159

"""

160

Initialize HyperOpt search.

161

162

Args:

163

space (dict, optional): HyperOpt search space

164

algo: HyperOpt algorithm (tpe.suggest, random.suggest, etc.)

165

metric (str, optional): Metric to optimize

166

mode (str): "min" or "max"

167

points_to_evaluate (list, optional): Initial points

168

random_state_seed (int, optional): Random seed

169

"""

170

171

class OptunaSearch:

172

"""Optuna-based search algorithm."""

173

174

def __init__(self, space=None, *, metric=None, mode="max",

175

sampler=None, seed=None, **kwargs):

176

"""

177

Initialize Optuna search.

178

179

Args:

180

space (dict, optional): Search space

181

metric (str, optional): Metric to optimize

182

mode (str): "min" or "max"

183

sampler: Optuna sampler

184

seed (int, optional): Random seed

185

"""

186

187

class AxSearch:

188

"""Ax-based search algorithm."""

189

190

def __init__(self, space=None, *, metric=None, mode="max",

191

parameter_constraints=None, outcome_constraints=None, **kwargs):

192

"""

193

Initialize Ax search.

194

195

Args:

196

space (list, optional): Ax search space

197

metric (str, optional): Metric to optimize

198

mode (str): "min" or "max"

199

parameter_constraints (list, optional): Parameter constraints

200

outcome_constraints (list, optional): Outcome constraints

201

"""

202

203

class DragonflySearch:

204

"""Dragonfly-based search algorithm."""

205

206

def __init__(self, space=None, *, metric=None, mode="max",

207

domain=None, optimizer=None, **kwargs):

208

"""

209

Initialize Dragonfly search.

210

211

Args:

212

space (list, optional): Search space

213

metric (str, optional): Metric to optimize

214

mode (str): "min" or "max"

215

domain: Dragonfly domain

216

optimizer (str, optional): Optimizer type

217

"""

218

```

219

220

### Trial Schedulers

221

222

Schedulers for early stopping and resource allocation.

223

224

```python { .api }

225

class FIFOScheduler:

226

"""First-in-first-out scheduler (no early stopping)."""

227

228

def __init__(self):

229

"""Initialize FIFO scheduler."""

230

231

class AsyncHyperBandScheduler:

232

"""Asynchronous Hyperband scheduler."""

233

234

def __init__(self, *, time_attr="training_iteration", metric=None,

235

mode="max", max_t=81, reduction_factor=3,

236

brackets=1, grace_period=1, **kwargs):

237

"""

238

Initialize AsyncHyperBand scheduler.

239

240

Args:

241

time_attr (str): Time attribute for scheduling

242

metric (str, optional): Metric to optimize

243

mode (str): "min" or "max"

244

max_t (int): Maximum time units

245

reduction_factor (int): Reduction factor

246

brackets (int): Number of brackets

247

grace_period (int): Minimum time before stopping

248

"""

249

250

class ASHAScheduler:

251

"""Asynchronous Successive Halving Algorithm (ASHA) scheduler."""

252

253

def __init__(self, *, time_attr="training_iteration", metric=None,

254

mode="max", max_t=100, grace_period=1, reduction_factor=4,

255

brackets=1, **kwargs):

256

"""

257

Initialize ASHA scheduler.

258

259

Args:

260

time_attr (str): Time attribute for scheduling

261

metric (str, optional): Metric to optimize

262

mode (str): "min" or "max"

263

max_t (int): Maximum time units

264

grace_period (int): Grace period before first halving

265

reduction_factor (int): Reduction factor for successive halving

266

brackets (int): Number of brackets

267

"""

268

269

class HyperBandScheduler:

270

"""Synchronous Hyperband scheduler."""

271

272

def __init__(self, *, time_attr="training_iteration", metric=None,

273

mode="max", max_t=81, reduction_factor=3, **kwargs):

274

"""

275

Initialize HyperBand scheduler.

276

277

Args:

278

time_attr (str): Time attribute for scheduling

279

metric (str, optional): Metric to optimize

280

mode (str): "min" or "max"

281

max_t (int): Maximum time units

282

reduction_factor (int): Reduction factor

283

"""

284

285

class MedianStoppingRule:

286

"""Stop trials below median performance."""

287

288

def __init__(self, *, time_attr="training_iteration", metric=None,

289

mode="max", grace_period=60, min_samples_required=3, **kwargs):

290

"""

291

Initialize median stopping rule.

292

293

Args:

294

time_attr (str): Time attribute for scheduling

295

metric (str, optional): Metric to optimize

296

mode (str): "min" or "max"

297

grace_period (int): Grace period before stopping

298

min_samples_required (int): Minimum samples needed

299

"""

300

301

class PopulationBasedTraining:

302

"""Population-based training scheduler."""

303

304

def __init__(self, *, time_attr="training_iteration", metric=None,

305

mode="max", perturbation_interval=60,

306

hyperparam_mutations=None, **kwargs):

307

"""

308

Initialize PBT scheduler.

309

310

Args:

311

time_attr (str): Time attribute for scheduling

312

metric (str, optional): Metric to optimize

313

mode (str): "min" or "max"

314

perturbation_interval (int): Interval between perturbations

315

hyperparam_mutations (dict, optional): Hyperparameter mutations

316

"""

317

318

class PopulationBasedTrainingReplay:

319

"""Replay population-based training."""

320

321

def __init__(self, policy_file):

322

"""

323

Initialize PBT replay.

324

325

Args:

326

policy_file (str): Path to PBT policy file

327

"""

328

```

329

330

### Search Space Definition

331

332

Define parameter search spaces.

333

334

```python { .api }

335

def choice(categories):

336

"""

337

Choose from categorical options.

338

339

Args:

340

categories (list): List of options

341

342

Returns:

343

Choice distribution

344

"""

345

346

def randint(lower, upper):

347

"""

348

Random integer in range.

349

350

Args:

351

lower (int): Lower bound (inclusive)

352

upper (int): Upper bound (exclusive)

353

354

Returns:

355

Randint distribution

356

"""

357

358

def uniform(lower, upper):

359

"""

360

Uniform distribution in range.

361

362

Args:

363

lower (float): Lower bound

364

upper (float): Upper bound

365

366

Returns:

367

Uniform distribution

368

"""

369

370

def loguniform(lower, upper, base=10):

371

"""

372

Log-uniform distribution.

373

374

Args:

375

lower (float): Lower bound

376

upper (float): Upper bound

377

base (float): Logarithm base

378

379

Returns:

380

Loguniform distribution

381

"""

382

383

def randn(mean=0, sd=1):

384

"""

385

Normal distribution.

386

387

Args:

388

mean (float): Mean

389

sd (float): Standard deviation

390

391

Returns:

392

Normal distribution

393

"""

394

395

def lograndn(mean=0, sd=1, base=10):

396

"""

397

Log-normal distribution.

398

399

Args:

400

mean (float): Mean of log

401

sd (float): Standard deviation of log

402

base (float): Logarithm base

403

404

Returns:

405

Log-normal distribution

406

"""

407

408

def grid_search(values):

409

"""

410

Grid search over values.

411

412

Args:

413

values (list): Values to search over

414

415

Returns:

416

Grid search specification

417

"""

418

419

def sample_from(func):

420

"""

421

Sample from custom function.

422

423

Args:

424

func: Function that returns sample

425

426

Returns:

427

Sample specification

428

"""

429

```

430

431

### Experiment Analysis

432

433

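Analyze tuning results: retrieve the best trial, configuration, or log directory, and export results as DataFrames for further analysis and visualization.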

434

435

```python { .api }

436

class ExperimentAnalysis:

437

"""Analysis of tuning experiment results."""

438

439

def get_best_trial(self, metric=None, mode=None, scope="last"):

440

"""Get best trial."""

441

442

def get_best_config(self, metric=None, mode=None, scope="last"):

443

"""Get best configuration."""

444

445

def get_best_logdir(self, metric=None, mode=None, scope="last"):

446

"""Get best trial log directory."""

447

448

def get_trial_dataframes(self):

449

"""Get trial results as DataFrames."""

450

451

def dataframe(self, metric=None, mode=None):

452

"""Get results as DataFrame."""

453

454

def stats(self):

455

"""Get experiment statistics."""

456

457

def Analysis(experiment_checkpoint_path):

458

"""

459

Create ExperimentAnalysis from checkpoint.

460

461

Args:

462

experiment_checkpoint_path (str): Path to experiment checkpoint

463

464

Returns:

465

ExperimentAnalysis: Analysis object

466

"""

467

```

468

469

### Integration with Training

470

471

Helpers that wrap a trainable with fixed parameters or resource requirements; useful when combining Tune with Ray Train for distributed hyperparameter tuning.

472

473

```python { .api }

474

def with_parameters(trainable, **kwargs):

475

"""

476

Wrap trainable with fixed parameters.

477

478

Args:

479

trainable: Trainable function or class

480

**kwargs: Fixed parameters

481

482

Returns:

483

Wrapped trainable

484

"""

485

486

def with_resources(trainable, resources):

487

"""

488

Wrap trainable with resource requirements.

489

490

Args:

491

trainable: Trainable function or class

492

resources (dict): Resource requirements

493

494

Returns:

495

Wrapped trainable

496

"""

497

```

498

499

## Usage Examples

500

501

### Basic Hyperparameter Tuning

502

503

```python

504

import ray

505

from ray import tune

506

from ray.tune import TuneConfig, Tuner

507

508

ray.init()

509

510

def train_function(config):

511

# Training logic

512

for epoch in range(10):

513

loss = config["lr"] * (0.9 ** epoch)

514

accuracy = 1 - loss

515

516

# Report intermediate results

517

tune.report({"loss": loss, "accuracy": accuracy, "epoch": epoch})

518

519

# Define search space

520

param_space = {

521

"lr": tune.loguniform(1e-4, 1e-1),

522

"batch_size": tune.choice([16, 32, 64, 128]),

523

"hidden_size": tune.randint(32, 512)

524

}

525

526

# Configure tuner

527

tuner = Tuner(

528

train_function,

529

param_space=param_space,

530

tune_config=TuneConfig(

531

metric="accuracy",

532

mode="max",

533

num_samples=20

534

)

535

)

536

537

# Run experiment

538

results = tuner.fit()

539

540

# Get best result

541

best_result = results.get_best_result()

542

print(f"Best config: {best_result.config}")

543

print(f"Best accuracy: {best_result.metrics['accuracy']}")

544

```

545

546

### Advanced Tuning with Schedulers

547

548

```python

549

import ray

550

from ray import tune

551

from ray.tune.schedulers import AsyncHyperBandScheduler

552

from ray.tune.search.hyperopt import HyperOptSearch

553

from hyperopt import hp

554

555

ray.init()

556

557

# Define search space using HyperOpt

558

search_space = {

559

"lr": hp.loguniform("lr", np.log(1e-4), np.log(1e-1)),

560

"batch_size": hp.choice("batch_size", [16, 32, 64, 128]),

561

"dropout": hp.uniform("dropout", 0.0, 0.5)

562

}

563

564

# Configure search algorithm

565

search_alg = HyperOptSearch(

566

space=search_space,

567

metric="accuracy",

568

mode="max"

569

)

570

571

# Configure scheduler

572

scheduler = AsyncHyperBandScheduler(

573

metric="accuracy",

574

mode="max",

575

grace_period=5,

576

reduction_factor=2

577

)

578

579

# Run tuning

580

analysis = tune.run(

581

train_function,

582

search_alg=search_alg,

583

scheduler=scheduler,

584

num_samples=50,

585

resources_per_trial={"cpu": 2, "gpu": 0.5}

586

)

587

588

# Analyze results

589

best_trial = analysis.get_best_trial("accuracy", "max")

590

print(f"Best trial config: {best_trial.config}")

591

```

592

593

### Population-Based Training

594

595

```python

596

import ray

597

from ray import tune

598

from ray.tune.schedulers import PopulationBasedTraining

599

600

ray.init()

601

602

# Configure PBT

603

pbt = PopulationBasedTraining(

604

time_attr="training_iteration",

605

perturbation_interval=20,

606

hyperparam_mutations={

607

"lr": tune.loguniform(1e-4, 1e-1),

608

"batch_size": [16, 32, 64, 128]

609

}

610

)

611

612

# Run with PBT

613

analysis = tune.run(

614

train_function,

615

scheduler=pbt,

616

metric="accuracy",

617

mode="max",

618

num_samples=8,

619

config={

620

"lr": tune.choice([0.001, 0.01, 0.1]),

621

"batch_size": tune.choice([16, 32, 64])

622

}

623

)

624

```

625

626

### Integration with Ray Train

627

628

```python

629

import ray

630

from ray import tune, train

631

from ray.train.torch import TorchTrainer

632

from ray.tune import TuneConfig, Tuner

633

634

ray.init()

635

636

def train_loop_per_worker(config):

637

# PyTorch training logic with config

638

model = create_model(config["hidden_size"])

639

optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

640

641

for epoch in range(config["num_epochs"]):

642

# Training step

643

loss = train_step(model, optimizer)

644

accuracy = evaluate(model)

645

646

# Report to both Train and Tune

647

train.report({"loss": loss, "accuracy": accuracy})

648

649

# Define trainer

650

trainer = TorchTrainer(

651

train_loop_per_worker=train_loop_per_worker,

652

scaling_config=ScalingConfig(num_workers=4, use_gpu=True)

653

)

654

655

# Tune the trainer

656

tuner = Tuner(

657

trainer,

658

param_space={

659

"train_loop_config": {

660

"lr": tune.loguniform(1e-4, 1e-1),

661

"hidden_size": tune.randint(64, 512),

662

"num_epochs": 20

663

}

664

},

665

tune_config=TuneConfig(

666

metric="accuracy",

667

mode="max",

668

num_samples=10

669

)

670

)

671

672

results = tuner.fit()

673

```