or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

callbacks.md, core-utilities.md, experiment-management.md, hub-integration.md, hyperparameter-optimization.md, index.md, plotting.md, wrappers.md

docs/hub-integration.md

0

# HuggingFace Hub Integration

1

2

This page covers model sharing and loading through the HuggingFace Hub integration: uploading trained models, downloading pre-trained models, and generating model cards for the RL community ecosystem.

3

4

## Core Imports

5

6

```python

7

from pathlib import Path
from typing import Any, Optional, Union

from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3.push_to_hub import generate_model_card, package_to_hub
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecEnv

11

```

12

13

## Capabilities

14

15

### Model Upload and Packaging

16

17

Upload trained models to HuggingFace Hub with comprehensive metadata and documentation.

18

19

```python { .api }

20

def package_to_hub(
    model: BaseAlgorithm,
    model_name: str,
    repo_id: str,
    commit_message: str = "Add model",
    tags: Optional[list[str]] = None,
    local_repo_path: Optional[str] = None,
    model_architecture: Optional[str] = None,
    env_id: Optional[str] = None,
    eval_env: Optional[VecEnv] = None,
    n_eval_episodes: int = 10,
    deterministic: bool = True,
    use_auth_token: Optional[Union[bool, str]] = None,
    private: bool = False,
    **kwargs
) -> str:
    """
    Package and upload a trained model to HuggingFace Hub.

    Parameters:
    - model: Trained RL model to upload
    - model_name: Name for the model
    - repo_id: HuggingFace repository ID (e.g., "username/model-name")
    - commit_message: Git commit message for the upload (default: "Add model")
    - tags: List of tags for model categorization (default: None)
    - local_repo_path: Local path for temporary repository (default: None)
    - model_architecture: Architecture description (default: None)
    - env_id: Environment identifier (default: None)
    - eval_env: Environment for evaluation before upload (default: None)
    - n_eval_episodes: Number of evaluation episodes (default: 10)
    - deterministic: Whether to use deterministic actions for evaluation
      (default: True)
    - use_auth_token: HuggingFace authentication token; typed as a bool or
      a string (default: None)
    - private: Whether to create a private repository (default: False)
    - **kwargs: Additional keyword arguments

    Returns:
    str: URL of the uploaded model repository
    """

58

```

59

60

Usage example:

61

```python

62

from rl_zoo3.push_to_hub import package_to_hub

63

from rl_zoo3 import create_test_env

64

from stable_baselines3 import PPO

65

66

# Train a model
env = create_test_env("CartPole-v1", n_envs=1)
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=20000)

# Create evaluation environment (separate from the training environment)
eval_env = create_test_env("CartPole-v1", n_envs=1)

# Upload to HuggingFace Hub
repo_url = package_to_hub(
    model=model,
    model_name="ppo-cartpole-v1",
    repo_id="your-username/ppo-cartpole-v1",
    commit_message="Upload trained PPO agent for CartPole-v1",
    tags=["ppo", "cartpole", "reinforcement-learning"],
    env_id="CartPole-v1",
    eval_env=eval_env,
    n_eval_episodes=10,
    deterministic=True,
)

print(f"Model uploaded to: {repo_url}")

88

```

89

90

### Model Download and Loading

91

92

Download and load pre-trained models from HuggingFace Hub.

93

94

```python { .api }

95

def download_from_hub(
    repo_id: str,
    filename: str,
    force_download: bool = False,
    local_dir: Optional[str] = None,
    **kwargs
) -> str:
    """
    Download a model file from HuggingFace Hub.

    Parameters:
    - repo_id: HuggingFace repository ID
    - filename: Name of the file to download
    - force_download: Whether to force re-download (default: False)
    - local_dir: Local directory to save the file (default: None)
    - **kwargs: Additional download arguments

    Returns:
    str: Path to the downloaded file
    """

115

```

116

117

Usage example:

118

```python

119

from rl_zoo3.load_from_hub import download_from_hub

120

from rl_zoo3 import ALGOS, create_test_env

121

122

# Download a pre-trained model
model_path = download_from_hub(
    repo_id="sb3/ppo-CartPole-v1",
    filename="ppo-CartPole-v1.zip",
)

# Load the model with the algorithm class registered under "ppo"
model = ALGOS["ppo"].load(model_path)

# Test the model
env = create_test_env("CartPole-v1", n_envs=1)
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = env.step(action)

    # `dones` holds one flag per sub-environment; restart once any finishes
    if dones.any():
        obs = env.reset()

140

```

141

142

### Model Card Generation

143

144

Generate comprehensive model cards with training information, evaluation results, and usage instructions.

145

146

```python { .api }

147

def generate_model_card(
    model: BaseAlgorithm,
    env_id: str,
    model_name: str = "",
    repo_id: str = "",
    eval_results: Optional[dict] = None,
    training_time: Optional[float] = None,
    total_timesteps: Optional[int] = None,
    hyperparams: Optional[dict] = None,
    model_architecture: Optional[str] = None,
    **kwargs
) -> str:
    """
    Generate a model card for a trained RL agent.

    Parameters:
    - model: Trained RL model
    - env_id: Environment identifier
    - model_name: Display name for the model (default: "")
    - repo_id: Repository identifier (default: "")
    - eval_results: Dictionary of evaluation results (default: None)
    - training_time: Total training time in seconds (default: None)
    - total_timesteps: Total training timesteps (default: None)
    - hyperparams: Model hyperparameters (default: None)
    - model_architecture: Description of model architecture (default: None)
    - **kwargs: Additional metadata

    Returns:
    str: Generated model card in Markdown format
    """

177

```

178

179

```python { .api }

180

def save_model_card(
    repo_dir: Path,
    generated_model_card: str,
    metadata: dict[str, Any]
) -> None:
    """
    Save a generated model card to a repository directory.

    Parameters:
    - repo_dir: Repository directory path
    - generated_model_card: Generated model card content
    - metadata: Additional metadata for the model card

    Returns:
    None
    """

193

```

194

195

Usage example:

196

```python

197

from rl_zoo3.push_to_hub import generate_model_card, save_model_card

198

from pathlib import Path

199

200

# Generate model card (`model` is a trained agent, e.g. from the upload example)
model_card = generate_model_card(
    model=model,
    env_id="CartPole-v1",
    model_name="PPO Agent for CartPole",
    repo_id="your-username/ppo-cartpole-v1",
    eval_results={
        "mean_reward": 195.2,
        "std_reward": 12.5,
        "n_eval_episodes": 10,
    },
    training_time=300.5,
    total_timesteps=20000,
    hyperparams={
        "learning_rate": 0.0003,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10,
    },
    model_architecture="MlpPolicy with [64, 64] hidden layers",
)

# Save model card
repo_dir = Path("./model_repo")
repo_dir.mkdir(exist_ok=True)

save_model_card(
    repo_dir=repo_dir,
    generated_model_card=model_card,
    metadata={"framework": "stable-baselines3", "library": "rl-zoo3"},
)

print("Model card saved to README.md")

233

```

234

235

## Complete Workflow Examples

236

237

### End-to-End Model Sharing

238

239

```python

240

from rl_zoo3.exp_manager import ExperimentManager

241

from rl_zoo3.push_to_hub import package_to_hub

242

from rl_zoo3 import create_test_env

243

import argparse

244

245

def train_and_share_model():
    """
    Complete workflow: train model, evaluate, and share on Hub.

    Trains SAC on Pendulum-v1 for 50,000 timesteps through
    ``ExperimentManager``, creates a single-environment evaluation env, and
    hands the trained model to ``package_to_hub``.

    Returns:
    The value returned by ``package_to_hub``.
    """
    # Settings shared by the Namespace and the ExperimentManager call; kept in
    # one place so the two cannot drift apart.
    algo = "sac"
    env_id = "Pendulum-v1"
    n_timesteps = 50000
    eval_freq = 5000
    seed = 42

    # 1. Train the model
    args = argparse.Namespace(
        algo=algo,
        env=env_id,
        n_timesteps=n_timesteps,
        eval_freq=eval_freq,
        n_eval_episodes=10,
        verbose=1,
        seed=seed,
    )

    exp_manager = ExperimentManager(
        args=args,
        algo=algo,
        env_id=env_id,
        log_folder="./logs",
        n_timesteps=n_timesteps,
        eval_freq=eval_freq,
        seed=seed,
    )

    # Setup and train
    model = exp_manager.setup_experiment()
    exp_manager.learn(model)
    exp_manager.save_trained_model(model)

    # 2. Create evaluation environment
    eval_env = create_test_env(env_id, n_envs=1)

    # 3. Upload to HuggingFace Hub
    try:
        repo_url = package_to_hub(
            model=model,
            model_name="sac-pendulum-v1",
            repo_id="your-username/sac-pendulum-v1",
            commit_message="Upload SAC agent for Pendulum-v1 (50k timesteps)",
            tags=["sac", "pendulum", "continuous-control", "rl-zoo3"],
            env_id=env_id,
            eval_env=eval_env,
            n_eval_episodes=20,
            deterministic=True,
            model_architecture="SAC with default MlpPolicy",
        )
    finally:
        # Release the evaluation environment even if the upload fails.
        eval_env.close()

    print(f"Model successfully shared at: {repo_url}")
    return repo_url

294

295

# Run the complete workflow

296

train_and_share_model()

297

```

298

299

### Loading and Comparing Hub Models

300

301

```python

302

from rl_zoo3.load_from_hub import download_from_hub

303

from rl_zoo3 import ALGOS, create_test_env

304

import numpy as np

305

306

def compare_hub_models():
    """
    Download and compare multiple models from HuggingFace Hub.

    Each listed model is downloaded, loaded through ``ALGOS``, and run for
    10 deterministic episodes on a single-environment CartPole-v1 test env.
    Per-model statistics and the best model (highest mean reward) are printed.

    Returns:
    dict: Maps each algorithm name to a dict with keys "mean_reward",
    "std_reward" and "episodes" (the list of per-episode rewards).
    """
    # Models to compare
    models_to_test = [
        {"repo_id": "sb3/ppo-CartPole-v1", "filename": "ppo-CartPole-v1.zip", "algo": "ppo"},
        {"repo_id": "sb3/dqn-CartPole-v1", "filename": "dqn-CartPole-v1.zip", "algo": "dqn"},
        {"repo_id": "sb3/a2c-CartPole-v1", "filename": "a2c-CartPole-v1.zip", "algo": "a2c"},
    ]
    n_eval_episodes = 10

    # Test environment (shared by all models)
    env = create_test_env("CartPole-v1", n_envs=1)

    results = {}

    try:
        for model_info in models_to_test:
            algo = model_info["algo"]
            print(f"Testing {algo.upper()} model...")

            # Download model
            model_path = download_from_hub(
                repo_id=model_info["repo_id"],
                filename=model_info["filename"],
            )

            # Load model
            model = ALGOS[algo].load(model_path)

            # Evaluate model
            episode_rewards = []
            for _ in range(n_eval_episodes):
                obs = env.reset()
                episode_reward = 0
                done = False

                while not done:
                    action, _states = model.predict(obs, deterministic=True)
                    obs, rewards, dones, _infos = env.step(action)
                    # The env was created with n_envs=1, so index 0 is the
                    # only sub-environment; index reward and done flag alike.
                    episode_reward += rewards[0]
                    done = dones[0]

                episode_rewards.append(episode_reward)

            # Store results
            stats = {
                "mean_reward": np.mean(episode_rewards),
                "std_reward": np.std(episode_rewards),
                "episodes": episode_rewards,
            }
            results[algo] = stats

            print(f"{algo.upper()}: "
                  f"{stats['mean_reward']:.1f} ± "
                  f"{stats['std_reward']:.1f}")
    finally:
        # Release the shared environment even if a download or load fails.
        env.close()

    # Find best model
    best_algo = max(results, key=lambda k: results[k]["mean_reward"])
    print(f"\nBest model: {best_algo.upper()} "
          f"({results[best_algo]['mean_reward']:.1f} ± "
          f"{results[best_algo]['std_reward']:.1f})")

    return results

368

369

# Compare models

370

comparison_results = compare_hub_models()

371

```

372

373

### Automated Model Sharing Pipeline

374

375

```python

376

from rl_zoo3.exp_manager import ExperimentManager

377

from rl_zoo3.push_to_hub import package_to_hub

378

from rl_zoo3 import create_test_env

379

import argparse

380

from pathlib import Path

381

382

class ModelSharingPipeline:
    """
    Automated pipeline for training and sharing models.

    Keeps the Hub username (used to build repository ids) and the auth token
    (forwarded to ``package_to_hub``) that every upload of this pipeline uses.
    """

    def __init__(self, username: str, auth_token: str):
        """
        Store the credentials used by later uploads.

        Parameters:
        - username: Account name; becomes the "<username>/" prefix of repo ids
        - auth_token: Token passed to ``package_to_hub`` as ``use_auth_token``
        """
        self.username = username
        self.auth_token = auth_token

    def train_and_share(
        self,
        algo: str,
        env_id: str,
        n_timesteps: int,
        description: str = "",
        tags: list[str] = None
    ):
        """
        Train a model and automatically share it on HuggingFace Hub.

        Parameters:
        - algo: Algorithm name, passed to ``ExperimentManager`` and used in
          the repository name and commit message
        - env_id: Environment identifier for training and evaluation
        - n_timesteps: Number of training timesteps
        - description: Accepted for interface compatibility; not used here
        - tags: Tags for the upload; when None, defaults to
          [algo, env_id.lower(), "rl-zoo3"]

        Returns:
        The value returned by ``package_to_hub``.
        """
        if tags is None:
            tags = [algo, env_id.lower(), "rl-zoo3"]

        # Setup training; evaluate every 10% of training, at least every 1000 steps
        args = argparse.Namespace(
            algo=algo,
            env=env_id,
            n_timesteps=n_timesteps,
            eval_freq=max(n_timesteps // 10, 1000),
            n_eval_episodes=10,
            verbose=1,
            seed=42,
        )

        # Create unique log folder
        log_folder = f"./logs/{algo}_{env_id}_{n_timesteps}"

        exp_manager = ExperimentManager(
            args=args,
            algo=algo,
            env_id=env_id,
            log_folder=log_folder,
            n_timesteps=n_timesteps,
            eval_freq=args.eval_freq,
            # Forward the seed declared in `args` so it actually takes effect.
            seed=args.seed,
        )

        # Train model
        print(f"Training {algo.upper()} on {env_id} for {n_timesteps} timesteps...")
        model = exp_manager.setup_experiment()
        exp_manager.learn(model)
        exp_manager.save_trained_model(model)

        # Create evaluation environment
        eval_env = create_test_env(env_id, n_envs=1)

        # Generate repository name
        repo_name = f"{algo}-{env_id.lower()}-{n_timesteps//1000}k"
        repo_id = f"{self.username}/{repo_name}"

        # Upload to Hub
        print(f"Uploading to HuggingFace Hub: {repo_id}")
        try:
            repo_url = package_to_hub(
                model=model,
                model_name=repo_name,
                repo_id=repo_id,
                commit_message=f"Upload {algo.upper()} agent for {env_id} ({n_timesteps} timesteps)",
                tags=tags,
                env_id=env_id,
                eval_env=eval_env,
                n_eval_episodes=20,
                deterministic=True,
                use_auth_token=self.auth_token,
                model_architecture=f"{algo.upper()} with default policy",
            )
        finally:
            # Close the per-run evaluation env so batch runs do not
            # accumulate open environments.
            eval_env.close()

        print(f"✅ Model uploaded successfully: {repo_url}")
        return repo_url

    def batch_training(self, configs: list[dict]):
        """
        Train and share multiple models in batch.

        Parameters:
        - configs: List of dicts; each is expanded as keyword arguments to
          ``train_and_share``

        Returns:
        list: One dict per config, in input order. Successful runs have keys
        "config", "url" and "status" ("success"); failed runs have keys
        "config", "error" (the exception message) and "status" ("failed").
        """
        results = []

        for config in configs:
            try:
                result = self.train_and_share(**config)
            except Exception as e:
                # Deliberately broad: one failing config is reported and
                # recorded, and the remaining configs still run.
                print(f"❌ Failed to train/share {config}: {e}")
                results.append({"config": config, "error": str(e), "status": "failed"})
            else:
                results.append({"config": config, "url": result, "status": "success"})

        return results

475

476

# Example usage
# NOTE: "your-hf-token" is a placeholder; supply a real token at runtime
# rather than committing one to source control.
pipeline = ModelSharingPipeline(
    username="your-username",
    auth_token="your-hf-token",
)

# Single model
pipeline.train_and_share(
    algo="ppo",
    env_id="CartPole-v1",
    n_timesteps=25000,
    tags=["ppo", "cartpole", "classic-control", "rl-zoo3"],
)

# Batch training
batch_configs = [
    {"algo": "ppo", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "dqn", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "sac", "env_id": "Pendulum-v1", "n_timesteps": 50000},
]

batch_results = pipeline.batch_training(batch_configs)

# Count successes without building an intermediate list
n_success = sum(r["status"] == "success" for r in batch_results)
print(f"Batch training completed. {n_success} successes.")

499

```

500

501

## Hub Integration Features

502

503

The HuggingFace Hub integration provides:

504

505

- **Automatic model card generation** with training details, hyperparameters, and evaluation results

506

- **Model versioning** through a Git-based repository system

507

- **Community sharing** enabling model discovery and reuse

508

- **Evaluation integration** with automatic performance benchmarking

509

- **Metadata preservation** including environment, algorithm, and training configuration

510

- **Download caching** for efficient model loading

511

- **Authentication handling** for private repositories and uploads

512

513

This integration makes RL Zoo3 models part of the broader ML community ecosystem, facilitating reproducible research and model sharing.