A Training Framework for Stable Baselines3 Reinforcement Learning Agents

Model sharing and loading through HuggingFace Hub integration. Enables uploading trained models, downloading pre-trained models, and generating model cards for the RL community ecosystem.
from typing import Any, Optional, Union

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecEnv

from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3.push_to_hub import generate_model_card, package_to_hub

Upload trained models to HuggingFace Hub with comprehensive metadata and documentation.
def package_to_hub(
    model: BaseAlgorithm,
    model_name: str,
    repo_id: str,
    commit_message: str = "Add model",
    tags: Optional[list[str]] = None,
    local_repo_path: Optional[str] = None,
    model_architecture: Optional[str] = None,
    env_id: Optional[str] = None,
    eval_env: Optional[VecEnv] = None,
    n_eval_episodes: int = 10,
    deterministic: bool = True,
    use_auth_token: Optional[Union[bool, str]] = None,
    private: bool = False,
    **kwargs
) -> str:
    """
    Package and upload a trained model to HuggingFace Hub.

    Parameters:
    - model: Trained RL model to upload
    - model_name: Name for the model
    - repo_id: HuggingFace repository ID (e.g., "username/model-name")
    - commit_message: Git commit message for the upload
    - tags: List of tags for model categorization
    - local_repo_path: Local path for temporary repository
    - model_architecture: Architecture description
    - env_id: Environment identifier
    - eval_env: Environment for evaluation before upload
    - n_eval_episodes: Number of evaluation episodes
    - deterministic: Whether to use deterministic actions for evaluation
    - use_auth_token: HuggingFace authentication token
    - private: Whether to create a private repository
    - **kwargs: Additional keyword arguments

    Returns:
    str: URL of the uploaded model repository
    """

# Usage example:
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
from stable_baselines3 import PPO

# Train a model
env = create_test_env("CartPole-v1", n_envs=1)
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=20000)

# Create evaluation environment (separate from the training env)
eval_env = create_test_env("CartPole-v1", n_envs=1)

# Upload to HuggingFace Hub
repo_url = package_to_hub(
    model=model,
    model_name="ppo-cartpole-v1",
    repo_id="your-username/ppo-cartpole-v1",
    commit_message="Upload trained PPO agent for CartPole-v1",
    tags=["ppo", "cartpole", "reinforcement-learning"],
    env_id="CartPole-v1",
    eval_env=eval_env,
    n_eval_episodes=10,
    deterministic=True
)
print(f"Model uploaded to: {repo_url}")

# Download and load pre-trained models from HuggingFace Hub.
def download_from_hub(
    repo_id: str,
    filename: str,
    force_download: bool = False,
    local_dir: Optional[str] = None,
    **kwargs
) -> str:
    """
    Download a model file from HuggingFace Hub.

    Parameters:
    - repo_id: HuggingFace repository ID
    - filename: Name of the file to download
    - force_download: Whether to force re-download
    - local_dir: Local directory to save the file
    - **kwargs: Additional download arguments

    Returns:
    str: Path to the downloaded file
    """

# Usage example:
from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3 import ALGOS, create_test_env

# Download a pre-trained model
model_path = download_from_hub(
    repo_id="sb3/ppo-CartPole-v1",
    filename="ppo-CartPole-v1.zip"
)

# Load the model
model = ALGOS["ppo"].load(model_path)

# Test the model
env = create_test_env("CartPole-v1", n_envs=1)
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    # VecEnv returns arrays of rewards/dones, one entry per sub-env
    obs, rewards, dones, info = env.step(action)
    if dones.any():
        obs = env.reset()

# Generate comprehensive model cards with training information, evaluation results, and usage instructions.
def generate_model_card(
    model: BaseAlgorithm,
    env_id: str,
    model_name: str = "",
    repo_id: str = "",
    eval_results: Optional[dict] = None,
    training_time: Optional[float] = None,
    total_timesteps: Optional[int] = None,
    hyperparams: Optional[dict] = None,
    model_architecture: Optional[str] = None,
    **kwargs
) -> str:
    """
    Generate a model card for a trained RL agent.

    Parameters:
    - model: Trained RL model
    - env_id: Environment identifier
    - model_name: Display name for the model
    - repo_id: Repository identifier
    - eval_results: Dictionary of evaluation results
    - training_time: Total training time in seconds
    - total_timesteps: Total training timesteps
    - hyperparams: Model hyperparameters
    - model_architecture: Description of model architecture
    - **kwargs: Additional metadata

    Returns:
    str: Generated model card in Markdown format
    """


def save_model_card(
    repo_dir: Path,
    generated_model_card: str,
    metadata: dict[str, Any]
) -> None:
    """
    Save a generated model card to a repository directory.

    Parameters:
    - repo_dir: Repository directory path
    - generated_model_card: Generated model card content
    - metadata: Additional metadata for the model card
    """

# Usage example:
from rl_zoo3.push_to_hub import generate_model_card, save_model_card
from pathlib import Path

# Generate model card
# NOTE: assumes a trained `model` is already in scope (e.g. from the training
# example above) — confirm before running this snippet standalone.
model_card = generate_model_card(
    model=model,
    env_id="CartPole-v1",
    model_name="PPO Agent for CartPole",
    repo_id="your-username/ppo-cartpole-v1",
    eval_results={
        "mean_reward": 195.2,
        "std_reward": 12.5,
        "n_eval_episodes": 10
    },
    training_time=300.5,
    total_timesteps=20000,
    hyperparams={
        "learning_rate": 0.0003,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10
    },
    model_architecture="MlpPolicy with [64, 64] hidden layers"
)

# Save model card
repo_dir = Path("./model_repo")
repo_dir.mkdir(exist_ok=True)
save_model_card(
    repo_dir=repo_dir,
    generated_model_card=model_card,
    metadata={"framework": "stable-baselines3", "library": "rl-zoo3"}
)
print("Model card saved to README.md")

from rl_zoo3.exp_manager import ExperimentManager
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
import argparse


def train_and_share_model():
    """
    Complete workflow: train model, evaluate, and share on Hub.
    """
    # 1. Train the model
    args = argparse.Namespace(
        algo='sac',
        env='Pendulum-v1',
        n_timesteps=50000,
        eval_freq=5000,
        n_eval_episodes=10,
        verbose=1,
        seed=42
    )
    # ExperimentManager is imported in the previous example section.
    exp_manager = ExperimentManager(
        args=args,
        algo='sac',
        env_id='Pendulum-v1',
        log_folder='./logs',
        n_timesteps=50000,
        eval_freq=5000,
        seed=42
    )
    # Setup and train
    model = exp_manager.setup_experiment()
    exp_manager.learn(model)
    exp_manager.save_trained_model(model)

    # 2. Create evaluation environment
    eval_env = create_test_env("Pendulum-v1", n_envs=1)

    # 3. Upload to HuggingFace Hub
    repo_url = package_to_hub(
        model=model,
        model_name="sac-pendulum-v1",
        repo_id="your-username/sac-pendulum-v1",
        commit_message="Upload SAC agent for Pendulum-v1 (50k timesteps)",
        tags=["sac", "pendulum", "continuous-control", "rl-zoo3"],
        env_id="Pendulum-v1",
        eval_env=eval_env,
        n_eval_episodes=20,
        deterministic=True,
        model_architecture="SAC with default MlpPolicy"
    )
    print(f"Model successfully shared at: {repo_url}")
    return repo_url


# Run the complete workflow
train_and_share_model()

from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3 import ALGOS, create_test_env
import numpy as np


def compare_hub_models():
    """
    Download and compare multiple models from HuggingFace Hub.

    Returns a dict mapping algorithm name -> evaluation statistics.
    """
    # Models to compare
    models_to_test = [
        {"repo_id": "sb3/ppo-CartPole-v1", "filename": "ppo-CartPole-v1.zip", "algo": "ppo"},
        {"repo_id": "sb3/dqn-CartPole-v1", "filename": "dqn-CartPole-v1.zip", "algo": "dqn"},
        {"repo_id": "sb3/a2c-CartPole-v1", "filename": "a2c-CartPole-v1.zip", "algo": "a2c"}
    ]

    # Shared test environment (vectorized, single sub-env)
    env = create_test_env("CartPole-v1", n_envs=1)
    results = {}

    for model_info in models_to_test:
        print(f"Testing {model_info['algo'].upper()} model...")

        # Download model
        model_path = download_from_hub(
            repo_id=model_info["repo_id"],
            filename=model_info["filename"]
        )

        # Load model
        model = ALGOS[model_info["algo"]].load(model_path)

        # Evaluate model
        episode_rewards = []
        n_eval_episodes = 10
        for episode in range(n_eval_episodes):
            obs = env.reset()
            episode_reward = 0
            done = False
            while not done:
                action, _states = model.predict(obs, deterministic=True)
                # VecEnv step returns per-env arrays; n_envs=1 here, so
                # reward[0] is this episode's step reward.
                obs, reward, done, info = env.step(action)
                episode_reward += reward[0]
            episode_rewards.append(episode_reward)

        # Store results
        results[model_info["algo"]] = {
            "mean_reward": np.mean(episode_rewards),
            "std_reward": np.std(episode_rewards),
            "episodes": episode_rewards
        }
        print(f"{model_info['algo'].upper()}: "
              f"{results[model_info['algo']]['mean_reward']:.1f} ± "
              f"{results[model_info['algo']]['std_reward']:.1f}")

    # Find best model
    best_algo = max(results.keys(), key=lambda k: results[k]["mean_reward"])
    print(f"\nBest model: {best_algo.upper()} "
          f"({results[best_algo]['mean_reward']:.1f} ± "
          f"{results[best_algo]['std_reward']:.1f})")
    return results


# Compare models
comparison_results = compare_hub_models()

from rl_zoo3.exp_manager import ExperimentManager
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
import argparse
from pathlib import Path


class ModelSharingPipeline:
    """
    Automated pipeline for training and sharing models.
    """

    def __init__(self, username: str, auth_token: str):
        # HuggingFace username (used to build repo ids) and its auth token.
        self.username = username
        self.auth_token = auth_token

    def train_and_share(
        self,
        algo: str,
        env_id: str,
        n_timesteps: int,
        description: str = "",
        tags: Optional[list[str]] = None
    ):
        """
        Train a model and automatically share it on HuggingFace Hub.

        Returns the URL of the uploaded repository.
        """
        # Default tags derive from the algo/env; the list is created fresh
        # per call (never use a mutable default argument).
        if tags is None:
            tags = [algo, env_id.lower(), "rl-zoo3"]

        # Setup training
        args = argparse.Namespace(
            algo=algo,
            env=env_id,
            n_timesteps=n_timesteps,
            eval_freq=max(n_timesteps // 10, 1000),
            n_eval_episodes=10,
            verbose=1,
            seed=42
        )

        # Create unique log folder per (algo, env, budget) combination
        log_folder = f"./logs/{algo}_{env_id}_{n_timesteps}"
        exp_manager = ExperimentManager(
            args=args,
            algo=algo,
            env_id=env_id,
            log_folder=log_folder,
            n_timesteps=n_timesteps,
            eval_freq=args.eval_freq
        )

        # Train model
        print(f"Training {algo.upper()} on {env_id} for {n_timesteps} timesteps...")
        model = exp_manager.setup_experiment()
        exp_manager.learn(model)
        exp_manager.save_trained_model(model)

        # Create evaluation environment
        eval_env = create_test_env(env_id, n_envs=1)

        # Generate repository name, e.g. "ppo-cartpole-v1-25k"
        repo_name = f"{algo}-{env_id.lower()}-{n_timesteps//1000}k"
        repo_id = f"{self.username}/{repo_name}"

        # Upload to Hub
        print(f"Uploading to HuggingFace Hub: {repo_id}")
        repo_url = package_to_hub(
            model=model,
            model_name=repo_name,
            repo_id=repo_id,
            commit_message=f"Upload {algo.upper()} agent for {env_id} ({n_timesteps} timesteps)",
            tags=tags,
            env_id=env_id,
            eval_env=eval_env,
            n_eval_episodes=20,
            deterministic=True,
            use_auth_token=self.auth_token,
            model_architecture=f"{algo.upper()} with default policy"
        )
        print(f"✅ Model uploaded successfully: {repo_url}")
        return repo_url

    def batch_training(self, configs: list[dict]):
        """
        Train and share multiple models in batch.

        Best-effort: a failure for one config is recorded and the batch
        continues with the next config rather than aborting.
        """
        results = []
        for config in configs:
            try:
                result = self.train_and_share(**config)
                results.append({"config": config, "url": result, "status": "success"})
            except Exception as e:
                print(f"❌ Failed to train/share {config}: {e}")
                results.append({"config": config, "error": str(e), "status": "failed"})
        return results


# Example usage
pipeline = ModelSharingPipeline(
    username="your-username",
    auth_token="your-hf-token"
)

# Single model
pipeline.train_and_share(
    algo="ppo",
    env_id="CartPole-v1",
    n_timesteps=25000,
    tags=["ppo", "cartpole", "classic-control", "rl-zoo3"]
)

# Batch training
batch_configs = [
    {"algo": "ppo", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "dqn", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "sac", "env_id": "Pendulum-v1", "n_timesteps": 50000}
]
batch_results = pipeline.batch_training(batch_configs)
print(f"Batch training completed. {len([r for r in batch_results if r['status'] == 'success'])} successes.")

# The HuggingFace Hub integration provides:
This integration makes RL Zoo3 models part of the broader ML community ecosystem, facilitating reproducible research and model sharing.

Install with the Tessl CLI:

    npx tessl i tessl/pypi-rl-zoo3