A Training Framework for Stable Baselines3 Reinforcement Learning Agents

Model sharing and loading through HuggingFace Hub integration. Enables uploading trained models, downloading pre-trained models, and generating model cards for the RL community ecosystem.
from typing import Any, Optional, Union

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecEnv

from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3.push_to_hub import generate_model_card, package_to_hub

Upload trained models to HuggingFace Hub with comprehensive metadata and documentation.
def package_to_hub(
    model: BaseAlgorithm,
    model_name: str,
    repo_id: str,
    commit_message: str = "Add model",
    tags: Optional[list[str]] = None,
    local_repo_path: Optional[str] = None,
    model_architecture: Optional[str] = None,
    env_id: Optional[str] = None,
    eval_env: Optional[VecEnv] = None,
    n_eval_episodes: int = 10,
    deterministic: bool = True,
    use_auth_token: Optional[Union[bool, str]] = None,
    private: bool = False,
    **kwargs
) -> str:
    """
    Package and upload a trained model to HuggingFace Hub.

    Parameters:
    - model: Trained RL model to upload
    - model_name: Name for the model
    - repo_id: HuggingFace repository ID (e.g., "username/model-name")
    - commit_message: Git commit message for the upload
    - tags: List of tags for model categorization
    - local_repo_path: Local path for temporary repository
    - model_architecture: Architecture description
    - env_id: Environment identifier
    - eval_env: Environment for evaluation before upload
    - n_eval_episodes: Number of evaluation episodes
    - deterministic: Whether to use deterministic actions for evaluation
    - use_auth_token: HuggingFace authentication token
    - private: Whether to create a private repository
    - **kwargs: Additional keyword arguments

    Returns:
    str: URL of the uploaded model repository
    """

# Usage example:
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
from stable_baselines3 import PPO

# Train a model
env = create_test_env("CartPole-v1", n_envs=1)
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=20000)

# Create evaluation environment (separate from the training env)
eval_env = create_test_env("CartPole-v1", n_envs=1)

# Upload to HuggingFace Hub
repo_url = package_to_hub(
    model=model,
    model_name="ppo-cartpole-v1",
    repo_id="your-username/ppo-cartpole-v1",
    commit_message="Upload trained PPO agent for CartPole-v1",
    tags=["ppo", "cartpole", "reinforcement-learning"],
    env_id="CartPole-v1",
    eval_env=eval_env,
    n_eval_episodes=10,
    deterministic=True
)
print(f"Model uploaded to: {repo_url}")

# Download and load pre-trained models from HuggingFace Hub.
def download_from_hub(
    repo_id: str,
    filename: str,
    force_download: bool = False,
    local_dir: Optional[str] = None,
    **kwargs
) -> str:
    """
    Download a model file from HuggingFace Hub.

    Parameters:
    - repo_id: HuggingFace repository ID
    - filename: Name of the file to download
    - force_download: Whether to force re-download
    - local_dir: Local directory to save the file
    - **kwargs: Additional download arguments

    Returns:
    str: Path to the downloaded file
    """

# Usage example:
from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3 import ALGOS, create_test_env

# Download a pre-trained model
model_path = download_from_hub(
    repo_id="sb3/ppo-CartPole-v1",
    filename="ppo-CartPole-v1.zip"
)

# Load the model
model = ALGOS["ppo"].load(model_path)

# Test the model
env = create_test_env("CartPole-v1", n_envs=1)
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    # VecEnv returns arrays of rewards/dones, one entry per sub-env
    obs, rewards, dones, info = env.step(action)
    if dones.any():
        obs = env.reset()

# Generate comprehensive model cards with training information, evaluation results, and usage instructions.
def generate_model_card(
    model: BaseAlgorithm,
    env_id: str,
    model_name: str = "",
    repo_id: str = "",
    eval_results: Optional[dict] = None,
    training_time: Optional[float] = None,
    total_timesteps: Optional[int] = None,
    hyperparams: Optional[dict] = None,
    model_architecture: Optional[str] = None,
    **kwargs
) -> str:
    """
    Generate a model card for a trained RL agent.

    Parameters:
    - model: Trained RL model
    - env_id: Environment identifier
    - model_name: Display name for the model
    - repo_id: Repository identifier
    - eval_results: Dictionary of evaluation results
    - training_time: Total training time in seconds
    - total_timesteps: Total training timesteps
    - hyperparams: Model hyperparameters
    - model_architecture: Description of model architecture
    - **kwargs: Additional metadata

    Returns:
    str: Generated model card in Markdown format
    """


def save_model_card(
    repo_dir: Path,
    generated_model_card: str,
    metadata: dict[str, Any]
) -> None:
    """
    Save a generated model card to a repository directory.

    Parameters:
    - repo_dir: Repository directory path
    - generated_model_card: Generated model card content
    - metadata: Additional metadata for the model card
    """

# Usage example:
from rl_zoo3.push_to_hub import generate_model_card, save_model_card
from pathlib import Path

# Generate model card
# NOTE: assumes a trained `model` is already in scope (e.g. from the training
# example above) — confirm before running this snippet standalone.
model_card = generate_model_card(
    model=model,
    env_id="CartPole-v1",
    model_name="PPO Agent for CartPole",
    repo_id="your-username/ppo-cartpole-v1",
    eval_results={
        "mean_reward": 195.2,
        "std_reward": 12.5,
        "n_eval_episodes": 10
    },
    training_time=300.5,
    total_timesteps=20000,
    hyperparams={
        "learning_rate": 0.0003,
        "n_steps": 2048,
        "batch_size": 64,
        "n_epochs": 10
    },
    model_architecture="MlpPolicy with [64, 64] hidden layers"
)

# Save model card
repo_dir = Path("./model_repo")
repo_dir.mkdir(exist_ok=True)
save_model_card(
    repo_dir=repo_dir,
    generated_model_card=model_card,
    metadata={"framework": "stable-baselines3", "library": "rl-zoo3"}
)
print("Model card saved to README.md")

from rl_zoo3.exp_manager import ExperimentManager
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
import argparse


def train_and_share_model():
    """
    Complete workflow: train model, evaluate, and share on Hub.
    """
    # 1. Train the model
    args = argparse.Namespace(
        algo='sac',
        env='Pendulum-v1',
        n_timesteps=50000,
        eval_freq=5000,
        n_eval_episodes=10,
        verbose=1,
        seed=42
    )
    # ExperimentManager is imported in the previous example section.
    exp_manager = ExperimentManager(
        args=args,
        algo='sac',
        env_id='Pendulum-v1',
        log_folder='./logs',
        n_timesteps=50000,
        eval_freq=5000,
        seed=42
    )
    # Setup and train
    model = exp_manager.setup_experiment()
    exp_manager.learn(model)
    exp_manager.save_trained_model(model)

    # 2. Create evaluation environment
    eval_env = create_test_env("Pendulum-v1", n_envs=1)

    # 3. Upload to HuggingFace Hub
    repo_url = package_to_hub(
        model=model,
        model_name="sac-pendulum-v1",
        repo_id="your-username/sac-pendulum-v1",
        commit_message="Upload SAC agent for Pendulum-v1 (50k timesteps)",
        tags=["sac", "pendulum", "continuous-control", "rl-zoo3"],
        env_id="Pendulum-v1",
        eval_env=eval_env,
        n_eval_episodes=20,
        deterministic=True,
        model_architecture="SAC with default MlpPolicy"
    )
    print(f"Model successfully shared at: {repo_url}")
    return repo_url


# Run the complete workflow
train_and_share_model()

from rl_zoo3.load_from_hub import download_from_hub
from rl_zoo3 import ALGOS, create_test_env
import numpy as np


def compare_hub_models():
    """
    Download and compare multiple models from HuggingFace Hub.

    Returns a dict mapping algorithm name -> evaluation statistics.
    """
    # Models to compare
    models_to_test = [
        {"repo_id": "sb3/ppo-CartPole-v1", "filename": "ppo-CartPole-v1.zip", "algo": "ppo"},
        {"repo_id": "sb3/dqn-CartPole-v1", "filename": "dqn-CartPole-v1.zip", "algo": "dqn"},
        {"repo_id": "sb3/a2c-CartPole-v1", "filename": "a2c-CartPole-v1.zip", "algo": "a2c"}
    ]

    # Shared test environment (vectorized, single sub-env)
    env = create_test_env("CartPole-v1", n_envs=1)
    results = {}

    for model_info in models_to_test:
        print(f"Testing {model_info['algo'].upper()} model...")

        # Download model
        model_path = download_from_hub(
            repo_id=model_info["repo_id"],
            filename=model_info["filename"]
        )

        # Load model
        model = ALGOS[model_info["algo"]].load(model_path)

        # Evaluate model
        episode_rewards = []
        n_eval_episodes = 10
        for episode in range(n_eval_episodes):
            obs = env.reset()
            episode_reward = 0
            done = False
            while not done:
                action, _states = model.predict(obs, deterministic=True)
                # VecEnv step returns per-env arrays; n_envs=1 here, so
                # reward[0] is this episode's step reward.
                obs, reward, done, info = env.step(action)
                episode_reward += reward[0]
            episode_rewards.append(episode_reward)

        # Store results
        results[model_info["algo"]] = {
            "mean_reward": np.mean(episode_rewards),
            "std_reward": np.std(episode_rewards),
            "episodes": episode_rewards
        }
        print(f"{model_info['algo'].upper()}: "
              f"{results[model_info['algo']]['mean_reward']:.1f} ± "
              f"{results[model_info['algo']]['std_reward']:.1f}")

    # Find best model
    best_algo = max(results.keys(), key=lambda k: results[k]["mean_reward"])
    print(f"\nBest model: {best_algo.upper()} "
          f"({results[best_algo]['mean_reward']:.1f} ± "
          f"{results[best_algo]['std_reward']:.1f})")
    return results


# Compare models
comparison_results = compare_hub_models()

from rl_zoo3.exp_manager import ExperimentManager
from rl_zoo3.push_to_hub import package_to_hub
from rl_zoo3 import create_test_env
import argparse
from pathlib import Path


class ModelSharingPipeline:
    """
    Automated pipeline for training and sharing models.
    """

    def __init__(self, username: str, auth_token: str):
        # HuggingFace username (used to build repo ids) and its auth token.
        self.username = username
        self.auth_token = auth_token

    def train_and_share(
        self,
        algo: str,
        env_id: str,
        n_timesteps: int,
        description: str = "",
        tags: Optional[list[str]] = None
    ):
        """
        Train a model and automatically share it on HuggingFace Hub.

        Returns the URL of the uploaded repository.
        """
        # Default tags derive from the algo/env; the list is created fresh
        # per call (never use a mutable default argument).
        if tags is None:
            tags = [algo, env_id.lower(), "rl-zoo3"]

        # Setup training
        args = argparse.Namespace(
            algo=algo,
            env=env_id,
            n_timesteps=n_timesteps,
            eval_freq=max(n_timesteps // 10, 1000),
            n_eval_episodes=10,
            verbose=1,
            seed=42
        )

        # Create unique log folder per (algo, env, budget) combination
        log_folder = f"./logs/{algo}_{env_id}_{n_timesteps}"
        exp_manager = ExperimentManager(
            args=args,
            algo=algo,
            env_id=env_id,
            log_folder=log_folder,
            n_timesteps=n_timesteps,
            eval_freq=args.eval_freq
        )

        # Train model
        print(f"Training {algo.upper()} on {env_id} for {n_timesteps} timesteps...")
        model = exp_manager.setup_experiment()
        exp_manager.learn(model)
        exp_manager.save_trained_model(model)

        # Create evaluation environment
        eval_env = create_test_env(env_id, n_envs=1)

        # Generate repository name, e.g. "ppo-cartpole-v1-25k"
        repo_name = f"{algo}-{env_id.lower()}-{n_timesteps//1000}k"
        repo_id = f"{self.username}/{repo_name}"

        # Upload to Hub
        print(f"Uploading to HuggingFace Hub: {repo_id}")
        repo_url = package_to_hub(
            model=model,
            model_name=repo_name,
            repo_id=repo_id,
            commit_message=f"Upload {algo.upper()} agent for {env_id} ({n_timesteps} timesteps)",
            tags=tags,
            env_id=env_id,
            eval_env=eval_env,
            n_eval_episodes=20,
            deterministic=True,
            use_auth_token=self.auth_token,
            model_architecture=f"{algo.upper()} with default policy"
        )
        print(f"✅ Model uploaded successfully: {repo_url}")
        return repo_url

    def batch_training(self, configs: list[dict]):
        """
        Train and share multiple models in batch.

        Best-effort: a failure for one config is recorded and the batch
        continues with the next config rather than aborting.
        """
        results = []
        for config in configs:
            try:
                result = self.train_and_share(**config)
                results.append({"config": config, "url": result, "status": "success"})
            except Exception as e:
                print(f"❌ Failed to train/share {config}: {e}")
                results.append({"config": config, "error": str(e), "status": "failed"})
        return results


# Example usage
pipeline = ModelSharingPipeline(
    username="your-username",
    auth_token="your-hf-token"
)

# Single model
pipeline.train_and_share(
    algo="ppo",
    env_id="CartPole-v1",
    n_timesteps=25000,
    tags=["ppo", "cartpole", "classic-control", "rl-zoo3"]
)

# Batch training
batch_configs = [
    {"algo": "ppo", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "dqn", "env_id": "CartPole-v1", "n_timesteps": 25000},
    {"algo": "sac", "env_id": "Pendulum-v1", "n_timesteps": 50000}
]
batch_results = pipeline.batch_training(batch_configs)
print(f"Batch training completed. {len([r for r in batch_results if r['status'] == 'success'])} successes.")

# The HuggingFace Hub integration provides:
This integration makes RL Zoo3 models part of the broader ML community ecosystem, facilitating reproducible research and model sharing.

Install with the Tessl CLI:

    npx tessl i tessl/pypi-rl-zoo3