A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym).
—
The core API provides the fundamental classes and interfaces that form the foundation of all Gymnasium environments and wrappers.
`Env` is the main class for implementing reinforcement learning environments; it defines the standard step/reset/render/close interface.
class Env:
    """Base environment class for reinforcement learning.

    Defines the standard step/reset/render/close interface that all
    Gymnasium environments implement.

    Attributes:
        action_space: The Space object for valid actions.
        observation_space: The Space object for valid observations.
        spec: Environment specification metadata.
        metadata: Environment metadata dict.
        render_mode: Current rendering mode.
        np_random: Random number generator for the environment.
    """

    def step(self, action) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Execute one environment step.

        Args:
            action: Action to take in the environment.

        Returns:
            tuple: (observation, reward, terminated, truncated, info)
                - observation: Agent's observation of the environment.
                - reward: Reward for taking the action.
                - terminated: Whether the episode ended due to a terminal state.
                - truncated: Whether the episode ended due to a time limit.
                - info: Additional information dict.
        """

    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """Reset environment to initial state.

        Args:
            seed (int, optional): Random seed for reproducibility.
            options (dict, optional): Environment-specific options.

        Returns:
            tuple: (observation, info)
                - observation: Initial observation.
                - info: Additional information dict.
        """

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Render the environment for visualization.

        Returns:
            Rendered frame(s) or None depending on render_mode.
        """

    def close(self) -> None:
        """Clean up environment resources."""

    @property
    def unwrapped(self):
        """Returns the base non-wrapped environment."""

    @property
    def np_random_seed(self) -> int:
        """Returns the environment's random seed."""

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the given attribute exists."""

    def get_wrapper_attr(self, name: str):
        """Gets the attribute from the environment."""

    def set_wrapper_attr(self, name: str, value, *, force: bool = True) -> bool:
        """Sets the attribute on the environment."""

# Base classes for modifying environment behavior without changing the underlying environment.
class Wrapper(Env):
    """Base wrapper class that delegates to a wrapped environment.

    Attributes:
        env: The wrapped environment.
        unwrapped: The base unwrapped environment.
    """

    def __init__(self, env):
        """Initialize wrapper with the environment to wrap.

        Args:
            env: Environment to wrap.
        """
class ObservationWrapper(Wrapper):
    """Base class for wrappers that modify observations."""

    def observation(self, observation):
        """Transform an observation (must be implemented by subclasses).

        Args:
            observation: Original observation.

        Returns:
            Transformed observation.
        """
class ActionWrapper(Wrapper):
    """Base class for wrappers that modify actions."""

    def action(self, action):
        """Transform an action before passing it to the environment.

        Args:
            action: Action from the agent.

        Returns:
            Transformed action for the environment.
        """
class RewardWrapper(Wrapper):
    """Base class for wrappers that modify rewards."""

    def reward(self, reward):
        """Transform a reward (must be implemented by subclasses).

        Args:
            reward: Original reward.

        Returns:
            Transformed reward.
        """

# Base class for defining action and observation spaces.
class Space:
    """Base class for action and observation spaces.

    Attributes:
        shape: Shape of space samples (tuple[int, ...] | None).
        dtype: Data type of space samples (np.dtype | None).
        np_random: Random number generator for the space.
    """

    def sample(self, mask: Any | None = None, probability: Any | None = None) -> T_cov:
        """Sample a random element from the space.

        Args:
            mask: A mask used for random sampling.
            probability: A probability mask used for sampling.

        Returns:
            Random sample from the space.
        """

    def contains(self, x) -> bool:
        """Check if x is contained in the space.

        Args:
            x: Element to check.

        Returns:
            True if x is in the space, False otherwise.
        """

    def seed(self, seed: int | None = None) -> int | list[int] | dict[str, int]:
        """Set random seed for sampling.

        Args:
            seed (int, optional): Random seed.

        Returns:
            Seed values used for the PRNGs.
        """

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the random number generator for this space."""

    @property
    def is_np_flattenable(self) -> bool:
        """Checks whether this space can be flattened to a Box."""

import gymnasium as gym
from gymnasium import spaces
import numpy as np
class CustomEnv(gym.Env):
def __init__(self):
super().__init__()
# Define action and observation spaces
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(
low=0, high=1, shape=(4,), dtype=np.float32
)
# Initialize state
self.state = None
def step(self, action):
# Implement environment logic
observation = self.observation_space.sample()
reward = 1.0
terminated = False
truncated = False
info = {}
return observation, reward, terminated, truncated, info
def reset(self, seed=None, options=None):
super().reset(seed=seed)
# Reset environment state
self.state = self.observation_space.sample()
observation = self.state
info = {}
return observation, infoclass LoggingWrapper(gym.Wrapper):
def __init__(self, env):
super().__init__(env)
self.step_count = 0
def step(self, action):
observation, reward, terminated, truncated, info = self.env.step(action)
self.step_count += 1
print(f"Step {self.step_count}: action={action}, reward={reward}")
return observation, reward, terminated, truncated, info
def reset(self, **kwargs):
self.step_count = 0
return self.env.reset(**kwargs)
# Usage
env = gym.make('CartPole-v1')
wrapped_env = LoggingWrapper(env)Install with Tessl CLI
npx tessl i tessl/pypi-gymnasium