A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym).
—
The core API provides the fundamental classes and interfaces that form the foundation of all Gymnasium environments and wrappers.
`Env` is the main class for implementing reinforcement learning environments; it defines the standard step/reset/render/close interface.
class Env:
    """Base environment class for reinforcement learning.

    Defines the standard step/reset/render/close interface that all
    Gymnasium environments implement.

    Attributes:
        action_space: The Space object for valid actions.
        observation_space: The Space object for valid observations.
        spec: Environment specification metadata.
        metadata: Environment metadata dict.
        render_mode: Current rendering mode.
        np_random: Random number generator for the environment.
    """

    def step(self, action) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """Execute one environment step.

        Args:
            action: Action to take in the environment.

        Returns:
            tuple: (observation, reward, terminated, truncated, info)
                - observation: Agent's observation of the environment.
                - reward: Reward for taking the action.
                - terminated: Whether the episode ended due to a terminal state.
                - truncated: Whether the episode ended due to a time limit.
                - info: Additional information dict.
        """

    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """Reset environment to initial state.

        Args:
            seed (int, optional): Random seed for reproducibility.
            options (dict, optional): Environment-specific options.

        Returns:
            tuple: (observation, info)
                - observation: Initial observation.
                - info: Additional information dict.
        """

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """Render the environment for visualization.

        Returns:
            Rendered frame(s) or None depending on render_mode.
        """

    def close(self) -> None:
        """Clean up environment resources."""

    @property
    def unwrapped(self):
        """Returns the base non-wrapped environment."""

    @property
    def np_random_seed(self) -> int:
        """Returns the environment's random seed."""

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the given attribute exists."""

    def get_wrapper_attr(self, name: str):
        """Gets the attribute from the environment."""

    def set_wrapper_attr(self, name: str, value, *, force: bool = True) -> bool:
        """Sets the attribute on the environment."""

# Base classes for modifying environment behavior without changing the underlying environment.
class Wrapper(Env):
    """Base wrapper class that delegates to a wrapped environment.

    Attributes:
        env: The wrapped environment.
        unwrapped: The base unwrapped environment.
    """

    def __init__(self, env):
        """Initialize wrapper with the environment to wrap.

        Args:
            env: Environment to wrap.
        """
class ObservationWrapper(Wrapper):
    """Base class for wrappers that modify observations."""

    def observation(self, observation):
        """Transform an observation (must be implemented by subclasses).

        Args:
            observation: Original observation.

        Returns:
            Transformed observation.
        """
class ActionWrapper(Wrapper):
    """Base class for wrappers that modify actions."""

    def action(self, action):
        """Transform an action before passing it to the environment.

        Args:
            action: Action from the agent.

        Returns:
            Transformed action for the environment.
        """
class RewardWrapper(Wrapper):
    """Base class for wrappers that modify rewards."""

    def reward(self, reward):
        """Transform a reward (must be implemented by subclasses).

        Args:
            reward: Original reward.

        Returns:
            Transformed reward.
        """

# Base class for defining action and observation spaces.
class Space:
    """Base class for action and observation spaces.

    Attributes:
        shape: Shape of space samples (tuple[int, ...] | None).
        dtype: Data type of space samples (np.dtype | None).
        np_random: Random number generator for the space.
    """

    def sample(self, mask: Any | None = None, probability: Any | None = None) -> T_cov:
        """Sample a random element from the space.

        Args:
            mask: A mask used for random sampling.
            probability: A probability mask used for sampling.

        Returns:
            Random sample from the space.
        """

    def contains(self, x) -> bool:
        """Check if x is contained in the space.

        Args:
            x: Element to check.

        Returns:
            True if x is in the space, False otherwise.
        """

    def seed(self, seed: int | None = None) -> int | list[int] | dict[str, int]:
        """Set random seed for sampling.

        Args:
            seed (int, optional): Random seed.

        Returns:
            Seed values used for the PRNGs.
        """

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the random number generator for this space."""

    @property
    def is_np_flattenable(self) -> bool:
        """Checks whether this space can be flattened to a Box."""

import gymnasium as gym
from gymnasium import spaces
import numpy as np
class CustomEnv(gym.Env):
def __init__(self):
super().__init__()
# Define action and observation spaces
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(
low=0, high=1, shape=(4,), dtype=np.float32
)
# Initialize state
self.state = None
def step(self, action):
# Implement environment logic
observation = self.observation_space.sample()
reward = 1.0
terminated = False
truncated = False
info = {}
return observation, reward, terminated, truncated, info
def reset(self, seed=None, options=None):
super().reset(seed=seed)
# Reset environment state
self.state = self.observation_space.sample()
observation = self.state
info = {}
return observation, infoclass LoggingWrapper(gym.Wrapper):
def __init__(self, env):
super().__init__(env)
self.step_count = 0
def step(self, action):
observation, reward, terminated, truncated, info = self.env.step(action)
self.step_count += 1
print(f"Step {self.step_count}: action={action}, reward={reward}")
return observation, reward, terminated, truncated, info
def reset(self, **kwargs):
self.step_count = 0
return self.env.reset(**kwargs)
# Usage
env = gym.make('CartPole-v1')
wrapped_env = LoggingWrapper(env)Install with Tessl CLI
npx tessl i tessl/pypi-gymnasium