A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym).
—
Vector environments enable batched execution of multiple environment instances for improved performance when training with parallel environments. They provide the same interface as regular environments but operate on batches of observations, actions, and rewards.
Base class that defines the interface for vectorized environments.
class VectorEnv:
    """
    Base class for vectorized environments.

    Attributes:
        num_envs: Number of parallel environments
        single_observation_space: Observation space for an individual environment
        single_action_space: Action space for an individual environment
        observation_space: Batched observation space
        action_space: Batched action space
        closed: Whether the environments are closed
    """

    def step(self, actions: ActType) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Execute actions in all environments.

        Args:
            actions: Batch of actions for all environments

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
                - observations: Batch of observations
                - rewards: Array of rewards
                - terminations: Array of termination flags
                - truncations: Array of truncation flags
                - infos: Dictionary of batched info values
        """

    def reset(self, seed: int | list[int] | None = None,
              options: dict | list[dict] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """
        Reset all environments.

        Args:
            seed: Random seed(s) for environments
            options: Environment options

        Returns:
            tuple: (observations, infos)
                - observations: Batch of initial observations
                - infos: Dictionary of batched info values
        """

    def close(self) -> None:
        """Close all environments."""

    def call(self, name: str, *args, **kwargs) -> list[Any]:
        """
        Call a method on all environments.

        Args:
            name: Method name to call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            List of method results, one per environment
        """

    def get_attr(self, name: str) -> list[Any]:
        """
        Get an attribute from all environments.

        Args:
            name: Attribute name

        Returns:
            List of attribute values, one per environment
        """

    def set_attr(self, name: str, values: Any | list[Any]) -> None:
        """
        Set an attribute on all environments.

        Args:
            name: Attribute name
            values: Value(s) to set
        """
class SyncVectorEnv(VectorEnv):
    """
    Synchronous vectorized environment.

    Runs environments sequentially in the same process. Simple, but may be
    slower than AsyncVectorEnv for computationally intensive environments.

    Args:
        env_fns: Iterator or sequence of functions that return environment instances
        copy: Whether to deepcopy observations
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for the vector environment
    """

    def __init__(self, env_fns: Iterator[Callable[[], Env]] | Sequence[Callable[[], Env]],
                 copy: bool = True, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass
class AsyncVectorEnv(VectorEnv):
    """
    Asynchronous vectorized environment.

    Runs environments in parallel processes using multiprocessing,
    giving better performance for computationally intensive environments.

    Args:
        env_fns: Sequence of functions that return environment instances
        shared_memory: Whether to use shared memory for observations
        copy: Whether to deepcopy observations
        context: Multiprocessing context ('spawn', 'fork', 'forkserver')
        daemon: Whether worker processes are daemonic
        worker: Custom worker function
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for the vector environment
    """

    def __init__(self, env_fns: Sequence[Callable[[], Env]], shared_memory: bool = True,
                 copy: bool = True, context: str | None = None, daemon: bool = True,
                 worker: Callable | None = None, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass

    def step_async(self, actions: ActType) -> None:
        """
        Asynchronously execute actions (non-blocking).

        Args:
            actions: Batch of actions
        """

    def step_wait(self) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Wait for the asynchronous step to complete.

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
        """

    def reset_async(self, seed: int | list[int] | None = None,
                    options: dict | list[dict] | None = None) -> None:
        """
        Asynchronously reset environments (non-blocking).

        Args:
            seed: Random seed(s)
            options: Environment options
        """

    def reset_wait(self) -> tuple[ObsType, dict[str, Any]]:
        """
        Wait for the asynchronous reset to complete.

        Returns:
            tuple: (observations, infos)
        """
class VectorWrapper(VectorEnv):
    """Base wrapper for vector environments.

    Args:
        env: Vector environment to wrap
    """

    def __init__(self, env: VectorEnv):
        pass
class VectorObservationWrapper(VectorWrapper):
    """Base class for wrappers that transform batched observations."""

    def observation(self, observations: ObsType) -> ObsType:
        """Transform a batch of observations.

        Args:
            observations: Batch of observations

        Returns:
            The transformed batch of observations
        """
class VectorActionWrapper(VectorWrapper):
    """Base class for wrappers that transform batched actions."""

    def action(self, actions: ActType) -> ActType:
        """Transform a batch of actions.

        Args:
            actions: Batch of actions

        Returns:
            The transformed batch of actions
        """
class VectorRewardWrapper(VectorWrapper):
    """Base class for wrappers that transform batched rewards."""

    def reward(self, rewards: ArrayType) -> ArrayType:
        """
        Transform a batch of rewards.

        Args:
            rewards: Batch of rewards

        Returns:
            Transformed batch of rewards
        """
class AutoresetMode(Enum):
    """
    Auto-reset modes for vector environments.

    Values:
        NEXT_STEP: Reset on the next step after termination/truncation
        SAME_STEP: Reset immediately on the same step
        DISABLED: No auto-reset
    """

    NEXT_STEP = "NextStep"
    SAME_STEP = "SameStep"
    DISABLED = "Disabled"
import gymnasium as gym
from gymnasium.vector import SyncVectorEnv, AsyncVectorEnv


# Create synchronous vector environment
def make_env():
    """Factory returning a fresh CartPole environment instance."""
    return gym.make('CartPole-v1')


env_fns = [make_env for _ in range(4)]
sync_vec_env = SyncVectorEnv(env_fns)

# Create asynchronous vector environment (better for complex environments)
async_vec_env = AsyncVectorEnv(env_fns)

# Using make_vec for convenience
vec_env = gym.make_vec('CartPole-v1', num_envs=4, vectorization_mode='async')
# Create vector environment
vec_env = gym.make_vec('CartPole-v1', num_envs=4)

# Reset all environments
observations, infos = vec_env.reset(seed=42)
print(f"Observations shape: {observations.shape}")  # (4, 4) for CartPole

# Take actions in all environments
actions = vec_env.action_space.sample()  # Sample batch of actions
print(f"Actions shape: {actions.shape}")  # (4,)

# Step all environments
observations, rewards, terminations, truncations, infos = vec_env.step(actions)
print(f"Rewards: {rewards}")  # Array of 4 rewards
print(f"Terminations: {terminations}")  # Array of 4 boolean flags

# Handle individual environment resets automatically
for i in range(100):
    actions = vec_env.action_space.sample()
    observations, rewards, terminations, truncations, infos = vec_env.step(actions)
    # Vector environments automatically reset terminated/truncated environments.
    # The observations array will contain reset observations for those environments.

vec_env.close()
# Create environments with different configurations
def make_env(env_id, seed=None):
    """Return a zero-argument factory that builds (and optionally seeds) an env.

    Args:
        env_id: Gymnasium environment id to create
        seed: Optional seed applied via an initial reset

    Returns:
        A callable suitable for passing to a vector-env constructor.
    """
    def _make():
        env = gym.make(env_id)
        if seed is not None:
            env.reset(seed=seed)
        return env
    return _make


env_fns = [
    make_env('CartPole-v1', seed=i)
    for i in range(4)
]
vec_env = AsyncVectorEnv(env_fns, shared_memory=True)

# Call methods on all environments
max_steps = vec_env.call('spec')[0].max_episode_steps
print(f"Max episode steps: {max_steps}")

# Get attributes from all environments
action_spaces = vec_env.get_attr('action_space')
print(f"All action spaces are Discrete(2): {all(isinstance(space, gym.spaces.Discrete) and space.n == 2 for space in action_spaces)}")

# Set attributes on all environments (if supported)
vec_env.set_attr('render_mode', 'rgb_array')
# For AsyncVectorEnv, you can use async operations for better control.
# NOTE(review): this snippet relies on the zero-argument `make_env` defined in
# the first example, not the `make_env(env_id, seed)` factory above — confirm.
async_env = AsyncVectorEnv([make_env for _ in range(4)])

# Reset asynchronously
async_env.reset_async(seed=42)
observations, infos = async_env.reset_wait()

# Step asynchronously
actions = async_env.action_space.sample()
async_env.step_async(actions)

# Do other work here while environments are stepping...

# Wait for step to complete
observations, rewards, terminations, truncations, infos = async_env.step_wait()
import numpy as np
from gymnasium.vector.utils import spaces as vector_spaces  # NOTE(review): appears unused — confirm


class BatchedNormalizeObservation(VectorObservationWrapper):
    """Normalize observations across the batch using running statistics."""

    def __init__(self, env):
        super().__init__(env)
        # Running per-feature estimates, shaped like a single observation.
        self.running_mean = np.zeros(env.single_observation_space.shape)
        self.running_var = np.ones(env.single_observation_space.shape)
        self.count = 0  # NOTE(review): never updated or read below — confirm intent

    def observation(self, observations):
        """Update running statistics from the batch and return normalized observations."""
        batch_mean = np.mean(observations, axis=0)
        batch_var = np.var(observations, axis=0)
        # Exponential moving-average update (simplified; not an exact running variance)
        self.running_mean = self.running_mean * 0.99 + batch_mean * 0.01
        self.running_var = self.running_var * 0.99 + batch_var * 0.01
        # Epsilon guards against division by zero for constant features
        return (observations - self.running_mean) / np.sqrt(self.running_var + 1e-8)


# Apply wrapper
vec_env = gym.make_vec('CartPole-v1', num_envs=4)
vec_env = BatchedNormalizeObservation(vec_env)
import atexit

vec_env = None  # so the finally-clause can tell whether creation succeeded
try:
    # Create vector environment
    vec_env = AsyncVectorEnv([make_env for _ in range(4)])

    # Register cleanup so worker processes are closed even on abnormal exit
    atexit.register(vec_env.close)

    # Training loop
    observations, infos = vec_env.reset()
    for step in range(1000):
        actions = vec_env.action_space.sample()
        observations, rewards, terminations, truncations, infos = vec_env.step(actions)

        # Handle any exceptions in individual environments.
        # NOTE(review): vector-env `infos` is a dict of batched values, not a
        # list — iterating it with enumerate() would yield keys. Check the
        # batched dict instead; confirm the exact key layout against Gymnasium.
        if 'exception' in infos:
            for i, exc in enumerate(infos['exception']):
                if exc is not None:
                    print(f"Environment {i} had exception: {exc}")
except KeyboardInterrupt:
    print("Training interrupted")
finally:
    if vec_env is not None:
        vec_env.close()
# Install with Tessl CLI:
#   npx tessl i tessl/pypi-gymnasium