A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym).
—
Vector environments enable batched execution of multiple environment instances for improved performance when training with parallel environments. They provide the same interface as regular environments but operate on batches of observations, actions, and rewards.
Base class that defines the interface for vectorized environments.
class VectorEnv:
    """
    Base class for vectorized environments.

    Attributes:
        num_envs: Number of parallel environments
        single_observation_space: Observation space for an individual environment
        single_action_space: Action space for an individual environment
        observation_space: Batched observation space
        action_space: Batched action space
        closed: Whether the environments are closed
    """

    def step(self, actions: ActType) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Execute actions in all environments.

        Args:
            actions: Batch of actions for all environments

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
                - observations: Batch of observations
                - rewards: Array of rewards
                - terminations: Array of termination flags
                - truncations: Array of truncation flags
                - infos: Dictionary of batched info values
        """

    def reset(self, seed: int | list[int] | None = None,
              options: dict | list[dict] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """
        Reset all environments.

        Args:
            seed: Random seed(s) for environments
            options: Environment options

        Returns:
            tuple: (observations, infos)
                - observations: Batch of initial observations
                - infos: Dictionary of batched info values
        """

    def close(self) -> None:
        """Close all environments."""

    def call(self, name: str, *args, **kwargs) -> list[Any]:
        """
        Call a method on all environments.

        Args:
            name: Method name to call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            List of method results, one per environment
        """

    def get_attr(self, name: str) -> list[Any]:
        """
        Get an attribute from all environments.

        Args:
            name: Attribute name

        Returns:
            List of attribute values, one per environment
        """

    def set_attr(self, name: str, values: Any | list[Any]) -> None:
        """
        Set an attribute on all environments.

        Args:
            name: Attribute name
            values: Value(s) to set
        """
class SyncVectorEnv(VectorEnv):
    """
    Synchronous vectorized environment.

    Runs environments sequentially in the same process. Simple, but may be
    slower than AsyncVectorEnv for computationally intensive environments.

    Args:
        env_fns: Iterator or sequence of functions that return environment instances
        copy: Whether to deepcopy observations
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for the vector environment
    """

    def __init__(self, env_fns: Iterator[Callable[[], Env]] | Sequence[Callable[[], Env]],
                 copy: bool = True, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass
class AsyncVectorEnv(VectorEnv):
    """
    Asynchronous vectorized environment.

    Runs environments in parallel processes using multiprocessing,
    giving better performance for computationally intensive environments.

    Args:
        env_fns: Sequence of functions that return environment instances
        shared_memory: Whether to use shared memory for observations
        copy: Whether to deepcopy observations
        context: Multiprocessing context ('spawn', 'fork', 'forkserver')
        daemon: Whether worker processes are daemonic
        worker: Custom worker function
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for the vector environment
    """

    def __init__(self, env_fns: Sequence[Callable[[], Env]], shared_memory: bool = True,
                 copy: bool = True, context: str | None = None, daemon: bool = True,
                 worker: Callable | None = None, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass

    def step_async(self, actions: ActType) -> None:
        """
        Asynchronously execute actions (non-blocking).

        Args:
            actions: Batch of actions
        """

    def step_wait(self) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Wait for the asynchronous step to complete.

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
        """

    def reset_async(self, seed: int | list[int] | None = None,
                    options: dict | list[dict] | None = None) -> None:
        """
        Asynchronously reset environments (non-blocking).

        Args:
            seed: Random seed(s)
            options: Environment options
        """

    def reset_wait(self) -> tuple[ObsType, dict[str, Any]]:
        """
        Wait for the asynchronous reset to complete.

        Returns:
            tuple: (observations, infos)
        """
class VectorWrapper(VectorEnv):
    """Base wrapper for vector environments.

    Args:
        env: Vector environment to wrap
    """

    def __init__(self, env: VectorEnv):
        pass
class VectorObservationWrapper(VectorWrapper):
    """Base class for wrappers that transform batched observations."""

    def observation(self, observations: ObsType) -> ObsType:
        """Transform a batch of observations.

        Args:
            observations: Batch of observations

        Returns:
            The transformed batch of observations
        """
class VectorActionWrapper(VectorWrapper):
    """Base class for wrappers that transform batched actions."""

    def action(self, actions: ActType) -> ActType:
        """Transform a batch of actions.

        Args:
            actions: Batch of actions

        Returns:
            The transformed batch of actions
        """
class VectorRewardWrapper(VectorWrapper):
    """Base class for wrappers that transform batched rewards."""

    def reward(self, rewards: ArrayType) -> ArrayType:
        """
        Transform a batch of rewards.

        Args:
            rewards: Batch of rewards

        Returns:
            Transformed batch of rewards
        """
class AutoresetMode(Enum):
    """
    Auto-reset modes for vector environments.

    Values:
        NEXT_STEP: Reset on the next step after termination/truncation
        SAME_STEP: Reset immediately on the same step
        DISABLED: No auto-reset
    """

    NEXT_STEP = "NextStep"
    SAME_STEP = "SameStep"
    DISABLED = "Disabled"
import gymnasium as gym
from gymnasium.vector import SyncVectorEnv, AsyncVectorEnv


# Create synchronous vector environment
def make_env():
    """Factory returning a fresh CartPole environment instance."""
    return gym.make('CartPole-v1')


env_fns = [make_env for _ in range(4)]
sync_vec_env = SyncVectorEnv(env_fns)

# Create asynchronous vector environment (better for complex environments)
async_vec_env = AsyncVectorEnv(env_fns)

# Using make_vec for convenience
vec_env = gym.make_vec('CartPole-v1', num_envs=4, vectorization_mode='async')
# Create vector environment
vec_env = gym.make_vec('CartPole-v1', num_envs=4)

# Reset all environments
observations, infos = vec_env.reset(seed=42)
print(f"Observations shape: {observations.shape}")  # (4, 4) for CartPole

# Take actions in all environments
actions = vec_env.action_space.sample()  # Sample batch of actions
print(f"Actions shape: {actions.shape}")  # (4,)

# Step all environments
observations, rewards, terminations, truncations, infos = vec_env.step(actions)
print(f"Rewards: {rewards}")  # Array of 4 rewards
print(f"Terminations: {terminations}")  # Array of 4 boolean flags

# Handle individual environment resets automatically
for i in range(100):
    actions = vec_env.action_space.sample()
    observations, rewards, terminations, truncations, infos = vec_env.step(actions)
    # Vector environments automatically reset terminated/truncated environments.
    # The observations array will contain reset observations for those environments.

vec_env.close()
# Create environments with different configurations
def make_env(env_id, seed=None):
    """Return a zero-argument factory that builds (and optionally seeds) an env.

    Args:
        env_id: Gymnasium environment id to create
        seed: Optional seed applied via an initial reset

    Returns:
        A callable suitable for passing to a vector-env constructor.
    """
    def _make():
        env = gym.make(env_id)
        if seed is not None:
            env.reset(seed=seed)
        return env
    return _make


env_fns = [
    make_env('CartPole-v1', seed=i)
    for i in range(4)
]
vec_env = AsyncVectorEnv(env_fns, shared_memory=True)

# Call methods on all environments
max_steps = vec_env.call('spec')[0].max_episode_steps
print(f"Max episode steps: {max_steps}")

# Get attributes from all environments
action_spaces = vec_env.get_attr('action_space')
print(f"All action spaces are Discrete(2): {all(isinstance(space, gym.spaces.Discrete) and space.n == 2 for space in action_spaces)}")

# Set attributes on all environments (if supported)
vec_env.set_attr('render_mode', 'rgb_array')
# For AsyncVectorEnv, you can use async operations for better control.
# NOTE(review): this snippet relies on the zero-argument `make_env` defined in
# the first example, not the `make_env(env_id, seed)` factory above — confirm.
async_env = AsyncVectorEnv([make_env for _ in range(4)])

# Reset asynchronously
async_env.reset_async(seed=42)
observations, infos = async_env.reset_wait()

# Step asynchronously
actions = async_env.action_space.sample()
async_env.step_async(actions)

# Do other work here while environments are stepping...

# Wait for step to complete
observations, rewards, terminations, truncations, infos = async_env.step_wait()
import numpy as np
from gymnasium.vector.utils import spaces as vector_spaces  # NOTE(review): appears unused — confirm


class BatchedNormalizeObservation(VectorObservationWrapper):
    """Normalize observations across the batch using running statistics."""

    def __init__(self, env):
        super().__init__(env)
        # Running per-feature estimates, shaped like a single observation.
        self.running_mean = np.zeros(env.single_observation_space.shape)
        self.running_var = np.ones(env.single_observation_space.shape)
        self.count = 0  # NOTE(review): never updated or read below — confirm intent

    def observation(self, observations):
        """Update running statistics from the batch and return normalized observations."""
        batch_mean = np.mean(observations, axis=0)
        batch_var = np.var(observations, axis=0)
        # Exponential moving-average update (simplified; not an exact running variance)
        self.running_mean = self.running_mean * 0.99 + batch_mean * 0.01
        self.running_var = self.running_var * 0.99 + batch_var * 0.01
        # Epsilon guards against division by zero for constant features
        return (observations - self.running_mean) / np.sqrt(self.running_var + 1e-8)


# Apply wrapper
vec_env = gym.make_vec('CartPole-v1', num_envs=4)
vec_env = BatchedNormalizeObservation(vec_env)
import atexit

vec_env = None  # so the finally-clause can tell whether creation succeeded
try:
    # Create vector environment
    vec_env = AsyncVectorEnv([make_env for _ in range(4)])

    # Register cleanup so worker processes are closed even on abnormal exit
    atexit.register(vec_env.close)

    # Training loop
    observations, infos = vec_env.reset()
    for step in range(1000):
        actions = vec_env.action_space.sample()
        observations, rewards, terminations, truncations, infos = vec_env.step(actions)

        # Handle any exceptions in individual environments.
        # NOTE(review): vector-env `infos` is a dict of batched values, not a
        # list — iterating it with enumerate() would yield keys. Check the
        # batched dict instead; confirm the exact key layout against Gymnasium.
        if 'exception' in infos:
            for i, exc in enumerate(infos['exception']):
                if exc is not None:
                    print(f"Environment {i} had exception: {exc}")
except KeyboardInterrupt:
    print("Training interrupted")
finally:
    if vec_env is not None:
        vec_env.close()
# Install with Tessl CLI:
#   npx tessl i tessl/pypi-gymnasium