Continuous control environments and MuJoCo Python bindings for physics-based simulation and Reinforcement Learning
—
Pre-built collection of continuous control reinforcement learning environments spanning diverse domains including locomotion, manipulation, and classic control problems. The suite provides standardized interfaces, consistent action/observation spaces, and benchmark task definitions for RL research.
Load environments by domain and task name with optional configuration parameters.
def load(domain_name: str, task_name: str, task_kwargs=None, environment_kwargs=None, visualize_reward=False):
"""
Returns an environment from a domain name, task name and optional settings.
Parameters:
- domain_name: String name of the domain (e.g., 'cartpole', 'walker')
- task_name: String name of the task (e.g., 'balance', 'walk')
- task_kwargs: Optional dict of keyword arguments for the task
- environment_kwargs: Optional dict of keyword arguments for the environment
- visualize_reward: Optional bool to enable reward visualization in rendering
Returns:
Environment instance ready for interaction
Example:
>>> env = suite.load('cartpole', 'balance')
>>> env = suite.load('walker', 'walk', task_kwargs={'random': 42})
"""
def build_environment(domain_name: str, task_name: str, task_kwargs=None, environment_kwargs=None, visualize_reward=False):
"""
Returns an environment from the suite with comprehensive error handling.
Parameters: Same as load()
Raises:
- ValueError: If domain or task doesn't exist
Returns:
Environment instance
Note: Identical functionality to load() but with explicit error handling
"""Pre-defined collections of environments organized by difficulty and purpose.
# Complete environment catalog
ALL_TASKS: tuple
"""Tuple containing all available (domain_name, task_name) pairs"""
# Difficulty-based collections
BENCHMARKING: tuple
"""Tuple of (domain, task) pairs used for benchmarking"""
EASY: tuple
"""Tuple of easier difficulty tasks suitable for initial testing"""
HARD: tuple
"""Tuple of challenging tasks for advanced evaluation"""
EXTRA: tuple
"""Tuple of additional tasks not included in benchmarking set"""
# Visualization-based collections
REWARD_VIZ: tuple
"""Tuple of tasks that support reward visualization"""
NO_REWARD_VIZ: tuple
"""Tuple of tasks without reward visualization support"""
# Domain organization
TASKS_BY_DOMAIN: dict
"""Dict mapping domain names to tuples of their task names"""The suite includes environments across these domains:
# Locomotion domains
acrobot # Acrobot swing-up tasks
cheetah # Cheetah running tasks
hopper # Single-leg hopping tasks
humanoid # Humanoid locomotion tasks
humanoid_CMU # CMU humanoid with mocap data
quadruped # Four-legged locomotion
swimmer # Swimming locomotion
walker # Bipedal walking tasks
dog # Dog locomotion tasks
# Manipulation domains
finger # Finger manipulation tasks
manipulator # Robotic arm manipulation
reacher # Point reaching tasks
stacker # Block stacking tasks
# Classic control domains
ball_in_cup # Ball-in-cup catching
cartpole # Cartpole balancing
pendulum # Pendulum swing-up
point_mass # Point mass navigation
# Control theory domains
lqr # Linear quadratic regulator
# Aquatic domains
fish # Fish swimming tasks

from dm_control import suite
# Load environment
env = suite.load('cartpole', 'balance')
# Environment interaction loop
time_step = env.reset()
while not time_step.last():
    action = env.action_spec().generate_value()  # Spec-conforming placeholder action (not random)
    time_step = env.step(action)
    print(f"Reward: {time_step.reward}")
    print(f"Observation: {time_step.observation}")

# Explore available environments
print("All available tasks:")
for domain, task in suite.ALL_TASKS:
print(f" {domain}/{task}")
print(f"\nBenchmarking tasks: {len(suite.BENCHMARKING)}")
print(f"Easy tasks: {len(suite.EASY)}")
print(f"Hard tasks: {len(suite.HARD)}")
# Explore domain-specific tasks
print("\nTasks by domain:")
for domain, tasks in suite.TASKS_BY_DOMAIN.items():
    print(f" {domain}: {tasks}")

# Load with custom task parameters
env = suite.load(
    'walker', 'walk',
    task_kwargs={'random': 42},  # Set random seed
    environment_kwargs={'flat_observation': True},  # Flatten observations
)
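The flat_observation option collapses the per-sensor observation dict into a single vector. The underlying idea, sketched with plain NumPy on a stand-in observation dict (the keys and shapes below are illustrative, not a real environment's observations):

```python
import numpy as np

# Stand-in observation dict (keys and shapes are made up for illustration).
observation = {
    'orientations': np.zeros(14),
    'height': np.array([1.2]),
    'velocity': np.zeros(9),
}

# Flattening ravels each entry and concatenates them into one 1-D vector.
flat = np.concatenate([np.ravel(v) for v in observation.values()])
print(flat.shape)  # (24,)
```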
# Enable reward visualization
env = suite.load('reacher', 'easy', visualize_reward=True)

env = suite.load('humanoid', 'stand')
# Inspect environment specifications
print(f"Action spec: {env.action_spec()}")
print(f"Observation spec: {env.observation_spec()}")
print(f"Reward range: {env.reward_range()}")
# Access physics simulation
physics = env.physics
print(f"Timestep: {physics.timestep()}")
print(f"Control: {physics.control}")try:
    env = suite.load('nonexistent_domain', 'task')
except ValueError as e:
    print(f"Domain error: {e}")

try:
    env = suite.load('cartpole', 'nonexistent_task')
except ValueError as e:
print(f"Task error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-dm-control