dm-control: continuous control environments and MuJoCo Python bindings for physics-based simulation and reinforcement learning.

Viewer module: an interactive GUI application for visualizing dm-control environments, executing policies, and exploring simulation dynamics. It provides real-time rendering, camera controls, policy-execution capabilities, and comprehensive environment-interaction tools.

launch — launch an interactive viewer for any dm-control environment.
def launch(environment_loader, policy=None, title='Explorer', width=1024, height=768) -> None:
    """Launch an interactive viewer for a dm-control environment.

    Parameters:
    - environment_loader: Callable that returns an Environment instance,
      or an Environment instance directly.
    - policy: Optional callable for automatic policy execution.
    - title: Window title string (default: 'Explorer').
    - width: Window width in pixels (default: 1024).
    - height: Window height in pixels (default: 768).

    Raises:
    - ValueError: When environment_loader is None.

    The viewer provides:
    - Real-time environment rendering
    - Interactive camera controls
    - Manual action input via GUI controls
    - Policy execution and visualization
    - Episode reset and control
    - Physics parameter inspection

    Example:
    >>> from dm_control import suite, viewer
    >>> env = suite.load('cartpole', 'balance')
    >>> viewer.launch(env)

    With policy:
    >>> def random_policy(time_step):
    ...     return env.action_spec().generate_value()
    >>> viewer.launch(env, policy=random_policy)

    With environment loader:
    >>> def env_loader():
    ...     return suite.load('walker', 'walk')
    >>> viewer.launch(env_loader, title='Walker Environment')
    """


# Core application class for advanced viewer customization.
class Application:
    """Core viewer application with GUI and rendering capabilities.

    Provides lower-level access to viewer functionality for custom
    applications and advanced use cases.
    """

    def __init__(self, title: str = 'Explorer', width: int = 1024, height: int = 768):
        """Initialize the viewer application.

        Parameters:
        - title: Application window title.
        - width: Window width in pixels.
        - height: Window height in pixels.
        """

    def launch(self, environment_loader, policy=None) -> None:
        """Launch the viewer with an environment and optional policy.

        Parameters:
        - environment_loader: Environment or environment factory function.
        - policy: Optional policy function for automatic control.
        """


from dm_control import suite, viewer
# View a suite environment.
env = suite.load('humanoid', 'walk')
viewer.launch(env)

# View with custom window settings.
viewer.launch(env, title='Humanoid Walker', width=1280, height=720)

from dm_control import suite, viewer
import numpy as np

# Load the environment.
env = suite.load('cheetah', 'run')


def random_policy(time_step):
    """Random policy for demonstration."""
    return env.action_spec().generate_value()


def simple_controller(time_step):
    """Simple PD controller example."""
    # Extract joint positions and velocities from the observation dict.
    obs = time_step.observation
    positions = obs.get('position', np.zeros(6))
    velocities = obs.get('velocity', np.zeros(6))
    # Simple PD control toward the zero position.
    kp, kd = 1.0, 0.1
    control = -kp * positions - kd * velocities
    # Clip to the environment's action bounds.
    spec = env.action_spec()
    return np.clip(control, spec.minimum, spec.maximum)


# Launch with the controller policy.
viewer.launch(env, policy=simple_controller, title='Cheetah with Controller')

from dm_control import suite, composer, viewer
import numpy as np


def create_environment():
    """Factory function for creating environments.

    Creates a fresh environment instance each time it is called.
    """
    return suite.load('walker', 'walk',
                      task_kwargs={'random': np.random.randint(1000)})


def create_composer_environment():
    """Factory for composer environments.

    NOTE(review): MyCustomTask and MyCustomArena are user-supplied
    placeholders, and the composer.Environment call signature should be
    confirmed against the dm_control composer API.
    """
    task = MyCustomTask()
    arena = MyCustomArena()
    return composer.Environment(task, arena, time_limit=20.0)


# Launch with factory functions.
viewer.launch(create_environment, title='Random Walker')
viewer.launch(create_composer_environment, title='Custom Environment')

from dm_control import suite, viewer
import numpy as np

# Load the environment for policy development.
env = suite.load('cartpole', 'balance')


class PDController:
    """PD controller for cartpole balancing."""

    def __init__(self, kp=10.0, kd=1.0):
        self.kp = kp  # proportional gain
        self.kd = kd  # derivative gain

    def __call__(self, time_step):
        obs = time_step.observation
        angle = obs['orientation'][0]      # pole angle
        angular_vel = obs['velocity'][1]   # angular velocity
        # PD control law.
        control = -self.kp * angle - self.kd * angular_vel
        return np.array([control])


# Test different controller parameter settings.
controllers = [
    PDController(kp=5.0, kd=0.5),
    PDController(kp=10.0, kd=1.0),
    PDController(kp=20.0, kd=2.0),
]

for i, controller in enumerate(controllers):
    print(f"Testing controller {i+1}")
    viewer.launch(env, policy=controller,
                  title=f'Controller {i+1} (kp={controller.kp}, kd={controller.kd})')

from dm_control import viewer
import numpy as np
from dm_control import suite

# Create a custom application with a larger window.
app = viewer.application.Application(
    title='Custom Physics Viewer',
    width=1600,
    height=900,
)


def environment_with_variations():
    """Create an environment with random domain/task variations."""
    domains = ['walker', 'humanoid', 'cheetah']
    domain = np.random.choice(domains)
    if domain == 'walker':
        task = np.random.choice(['walk', 'run', 'stand'])
    elif domain == 'humanoid':
        task = np.random.choice(['walk', 'run', 'stand'])
    else:  # cheetah
        task = 'run'
    return suite.load(domain, task)


def adaptive_policy(time_step):
    """Policy that adapts its action size to the active environment."""
    obs = time_step.observation
    # Simple heuristic based on the observation structure.
    if 'orientations' in obs:  # humanoid-like
        return np.random.uniform(-0.5, 0.5, size=21)
    elif 'position' in obs:  # walker-like
        return np.random.uniform(-1, 1, size=6)
    else:  # default fallback
        return np.random.uniform(-1, 1, size=12)


# Launch via the custom application.
app.launch(environment_with_variations, policy=adaptive_policy)

from dm_control import suite, viewer
# Environment for debugging.
env = suite.load('manipulator', 'bring_ball')


def debug_policy(time_step):
    """Policy that prints debugging output on each step."""
    obs = time_step.observation
    # Print observation info for debugging.
    print(f"Step type: {time_step.step_type}")
    print(f"Reward: {time_step.reward}")
    print(f"Observation keys: {list(obs.keys())}")
    # Simple random action.
    action = env.action_spec().generate_value()
    print(f"Action: {action}")
    return action


# Launch with debug output.
viewer.launch(env, policy=debug_policy, title='Debug Mode')

# The interactive viewer provides these controls:
# (the detailed control listing is not present in this extract)
# Viewer with physics inspection.
from dm_control import suite, viewer, mujoco

env = suite.load('quadruped', 'walk')


def physics_inspector(time_step):
    """Policy that inspects the physics state each step."""
    physics = env.physics
    # Access physics data during the viewer session.
    print(f"Time: {physics.time():.3f}")
    print(f"Energy: {physics.named.data.energy}")
    return env.action_spec().generate_value()


viewer.launch(env, policy=physics_inspector)

# Policy function signature
# NOTE(review): Callable, TimeStep, np, and Environment are assumed to be
# imported in the original module (typing.Callable, dm_env.TimeStep, numpy,
# dm_env.Environment) — confirm against the source package.

# Policy function signature.
PolicyFunction = Callable[[TimeStep], np.ndarray]
"""Policy function that takes a TimeStep and returns actions.

Parameters:
- time_step: Current environment timestep.

Returns:
Action array conforming to the environment's action_spec().
"""

# Environment loader signature.
EnvironmentLoader = Callable[[], Environment]
"""Function that creates and returns an Environment instance.

Returns:
Fresh Environment instance ready for interaction.
"""

# Install with Tessl CLI
npx tessl i tessl/pypi-dm-control