dm-control: continuous control environments and MuJoCo Python bindings for physics-based simulation and reinforcement learning.

Viewer module: an interactive GUI application for visualizing dm-control environments, executing policies, and exploring simulation dynamics. It provides real-time rendering, camera controls, policy-execution capabilities, and comprehensive environment-interaction tools.

launch — launch an interactive viewer for any dm-control environment.
def launch(environment_loader, policy=None, title='Explorer', width=1024, height=768) -> None:
    """Launch an interactive viewer for a dm-control environment.

    Parameters:
    - environment_loader: Callable that returns an Environment instance,
      or an Environment instance directly.
    - policy: Optional callable for automatic policy execution.
    - title: Window title string (default: 'Explorer').
    - width: Window width in pixels (default: 1024).
    - height: Window height in pixels (default: 768).

    Raises:
    - ValueError: When environment_loader is None.

    The viewer provides:
    - Real-time environment rendering
    - Interactive camera controls
    - Manual action input via GUI controls
    - Policy execution and visualization
    - Episode reset and control
    - Physics parameter inspection

    Example:
    >>> from dm_control import suite, viewer
    >>> env = suite.load('cartpole', 'balance')
    >>> viewer.launch(env)

    With policy:
    >>> def random_policy(time_step):
    ...     return env.action_spec().generate_value()
    >>> viewer.launch(env, policy=random_policy)

    With environment loader:
    >>> def env_loader():
    ...     return suite.load('walker', 'walk')
    >>> viewer.launch(env_loader, title='Walker Environment')
    """


# Core application class for advanced viewer customization.
class Application:
    """Core viewer application with GUI and rendering capabilities.

    Provides lower-level access to viewer functionality for custom
    applications and advanced use cases.
    """

    def __init__(self, title: str = 'Explorer', width: int = 1024, height: int = 768):
        """Initialize the viewer application.

        Parameters:
        - title: Application window title.
        - width: Window width in pixels.
        - height: Window height in pixels.
        """

    def launch(self, environment_loader, policy=None) -> None:
        """Launch the viewer with an environment and optional policy.

        Parameters:
        - environment_loader: Environment or environment factory function.
        - policy: Optional policy function for automatic control.
        """


from dm_control import suite, viewer
# View a suite environment.
env = suite.load('humanoid', 'walk')
viewer.launch(env)

# View with custom window settings.
viewer.launch(env, title='Humanoid Walker', width=1280, height=720)

from dm_control import suite, viewer
import numpy as np

# Load the environment.
env = suite.load('cheetah', 'run')


def random_policy(time_step):
    """Random policy for demonstration."""
    return env.action_spec().generate_value()


def simple_controller(time_step):
    """Simple PD controller example."""
    # Extract joint positions and velocities from the observation dict.
    obs = time_step.observation
    positions = obs.get('position', np.zeros(6))
    velocities = obs.get('velocity', np.zeros(6))
    # Simple PD control toward the zero position.
    kp, kd = 1.0, 0.1
    control = -kp * positions - kd * velocities
    # Clip to the environment's action bounds.
    spec = env.action_spec()
    return np.clip(control, spec.minimum, spec.maximum)


# Launch with the controller policy.
viewer.launch(env, policy=simple_controller, title='Cheetah with Controller')

from dm_control import suite, composer, viewer
import numpy as np


def create_environment():
    """Factory function for creating environments.

    Creates a fresh environment instance each time it is called.
    """
    return suite.load('walker', 'walk',
                      task_kwargs={'random': np.random.randint(1000)})


def create_composer_environment():
    """Factory for composer environments.

    NOTE(review): MyCustomTask and MyCustomArena are user-supplied
    placeholders, and the composer.Environment call signature should be
    confirmed against the dm_control composer API.
    """
    task = MyCustomTask()
    arena = MyCustomArena()
    return composer.Environment(task, arena, time_limit=20.0)


# Launch with factory functions.
viewer.launch(create_environment, title='Random Walker')
viewer.launch(create_composer_environment, title='Custom Environment')

from dm_control import suite, viewer
import numpy as np

# Load the environment for policy development.
env = suite.load('cartpole', 'balance')


class PDController:
    """PD controller for cartpole balancing."""

    def __init__(self, kp=10.0, kd=1.0):
        self.kp = kp  # proportional gain
        self.kd = kd  # derivative gain

    def __call__(self, time_step):
        obs = time_step.observation
        angle = obs['orientation'][0]      # pole angle
        angular_vel = obs['velocity'][1]   # angular velocity
        # PD control law.
        control = -self.kp * angle - self.kd * angular_vel
        return np.array([control])


# Test different controller parameter settings.
controllers = [
    PDController(kp=5.0, kd=0.5),
    PDController(kp=10.0, kd=1.0),
    PDController(kp=20.0, kd=2.0),
]

for i, controller in enumerate(controllers):
    print(f"Testing controller {i+1}")
    viewer.launch(env, policy=controller,
                  title=f'Controller {i+1} (kp={controller.kp}, kd={controller.kd})')

from dm_control import viewer
import numpy as np
from dm_control import suite

# Create a custom application with a larger window.
app = viewer.application.Application(
    title='Custom Physics Viewer',
    width=1600,
    height=900,
)


def environment_with_variations():
    """Create an environment with random domain/task variations."""
    domains = ['walker', 'humanoid', 'cheetah']
    domain = np.random.choice(domains)
    if domain == 'walker':
        task = np.random.choice(['walk', 'run', 'stand'])
    elif domain == 'humanoid':
        task = np.random.choice(['walk', 'run', 'stand'])
    else:  # cheetah
        task = 'run'
    return suite.load(domain, task)


def adaptive_policy(time_step):
    """Policy that adapts its action size to the active environment."""
    obs = time_step.observation
    # Simple heuristic based on the observation structure.
    if 'orientations' in obs:  # humanoid-like
        return np.random.uniform(-0.5, 0.5, size=21)
    elif 'position' in obs:  # walker-like
        return np.random.uniform(-1, 1, size=6)
    else:  # default fallback
        return np.random.uniform(-1, 1, size=12)


# Launch via the custom application.
app.launch(environment_with_variations, policy=adaptive_policy)

from dm_control import suite, viewer
# Environment for debugging.
env = suite.load('manipulator', 'bring_ball')


def debug_policy(time_step):
    """Policy that prints debugging output on each step."""
    obs = time_step.observation
    # Print observation info for debugging.
    print(f"Step type: {time_step.step_type}")
    print(f"Reward: {time_step.reward}")
    print(f"Observation keys: {list(obs.keys())}")
    # Simple random action.
    action = env.action_spec().generate_value()
    print(f"Action: {action}")
    return action


# Launch with debug output.
viewer.launch(env, policy=debug_policy, title='Debug Mode')

# The interactive viewer provides these controls:
# (the detailed control listing is not present in this extract)
# Viewer with physics inspection.
from dm_control import suite, viewer, mujoco

env = suite.load('quadruped', 'walk')


def physics_inspector(time_step):
    """Policy that inspects the physics state each step."""
    physics = env.physics
    # Access physics data during the viewer session.
    print(f"Time: {physics.time():.3f}")
    print(f"Energy: {physics.named.data.energy}")
    return env.action_spec().generate_value()


viewer.launch(env, policy=physics_inspector)

# Policy function signature
# NOTE(review): Callable, TimeStep, np, and Environment are assumed to be
# imported in the original module (typing.Callable, dm_env.TimeStep, numpy,
# dm_env.Environment) — confirm against the source package.

# Policy function signature.
PolicyFunction = Callable[[TimeStep], np.ndarray]
"""Policy function that takes a TimeStep and returns actions.

Parameters:
- time_step: Current environment timestep.

Returns:
Action array conforming to the environment's action_spec().
"""

# Environment loader signature.
EnvironmentLoader = Callable[[], Environment]
"""Function that creates and returns an Environment instance.

Returns:
Fresh Environment instance ready for interaction.
"""

# Install with Tessl CLI
npx tessl i tessl/pypi-dm-control