0
# Environment Wrappers
1
2
Custom Gymnasium environment wrappers for observation processing, reward modification, action manipulation, and training optimization. These wrappers extend environments with specialized functionality needed for effective RL training.
3
4
## Core Imports
5
6
```python
7
from rl_zoo3.wrappers import (
8
TruncatedOnSuccessWrapper,
9
ActionNoiseWrapper,
10
ActionSmoothingWrapper,
11
DelayedRewardWrapper,
12
HistoryWrapper,
13
HistoryWrapperObsDict,
14
FrameSkip,
15
MaskVelocityWrapper
16
)
17
import gymnasium as gym
18
import numpy as np
from typing import Optional
from stable_baselines3.common.type_aliases import GymResetReturn, GymStepReturn
19
```
20
21
## Capabilities
22
23
### Success-Based Truncation
24
25
Wrapper that truncates episodes upon achieving success conditions, useful for goal-oriented environments and curriculum learning.
26
27
```python { .api }
28
class TruncatedOnSuccessWrapper(gym.Wrapper):
29
"""
30
Reset on success and offsets the reward.
31
Useful for GoalEnv and goal-oriented tasks.
32
"""
33
34
def __init__(
35
self,
36
env: gym.Env,
37
reward_offset: float = 0.0,
38
n_successes: int = 1
39
):
40
"""
41
Initialize TruncatedOnSuccessWrapper.
42
43
Parameters:
44
- env: Base environment to wrap
45
- reward_offset: Offset to add to all rewards
46
- n_successes: Number of consecutive successes needed for truncation
47
"""
48
49
def reset(
50
self,
51
seed: Optional[int] = None,
52
options: Optional[dict] = None
53
) -> GymResetReturn:
54
"""Reset environment and success counter."""
55
56
def step(self, action) -> GymStepReturn:
57
"""
58
Execute action and check for success truncation.
59
60
Returns:
61
Tuple of (observation, reward + offset, terminated, truncated, info)
62
"""
63
64
def compute_reward(self, achieved_goal, desired_goal, info):
65
"""Compute reward with offset for goal environments."""
66
```
67
68
Usage example:
69
```python
70
import gymnasium as gym
71
from rl_zoo3.wrappers import TruncatedOnSuccessWrapper
72
73
# Create base environment
74
env = gym.make("FetchReach-v1")
75
76
# Wrap with success truncation
77
wrapped_env = TruncatedOnSuccessWrapper(
78
env,
79
reward_offset=1.0, # Add bonus reward
80
n_successes=3 # Require 3 consecutive successes
81
)
82
83
# Use in training
84
obs, info = wrapped_env.reset()
85
for step in range(1000):
86
action = wrapped_env.action_space.sample()
87
obs, reward, terminated, truncated, info = wrapped_env.step(action)
88
89
if truncated and info.get("is_success", False):
90
print(f"Success achieved at step {step}")
91
break
92
```
93
94
### Action Noise
95
96
Wrapper that adds configurable noise to agent actions, useful for exploration and robustness testing.
97
98
```python { .api }
99
class ActionNoiseWrapper(gym.Wrapper[ObsType, np.ndarray, ObsType, np.ndarray]):
100
"""
101
Wrapper that adds noise to actions.
102
Useful for exploration and robustness evaluation.
103
"""
104
105
def __init__(
106
self,
107
env: gym.Env,
108
noise_std: float = 0.1,
109
noise_type: str = "gaussian"
110
):
111
"""
112
Initialize ActionNoiseWrapper.
113
114
Parameters:
115
- env: Base environment to wrap
116
- noise_std: Standard deviation of noise
117
- noise_type: Type of noise ('gaussian', 'uniform')
118
"""
119
120
def step(self, action) -> GymStepReturn:
121
"""
122
Execute action with added noise.
123
124
Parameters:
125
- action: Original action from agent
126
127
Returns:
128
Environment step result with noisy action applied
129
"""
130
```
131
132
Usage example:
133
```python
134
from rl_zoo3.wrappers import ActionNoiseWrapper
135
import gymnasium as gym
import numpy as np
136
137
# Create environment
138
env = gym.make("Pendulum-v1")
139
140
# Add action noise
141
noisy_env = ActionNoiseWrapper(
142
env,
143
noise_std=0.05, # 5% noise
144
noise_type="gaussian"
145
)
146
147
# Actions will have noise added automatically
148
obs, info = noisy_env.reset()
149
action = np.array([0.5]) # Clean action
150
obs, reward, terminated, truncated, info = noisy_env.step(action) # Noise added internally
151
```
152
153
### Action Smoothing
154
155
Wrapper that smooths actions over multiple timesteps, reducing jerkiness in continuous control tasks.
156
157
```python { .api }
158
class ActionSmoothingWrapper(gym.Wrapper):
159
"""
160
Wrapper for action smoothing over multiple timesteps.
161
Reduces action jerkiness in continuous control.
162
"""
163
164
def __init__(
165
self,
166
env: gym.Env,
167
smoothing_coef: float = 0.9
168
):
169
"""
170
Initialize ActionSmoothingWrapper.
171
172
Parameters:
173
- env: Base environment to wrap
174
- smoothing_coef: Smoothing coefficient (0.0 = no smoothing, 1.0 = maximum smoothing)
175
"""
176
177
def step(self, action) -> GymStepReturn:
178
"""
179
Execute smoothed action.
180
181
Parameters:
182
- action: Raw action from agent
183
184
Returns:
185
Environment step result with smoothed action
186
"""
187
188
def reset(self, **kwargs) -> GymResetReturn:
189
"""Reset environment and action history."""
190
```
191
192
### Delayed Rewards
193
194
Wrapper that delays reward delivery by a specified number of steps, useful for testing credit assignment and memory.
195
196
```python { .api }
197
class DelayedRewardWrapper(gym.Wrapper):
198
"""
199
Wrapper that delays reward delivery.
200
Useful for testing credit assignment capabilities.
201
"""
202
203
def __init__(
204
self,
205
env: gym.Env,
206
delay: int = 10
207
):
208
"""
209
Initialize DelayedRewardWrapper.
210
211
Parameters:
212
- env: Base environment to wrap
213
- delay: Number of steps to delay rewards
214
"""
215
216
def step(self, action) -> GymStepReturn:
217
"""
218
Execute action with delayed reward delivery.
219
220
Returns:
221
Step result with current reward set to 0.0, delayed rewards delivered later
222
"""
223
224
def reset(self, **kwargs) -> GymResetReturn:
225
"""Reset environment and reward buffer."""
226
```
227
228
Usage example:
229
```python
230
from rl_zoo3.wrappers import DelayedRewardWrapper
231
import gymnasium as gym
232
233
# Create environment with delayed rewards
234
env = gym.make("CartPole-v1")
235
delayed_env = DelayedRewardWrapper(env, delay=5)
236
237
# Rewards will be delayed by 5 steps
238
obs, info = delayed_env.reset()
239
total_reward = 0
240
for step in range(100):
241
action = delayed_env.action_space.sample()
242
obs, reward, terminated, truncated, info = delayed_env.step(action)
243
total_reward += reward
244
245
if terminated or truncated:
246
print(f"Episode ended with total reward: {total_reward}")
247
break
248
```
249
250
### Observation History
251
252
Wrapper that maintains a history of observations, useful for partially observable environments and recurrent policies.
253
254
```python { .api }
255
class HistoryWrapper(gym.Wrapper[np.ndarray, np.ndarray, np.ndarray, np.ndarray]):
256
"""
257
Wrapper that maintains observation history.
258
Useful for partial observability and recurrent policies.
259
"""
260
261
def __init__(
262
self,
263
env: gym.Env,
264
horizon: int = 2
265
):
266
"""
267
Initialize HistoryWrapper.
268
269
Parameters:
270
- env: Base environment to wrap (must have Box observation space)
271
- horizon: Number of past observations to include
272
"""
273
274
def reset(self, **kwargs) -> GymResetReturn:
275
"""Reset environment and observation history."""
276
277
def step(self, action) -> GymStepReturn:
278
"""
279
Execute action and update observation history.
280
281
Returns:
282
Step result with concatenated observation history
283
"""
284
```
285
286
### Dictionary Observation History
287
288
Specialized history wrapper for environments with dictionary observation spaces.
289
290
```python { .api }
291
class HistoryWrapperObsDict(gym.Wrapper):
292
"""
293
History wrapper for dictionary observation spaces.
294
Maintains separate history for each observation key.
295
"""
296
297
def __init__(
298
self,
299
env: gym.Env,
300
horizon: int = 2
301
):
302
"""
303
Initialize HistoryWrapperObsDict.
304
305
Parameters:
306
- env: Base environment with Dict observation space
307
- horizon: Number of past observations to maintain per key
308
"""
309
310
def reset(self, **kwargs) -> GymResetReturn:
311
"""Reset environment and all observation histories."""
312
313
def step(self, action) -> GymStepReturn:
314
"""
315
Execute action and update all observation histories.
316
317
Returns:
318
Step result with extended dictionary observations
319
"""
320
```
321
322
### Frame Skipping
323
324
Wrapper that skips frames and repeats actions, common in Atari and other environments for computational efficiency.
325
326
```python { .api }
327
class FrameSkip(gym.Wrapper):
328
"""
329
Wrapper for frame skipping (action repeat).
330
Repeats actions for multiple frames and returns the final result.
331
"""
332
333
def __init__(
334
self,
335
env: gym.Env,
336
skip: int = 4
337
):
338
"""
339
Initialize FrameSkip wrapper.
340
341
Parameters:
342
- env: Base environment to wrap
343
- skip: Number of frames to skip (action repeat count)
344
"""
345
346
def step(self, action) -> GymStepReturn:
347
"""
348
Execute action for multiple frames.
349
350
Parameters:
351
- action: Action to repeat
352
353
Returns:
354
Result after skipping frames with accumulated reward
355
"""
356
```
357
358
### Velocity Masking
359
360
Wrapper that masks velocity information from observations, useful for testing position-only policies.
361
362
```python { .api }
363
class MaskVelocityWrapper(gym.ObservationWrapper):
364
"""
365
Wrapper that masks velocity information from observations.
366
Useful for testing position-only policies.
367
"""
368
369
def __init__(self, env: gym.Env):
370
"""
371
Initialize MaskVelocityWrapper.
372
373
Parameters:
374
- env: Base environment (only specific environments with known velocity indices, e.g. CartPole, Pendulum, LunarLander, Swimmer, are supported)
375
"""
376
377
def observation(self, observation) -> np.ndarray:
378
"""
379
Mask velocity components from observation.
380
381
Parameters:
382
- observation: Original observation
383
384
Returns:
385
Observation with velocity components set to zero
386
"""
387
```
388
389
### YAML-Compatible Resize
390
391
Wrapper for resizing observations with YAML-compatible configuration format.
392
393
```python { .api }
394
class YAMLCompatResizeObservation(ResizeObservation):
395
"""
396
YAML-compatible version of ResizeObservation wrapper.
397
Accepts list format for shape specification.
398
"""
399
400
def __init__(self, env: gym.Env, shape: list[int]):
401
"""
402
Initialize YAMLCompatResizeObservation.
403
404
Parameters:
405
- env: Base environment to wrap
406
- shape: Target shape as list [height, width]
407
"""
408
```
409
410
## Wrapper Usage Patterns
411
412
### Combining Multiple Wrappers
413
414
```python
415
import gymnasium as gym
416
from rl_zoo3.wrappers import (
417
TruncatedOnSuccessWrapper,
418
ActionNoiseWrapper,
419
DelayedRewardWrapper,
420
HistoryWrapper
421
)
422
423
# Create base environment
424
env = gym.make("FetchReach-v1")
425
426
# Apply multiple wrappers (order matters)
427
env = TruncatedOnSuccessWrapper(env, reward_offset=1.0)
428
env = ActionNoiseWrapper(env, noise_std=0.05)
429
env = DelayedRewardWrapper(env, delay=3)
430
env = HistoryWrapper(env, horizon=4)
431
432
# Use wrapped environment
433
obs, info = env.reset()
434
for step in range(1000):
435
action = env.action_space.sample()
436
obs, reward, terminated, truncated, info = env.step(action)
437
438
if terminated or truncated:
439
obs, info = env.reset()
440
```
441
442
### Configuration-Driven Wrapper Creation
443
444
```python
445
from rl_zoo3.utils import get_wrapper_class
446
447
# Configuration dict (typically from hyperparameters file)
448
hyperparams = {
449
"env_wrapper": [
450
{
451
"rl_zoo3.wrappers:TruncatedOnSuccessWrapper": {
452
"reward_offset": 1.0,
453
"n_successes": 2
454
}
455
},
456
{
457
"rl_zoo3.wrappers:ActionNoiseWrapper": {
458
"noise_std": 0.1
459
}
460
}
461
]
462
}
463
464
# Get wrapper function from configuration
465
wrapper_fn = get_wrapper_class(hyperparams)
466
467
# Apply wrappers to environment
468
env = gym.make("FetchReach-v1")
469
if wrapper_fn is not None:
470
env = wrapper_fn(env)
471
```
472
473
### Integration with ExperimentManager
474
475
```python
476
from rl_zoo3.exp_manager import ExperimentManager
477
import argparse
478
479
# Wrappers are automatically applied based on hyperparameters
480
args = argparse.Namespace(
481
algo='sac',
482
env='Pendulum-v1',
483
n_timesteps=50000
484
)
485
486
# Hyperparameters with wrapper specifications
487
hyperparams = {
488
    "env_wrapper": [
        {"rl_zoo3.wrappers:ActionSmoothingWrapper": {"smoothing_coef": 0.8}}
    ]
490
}
491
492
exp_manager = ExperimentManager(
493
args=args,
494
algo='sac',
495
env_id='Pendulum-v1',
496
log_folder='./logs',
497
hyperparams=hyperparams
498
)
499
500
# Wrappers applied automatically during environment creation
501
model, saved_hyperparams = exp_manager.setup_experiment()
502
```