0
# Environment Wrappers
1
2
Custom Gymnasium environment wrappers for observation processing, reward modification, action manipulation, and training optimization. These wrappers extend environments with specialized functionality needed for effective RL training.
3
4
## Core Imports
5
6
```python
7
from rl_zoo3.wrappers import (
8
TruncatedOnSuccessWrapper,
9
ActionNoiseWrapper,
10
ActionSmoothingWrapper,
11
DelayedRewardWrapper,
12
HistoryWrapper,
13
HistoryWrapperObsDict,
14
FrameSkip,
15
MaskVelocityWrapper
16
)
17
import gymnasium as gym
18
import numpy as np
from typing import Optional
from stable_baselines3.common.type_aliases import GymResetReturn, GymStepReturn
19
```
20
21
## Capabilities
22
23
### Success-Based Truncation
24
25
Wrapper that truncates episodes upon achieving success conditions, useful for goal-oriented environments and curriculum learning.
26
27
```python { .api }
28
class TruncatedOnSuccessWrapper(gym.Wrapper):
29
"""
30
Reset on success and offsets the reward.
31
Useful for GoalEnv and goal-oriented tasks.
32
"""
33
34
def __init__(
35
self,
36
env: gym.Env,
37
reward_offset: float = 0.0,
38
n_successes: int = 1
39
):
40
"""
41
Initialize TruncatedOnSuccessWrapper.
42
43
Parameters:
44
- env: Base environment to wrap
45
- reward_offset: Offset to add to all rewards
46
- n_successes: Number of consecutive successes needed for truncation
47
"""
48
49
def reset(
50
self,
51
seed: Optional[int] = None,
52
options: Optional[dict] = None
53
) -> GymResetReturn:
54
"""Reset environment and success counter."""
55
56
def step(self, action) -> GymStepReturn:
57
"""
58
Execute action and check for success truncation.
59
60
Returns:
61
Tuple of (observation, reward + offset, terminated, truncated, info)
62
"""
63
64
def compute_reward(self, achieved_goal, desired_goal, info):
65
"""Compute reward with offset for goal environments."""
66
```
67
68
Usage example:
69
```python
70
import gymnasium as gym
71
from rl_zoo3.wrappers import TruncatedOnSuccessWrapper
72
73
# Create base environment
74
env = gym.make("FetchReach-v1")
75
76
# Wrap with success truncation
77
wrapped_env = TruncatedOnSuccessWrapper(
78
env,
79
reward_offset=1.0, # Add bonus reward
80
n_successes=3 # Require 3 consecutive successes
81
)
82
83
# Use in training
84
obs, info = wrapped_env.reset()
85
for step in range(1000):
86
action = wrapped_env.action_space.sample()
87
obs, reward, terminated, truncated, info = wrapped_env.step(action)
88
89
if truncated and info.get("is_success", False):
90
print(f"Success achieved at step {step}")
91
break
92
```
93
94
### Action Noise
95
96
Wrapper that adds configurable noise to agent actions, useful for exploration and robustness testing.
97
98
```python { .api }
99
class ActionNoiseWrapper(gym.Wrapper[ObsType, np.ndarray, ObsType, np.ndarray]):
100
"""
101
Wrapper that adds noise to actions.
102
Useful for exploration and robustness evaluation.
103
"""
104
105
def __init__(
106
self,
107
env: gym.Env,
108
noise_std: float = 0.1,
109
noise_type: str = "gaussian"
110
):
111
"""
112
Initialize ActionNoiseWrapper.
113
114
Parameters:
115
- env: Base environment to wrap
116
- noise_std: Standard deviation of noise
117
- noise_type: Type of noise ('gaussian', 'uniform')
118
"""
119
120
def step(self, action) -> GymStepReturn:
121
"""
122
Execute action with added noise.
123
124
Parameters:
125
- action: Original action from agent
126
127
Returns:
128
Environment step result with noisy action applied
129
"""
130
```
131
132
Usage example:
133
```python
134
from rl_zoo3.wrappers import ActionNoiseWrapper
135
import gymnasium as gym
import numpy as np
136
137
# Create environment
138
env = gym.make("Pendulum-v1")
139
140
# Add action noise
141
noisy_env = ActionNoiseWrapper(
142
env,
143
noise_std=0.05, # 5% noise
144
noise_type="gaussian"
145
)
146
147
# Actions will have noise added automatically
148
obs, info = noisy_env.reset()
149
action = np.array([0.5]) # Clean action
150
obs, reward, terminated, truncated, info = noisy_env.step(action) # Noise added internally
151
```
152
153
### Action Smoothing
154
155
Wrapper that smooths actions over multiple timesteps, reducing jerkiness in continuous control tasks.
156
157
```python { .api }
158
class ActionSmoothingWrapper(gym.Wrapper):
159
"""
160
Wrapper for action smoothing over multiple timesteps.
161
Reduces action jerkiness in continuous control.
162
"""
163
164
def __init__(
165
self,
166
env: gym.Env,
167
smoothing_coef: float = 0.9
168
):
169
"""
170
Initialize ActionSmoothingWrapper.
171
172
Parameters:
173
- env: Base environment to wrap
174
- smoothing_coef: Smoothing coefficient (0.0 = no smoothing, 1.0 = maximum smoothing)
175
"""
176
177
def step(self, action) -> GymStepReturn:
178
"""
179
Execute smoothed action.
180
181
Parameters:
182
- action: Raw action from agent
183
184
Returns:
185
Environment step result with smoothed action
186
"""
187
188
def reset(self, **kwargs) -> GymResetReturn:
189
"""Reset environment and action history."""
190
```
191
192
### Delayed Rewards
193
194
Wrapper that delays reward delivery by a specified number of steps, useful for testing credit assignment and memory.
195
196
```python { .api }
197
class DelayedRewardWrapper(gym.Wrapper):
198
"""
199
Wrapper that delays reward delivery.
200
Useful for testing credit assignment capabilities.
201
"""
202
203
def __init__(
204
self,
205
env: gym.Env,
206
delay: int = 10
207
):
208
"""
209
Initialize DelayedRewardWrapper.
210
211
Parameters:
212
- env: Base environment to wrap
213
- delay: Number of steps to delay rewards
214
"""
215
216
def step(self, action) -> GymStepReturn:
217
"""
218
Execute action with delayed reward delivery.
219
220
Returns:
221
Step result with current reward set to 0.0, delayed rewards delivered later
222
"""
223
224
def reset(self, **kwargs) -> GymResetReturn:
225
"""Reset environment and reward buffer."""
226
```
227
228
Usage example:
229
```python
230
from rl_zoo3.wrappers import DelayedRewardWrapper
231
import gymnasium as gym
232
233
# Create environment with delayed rewards
234
env = gym.make("CartPole-v1")
235
delayed_env = DelayedRewardWrapper(env, delay=5)
236
237
# Rewards will be delayed by 5 steps
238
obs, info = delayed_env.reset()
239
total_reward = 0
240
for step in range(100):
241
action = delayed_env.action_space.sample()
242
obs, reward, terminated, truncated, info = delayed_env.step(action)
243
total_reward += reward
244
245
if terminated or truncated:
246
print(f"Episode ended with total reward: {total_reward}")
247
break
248
```
249
250
### Observation History
251
252
Wrapper that maintains a history of observations, useful for partially observable environments and recurrent policies.
253
254
```python { .api }
255
class HistoryWrapper(gym.Wrapper[np.ndarray, np.ndarray, np.ndarray, np.ndarray]):
256
"""
257
Wrapper that maintains observation history.
258
Useful for partial observability and recurrent policies.
259
"""
260
261
def __init__(
262
self,
263
env: gym.Env,
264
horizon: int = 2
265
):
266
"""
267
Initialize HistoryWrapper.
268
269
Parameters:
270
- env: Base environment to wrap (must have Box observation space)
271
- horizon: Number of past observations to include
272
"""
273
274
def reset(self, **kwargs) -> GymResetReturn:
275
"""Reset environment and observation history."""
276
277
def step(self, action) -> GymStepReturn:
278
"""
279
Execute action and update observation history.
280
281
Returns:
282
Step result with concatenated observation history
283
"""
284
```
285
286
### Dictionary Observation History
287
288
Specialized history wrapper for environments with dictionary observation spaces.
289
290
```python { .api }
291
class HistoryWrapperObsDict(gym.Wrapper):
292
"""
293
History wrapper for dictionary observation spaces.
294
Maintains separate history for each observation key.
295
"""
296
297
def __init__(
298
self,
299
env: gym.Env,
300
horizon: int = 2
301
):
302
"""
303
Initialize HistoryWrapperObsDict.
304
305
Parameters:
306
- env: Base environment with Dict observation space
307
- horizon: Number of past observations to maintain per key
308
"""
309
310
def reset(self, **kwargs) -> GymResetReturn:
311
"""Reset environment and all observation histories."""
312
313
def step(self, action) -> GymStepReturn:
314
"""
315
Execute action and update all observation histories.
316
317
Returns:
318
Step result with extended dictionary observations
319
"""
320
```
321
322
### Frame Skipping
323
324
Wrapper that skips frames and repeats actions, common in Atari and other environments for computational efficiency.
325
326
```python { .api }
327
class FrameSkip(gym.Wrapper):
328
"""
329
Wrapper for frame skipping (action repeat).
330
Repeats actions for multiple frames and returns the final result.
331
"""
332
333
def __init__(
334
self,
335
env: gym.Env,
336
skip: int = 4
337
):
338
"""
339
Initialize FrameSkip wrapper.
340
341
Parameters:
342
- env: Base environment to wrap
343
- skip: Number of frames to skip (action repeat count)
344
"""
345
346
def step(self, action) -> GymStepReturn:
347
"""
348
Execute action for multiple frames.
349
350
Parameters:
351
- action: Action to repeat
352
353
Returns:
354
Result after skipping frames with accumulated reward
355
"""
356
```
357
358
### Velocity Masking
359
360
Wrapper that masks velocity information from observations, useful for testing position-only policies.
361
362
```python { .api }
363
class MaskVelocityWrapper(gym.ObservationWrapper):
364
"""
365
Wrapper that masks velocity information from observations.
366
Useful for testing position-only policies.
367
"""
368
369
def __init__(self, env: gym.Env):
370
"""
371
Initialize MaskVelocityWrapper.
372
373
Parameters:
374
- env: Base environment (only specific environments with known velocity indices, e.g. CartPole, Pendulum, LunarLander, Swimmer, are supported)
375
"""
376
377
def observation(self, observation) -> np.ndarray:
378
"""
379
Mask velocity components from observation.
380
381
Parameters:
382
- observation: Original observation
383
384
Returns:
385
Observation with velocity components set to zero
386
"""
387
```
388
389
### YAML-Compatible Resize
390
391
Wrapper for resizing observations with YAML-compatible configuration format.
392
393
```python { .api }
394
class YAMLCompatResizeObservation(ResizeObservation):
395
"""
396
YAML-compatible version of ResizeObservation wrapper.
397
Accepts list format for shape specification.
398
"""
399
400
def __init__(self, env: gym.Env, shape: list[int]):
401
"""
402
Initialize YAMLCompatResizeObservation.
403
404
Parameters:
405
- env: Base environment to wrap
406
- shape: Target shape as list [height, width]
407
"""
408
```
409
410
## Wrapper Usage Patterns
411
412
### Combining Multiple Wrappers
413
414
```python
415
import gymnasium as gym
416
from rl_zoo3.wrappers import (
417
TruncatedOnSuccessWrapper,
418
ActionNoiseWrapper,
419
DelayedRewardWrapper,
420
HistoryWrapper
421
)
422
423
# Create base environment
424
env = gym.make("FetchReach-v1")
425
426
# Apply multiple wrappers (order matters)
427
env = TruncatedOnSuccessWrapper(env, reward_offset=1.0)
428
env = ActionNoiseWrapper(env, noise_std=0.05)
429
env = DelayedRewardWrapper(env, delay=3)
430
env = HistoryWrapper(env, horizon=4)
431
432
# Use wrapped environment
433
obs, info = env.reset()
434
for step in range(1000):
435
action = env.action_space.sample()
436
obs, reward, terminated, truncated, info = env.step(action)
437
438
if terminated or truncated:
439
obs, info = env.reset()
440
```
441
442
### Configuration-Driven Wrapper Creation
443
444
```python
445
from rl_zoo3.utils import get_wrapper_class
446
447
# Configuration dict (typically from hyperparameters file)
448
hyperparams = {
449
"env_wrapper": [
450
{
451
"rl_zoo3.wrappers:TruncatedOnSuccessWrapper": {
452
"reward_offset": 1.0,
453
"n_successes": 2
454
}
455
},
456
{
457
"rl_zoo3.wrappers:ActionNoiseWrapper": {
458
"noise_std": 0.1
459
}
460
}
461
]
462
}
463
464
# Get wrapper function from configuration
465
wrapper_fn = get_wrapper_class(hyperparams)
466
467
# Apply wrappers to environment
468
env = gym.make("FetchReach-v1")
469
if wrapper_fn is not None:
470
env = wrapper_fn(env)
471
```
472
473
### Integration with ExperimentManager
474
475
```python
476
from rl_zoo3.exp_manager import ExperimentManager
477
import argparse
478
479
# Wrappers are automatically applied based on hyperparameters
480
args = argparse.Namespace(
481
algo='sac',
482
env='Pendulum-v1',
483
n_timesteps=50000
484
)
485
486
# Hyperparameters with wrapper specifications
487
hyperparams = {
488
    "env_wrapper": [
        {"rl_zoo3.wrappers:ActionSmoothingWrapper": {"smoothing_coef": 0.8}}
    ]
490
}
491
492
exp_manager = ExperimentManager(
493
args=args,
494
algo='sac',
495
env_id='Pendulum-v1',
496
log_folder='./logs',
497
hyperparams=hyperparams
498
)
499
500
# Wrappers applied automatically during environment creation
501
model, saved_hyperparams = exp_manager.setup_experiment()
502
```