<!-- docs/wrappers.md -->

# Environment Wrappers

Custom Gymnasium environment wrappers for observation processing, reward modification, action manipulation, and training optimization. These wrappers extend environments with specialized functionality needed for effective RL training.

## Core Imports

```python
from typing import Optional

from rl_zoo3.wrappers import (
    TruncatedOnSuccessWrapper,
    ActionNoiseWrapper,
    ActionSmoothingWrapper,
    DelayedRewardWrapper,
    HistoryWrapper,
    HistoryWrapperObsDict,
    FrameSkip,
    MaskVelocityWrapper,
)
import gymnasium as gym
import numpy as np
```

## Capabilities

### Success-Based Truncation

Wrapper that truncates episodes upon achieving success conditions, useful for goal-oriented environments and curriculum learning.

```python { .api }
class TruncatedOnSuccessWrapper(gym.Wrapper):
    """
    Reset on success and offsets the reward.
    Useful for GoalEnv and goal-oriented tasks.
    """

    def __init__(
        self,
        env: gym.Env,
        reward_offset: float = 0.0,
        n_successes: int = 1
    ):
        """
        Initialize TruncatedOnSuccessWrapper.

        Parameters:
        - env: Base environment to wrap
        - reward_offset: Offset to add to all rewards
        - n_successes: Number of consecutive successes needed for truncation
        """

    def reset(
        self,
        seed: Optional[int] = None,
        options: Optional[dict] = None
    ) -> GymResetReturn:
        """Reset environment and success counter."""

    def step(self, action) -> GymStepReturn:
        """
        Execute action and check for success truncation.

        Returns:
            Tuple of (observation, reward + offset, terminated, truncated, info)
        """

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute reward with offset for goal environments."""
```

Usage example:

```python
import gymnasium as gym
from rl_zoo3.wrappers import TruncatedOnSuccessWrapper

# Create base environment
env = gym.make("FetchReach-v1")

# Wrap with success truncation
wrapped_env = TruncatedOnSuccessWrapper(
    env,
    reward_offset=1.0,  # Add bonus reward
    n_successes=3       # Require 3 consecutive successes
)

# Use in training
obs, info = wrapped_env.reset()
for step in range(1000):
    action = wrapped_env.action_space.sample()
    obs, reward, terminated, truncated, info = wrapped_env.step(action)

    if truncated and info.get("is_success", False):
        print(f"Success achieved at step {step}")
        break
```

### Action Noise

Wrapper that adds configurable noise to agent actions, useful for exploration and robustness testing.

```python { .api }
class ActionNoiseWrapper(gym.Wrapper[ObsType, np.ndarray, ObsType, np.ndarray]):
    """
    Wrapper that adds noise to actions.
    Useful for exploration and robustness evaluation.
    """

    def __init__(
        self,
        env: gym.Env,
        noise_std: float = 0.1,
        noise_type: str = "gaussian"
    ):
        """
        Initialize ActionNoiseWrapper.

        Parameters:
        - env: Base environment to wrap
        - noise_std: Standard deviation of noise
        - noise_type: Type of noise ('gaussian', 'uniform')
        """

    def step(self, action) -> GymStepReturn:
        """
        Execute action with added noise.

        Parameters:
        - action: Original action from agent

        Returns:
            Environment step result with noisy action applied
        """
```

Usage example:

```python
from rl_zoo3.wrappers import ActionNoiseWrapper
import gymnasium as gym
import numpy as np

# Create environment
env = gym.make("Pendulum-v1")

# Add action noise
noisy_env = ActionNoiseWrapper(
    env,
    noise_std=0.05,        # 5% noise
    noise_type="gaussian"
)

# Actions will have noise added automatically
obs, info = noisy_env.reset()
action = np.array([0.5])  # Clean action
obs, reward, terminated, truncated, info = noisy_env.step(action)  # Noise added internally
```

### Action Smoothing

Wrapper that smooths actions over multiple timesteps, reducing jerkiness in continuous control tasks.

```python { .api }
class ActionSmoothingWrapper(gym.Wrapper):
    """
    Wrapper for action smoothing over multiple timesteps.
    Reduces action jerkiness in continuous control.
    """

    def __init__(
        self,
        env: gym.Env,
        smoothing_coef: float = 0.9
    ):
        """
        Initialize ActionSmoothingWrapper.

        Parameters:
        - env: Base environment to wrap
        - smoothing_coef: Smoothing coefficient (0.0 = no smoothing, 1.0 = maximum smoothing)
        """

    def step(self, action) -> GymStepReturn:
        """
        Execute smoothed action.

        Parameters:
        - action: Raw action from agent

        Returns:
            Environment step result with smoothed action
        """

    def reset(self, **kwargs) -> GymResetReturn:
        """Reset environment and action history."""
```

### Delayed Rewards

Wrapper that delays reward delivery by a specified number of steps, useful for testing credit assignment and memory.

```python { .api }
class DelayedRewardWrapper(gym.Wrapper):
    """
    Wrapper that delays reward delivery.
    Useful for testing credit assignment capabilities.
    """

    def __init__(
        self,
        env: gym.Env,
        delay: int = 10
    ):
        """
        Initialize DelayedRewardWrapper.

        Parameters:
        - env: Base environment to wrap
        - delay: Number of steps to delay rewards
        """

    def step(self, action) -> GymStepReturn:
        """
        Execute action with delayed reward delivery.

        Returns:
            Step result with current reward set to 0.0, delayed rewards delivered later
        """

    def reset(self, **kwargs) -> GymResetReturn:
        """Reset environment and reward buffer."""
```

Usage example:

```python
from rl_zoo3.wrappers import DelayedRewardWrapper
import gymnasium as gym

# Create environment with delayed rewards
env = gym.make("CartPole-v1")
delayed_env = DelayedRewardWrapper(env, delay=5)

# Rewards will be delayed by 5 steps
obs, info = delayed_env.reset()
total_reward = 0
for step in range(100):
    action = delayed_env.action_space.sample()
    obs, reward, terminated, truncated, info = delayed_env.step(action)
    total_reward += reward

    if terminated or truncated:
        print(f"Episode ended with total reward: {total_reward}")
        break
```

### Observation History

Wrapper that maintains a history of observations, useful for partially observable environments and recurrent policies.

```python { .api }
class HistoryWrapper(gym.Wrapper[np.ndarray, np.ndarray, np.ndarray, np.ndarray]):
    """
    Wrapper that maintains observation history.
    Useful for partial observability and recurrent policies.
    """

    def __init__(
        self,
        env: gym.Env,
        horizon: int = 2
    ):
        """
        Initialize HistoryWrapper.

        Parameters:
        - env: Base environment to wrap (must have Box observation space)
        - horizon: Number of past observations to include
        """

    def reset(self, **kwargs) -> GymResetReturn:
        """Reset environment and observation history."""

    def step(self, action) -> GymStepReturn:
        """
        Execute action and update observation history.

        Returns:
            Step result with concatenated observation history
        """
```

### Dictionary Observation History

Specialized history wrapper for environments with dictionary observation spaces.

```python { .api }
class HistoryWrapperObsDict(gym.Wrapper):
    """
    History wrapper for dictionary observation spaces.
    Maintains separate history for each observation key.
    """

    def __init__(
        self,
        env: gym.Env,
        horizon: int = 2
    ):
        """
        Initialize HistoryWrapperObsDict.

        Parameters:
        - env: Base environment with Dict observation space
        - horizon: Number of past observations to maintain per key
        """

    def reset(self, **kwargs) -> GymResetReturn:
        """Reset environment and all observation histories."""

    def step(self, action) -> GymStepReturn:
        """
        Execute action and update all observation histories.

        Returns:
            Step result with extended dictionary observations
        """
```

### Frame Skipping

Wrapper that skips frames and repeats actions, common in Atari and other environments for computational efficiency.

```python { .api }
class FrameSkip(gym.Wrapper):
    """
    Wrapper for frame skipping (action repeat).
    Repeats actions for multiple frames and returns the final result.
    """

    def __init__(
        self,
        env: gym.Env,
        skip: int = 4
    ):
        """
        Initialize FrameSkip wrapper.

        Parameters:
        - env: Base environment to wrap
        - skip: Number of frames to skip (action repeat count)
        """

    def step(self, action) -> GymStepReturn:
        """
        Execute action for multiple frames.

        Parameters:
        - action: Action to repeat

        Returns:
            Result after skipping frames with accumulated reward
        """
```

### Velocity Masking

Wrapper that masks velocity information from observations, useful for testing position-only policies.

```python { .api }
class MaskVelocityWrapper(gym.ObservationWrapper):
    """
    Wrapper that masks velocity information from observations.
    Useful for testing position-only policies.
    """

    def __init__(self, env: gym.Env):
        """
        Initialize MaskVelocityWrapper.

        Parameters:
        - env: Base environment (typically MuJoCo-based)
        """

    def observation(self, observation) -> np.ndarray:
        """
        Mask velocity components from observation.

        Parameters:
        - observation: Original observation

        Returns:
            Observation with velocity components set to zero
        """
```

### YAML-Compatible Resize

Wrapper for resizing observations with YAML-compatible configuration format.

```python { .api }
class YAMLCompatResizeObservation(ResizeObservation):
    """
    YAML-compatible version of ResizeObservation wrapper.
    Accepts list format for shape specification.
    """

    def __init__(self, env: gym.Env, shape: list[int]):
        """
        Initialize YAMLCompatResizeObservation.

        Parameters:
        - env: Base environment to wrap
        - shape: Target shape as list [height, width]
        """
```

## Wrapper Usage Patterns

### Combining Multiple Wrappers

```python
import gymnasium as gym
from rl_zoo3.wrappers import (
    TruncatedOnSuccessWrapper,
    ActionNoiseWrapper,
    DelayedRewardWrapper,
    HistoryWrapper
)

# Create base environment
env = gym.make("FetchReach-v1")

# Apply multiple wrappers (order matters)
env = TruncatedOnSuccessWrapper(env, reward_offset=1.0)
env = ActionNoiseWrapper(env, noise_std=0.05)
env = DelayedRewardWrapper(env, delay=3)
env = HistoryWrapper(env, horizon=4)

# Use wrapped environment
obs, info = env.reset()
for step in range(1000):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        obs, info = env.reset()
```

### Configuration-Driven Wrapper Creation

```python
import gymnasium as gym

from rl_zoo3.utils import get_wrapper_class

# Configuration dict (typically from hyperparameters file)
hyperparams = {
    "env_wrapper": [
        {
            "rl_zoo3.wrappers:TruncatedOnSuccessWrapper": {
                "reward_offset": 1.0,
                "n_successes": 2
            }
        },
        {
            "rl_zoo3.wrappers:ActionNoiseWrapper": {
                "noise_std": 0.1
            }
        }
    ]
}

# Get wrapper function from configuration
wrapper_fn = get_wrapper_class(hyperparams)

# Apply wrappers to environment
env = gym.make("FetchReach-v1")
if wrapper_fn is not None:
    env = wrapper_fn(env)
```

### Integration with ExperimentManager

```python
import argparse

from rl_zoo3.exp_manager import ExperimentManager

# Wrappers are automatically applied based on hyperparameters
args = argparse.Namespace(
    algo='sac',
    env='Pendulum-v1',
    n_timesteps=50000
)

# Hyperparameters with wrapper specifications
hyperparams = {
    "env_wrapper": "rl_zoo3.wrappers:ActionSmoothingWrapper",
    "env_wrapper_kwargs": {"smoothing_coef": 0.8}
}

exp_manager = ExperimentManager(
    args=args,
    algo='sac',
    env_id='Pendulum-v1',
    log_folder='./logs',
    hyperparams=hyperparams
)

# Wrappers applied automatically during environment creation
model = exp_manager.setup_experiment()
```