docs/vector-environments.md

# Vector Environments

Vector environments enable batched execution of multiple environment instances for improved performance when training with parallel environments. They provide the same interface as regular environments but operate on batches of observations, actions, and rewards.

## Capabilities

### Vector Environment Base Class

Base class that defines the interface for vectorized environments.

```python { .api }
class VectorEnv:
    """
    Base class for vectorized environments.

    Attributes:
        num_envs: Number of parallel environments
        single_observation_space: Observation space for individual environment
        single_action_space: Action space for individual environment
        observation_space: Batched observation space
        action_space: Batched action space
        closed: Whether environments are closed
    """

    def step(self, actions: ActType) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Execute actions in all environments.

        Args:
            actions: Batch of actions for all environments

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
                - observations: Batch of observations
                - rewards: Array of rewards
                - terminations: Array of termination flags
                - truncations: Array of truncation flags
                - infos: Dictionary of batched info values
        """

    def reset(self, seed: int | list[int] | None = None,
              options: dict | list[dict] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """
        Reset all environments.

        Args:
            seed: Random seed(s) for environments
            options: Environment options

        Returns:
            tuple: (observations, infos)
                - observations: Batch of initial observations
                - infos: Dictionary of batched info values
        """

    def close(self) -> None:
        """Close all environments."""

    def call(self, name: str, *args, **kwargs) -> list[Any]:
        """
        Call method on all environments.

        Args:
            name: Method name to call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            List of method results
        """

    def get_attr(self, name: str) -> list[Any]:
        """
        Get attribute from all environments.

        Args:
            name: Attribute name

        Returns:
            List of attribute values
        """

    def set_attr(self, name: str, values: Any | list[Any]) -> None:
        """
        Set attribute on all environments.

        Args:
            name: Attribute name
            values: Value(s) to set
        """
```

### Synchronous Vector Environment

Runs environments sequentially in the same process.

```python { .api }
class SyncVectorEnv(VectorEnv):
    """
    Synchronous vectorized environment.

    Runs environments sequentially in the same process.
    Simple but may be slower for computationally intensive environments.

    Args:
        env_fns: Iterator or sequence of functions that return environment instances
        copy: Whether to deepcopy observations
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for vector environment
    """

    def __init__(self, env_fns: Iterator[Callable[[], Env]] | Sequence[Callable[[], Env]],
                 copy: bool = True, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass
```

### Asynchronous Vector Environment

Runs environments in parallel processes for better performance.

```python { .api }
class AsyncVectorEnv(VectorEnv):
    """
    Asynchronous vectorized environment.

    Runs environments in parallel processes using multiprocessing.
    Better performance for computationally intensive environments.

    Args:
        env_fns: Sequence of functions that return environment instances
        shared_memory: Whether to use shared memory for observations
        copy: Whether to deepcopy observations
        context: Multiprocessing context ('spawn', 'fork', 'forkserver')
        daemon: Whether worker processes are daemonic
        worker: Custom worker function
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for vector environment
    """

    def __init__(self, env_fns: Sequence[Callable[[], Env]], shared_memory: bool = True,
                 copy: bool = True, context: str | None = None, daemon: bool = True,
                 worker: Callable | None = None, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass

    def step_async(self, actions: ActType) -> None:
        """
        Asynchronously execute actions (non-blocking).

        Args:
            actions: Batch of actions
        """

    def step_wait(self) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Wait for asynchronous step to complete.

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
        """

    def reset_async(self, seed: int | list[int] | None = None,
                    options: dict | list[dict] | None = None) -> None:
        """
        Asynchronously reset environments (non-blocking).

        Args:
            seed: Random seed(s)
            options: Environment options
        """

    def reset_wait(self) -> tuple[ObsType, dict[str, Any]]:
        """
        Wait for asynchronous reset to complete.

        Returns:
            tuple: (observations, infos)
        """
```

### Vector Environment Wrappers

Wrappers for modifying vector environment behavior.

```python { .api }
class VectorWrapper(VectorEnv):
    """
    Base wrapper for vector environments.

    Args:
        env: Vector environment to wrap
    """

    def __init__(self, env: VectorEnv):
        pass

class VectorObservationWrapper(VectorWrapper):
    """
    Base class for vector observation wrappers.
    """

    def observation(self, observations: ObsType) -> ObsType:
        """
        Transform batch of observations.

        Args:
            observations: Batch of observations

        Returns:
            Transformed batch of observations
        """

class VectorActionWrapper(VectorWrapper):
    """
    Base class for vector action wrappers.
    """

    def action(self, actions: ActType) -> ActType:
        """
        Transform batch of actions.

        Args:
            actions: Batch of actions

        Returns:
            Transformed batch of actions
        """

class VectorRewardWrapper(VectorWrapper):
    """
    Base class for vector reward wrappers.
    """

    def reward(self, rewards: ArrayType) -> ArrayType:
        """
        Transform batch of rewards.

        Args:
            rewards: Batch of rewards

        Returns:
            Transformed batch of rewards
        """
```

### Utility Enums

Enumerations for vector environment configuration.

```python { .api }
class AutoresetMode(Enum):
    """
    Auto-reset modes for vector environments.

    Values:
        NEXT_STEP: Reset on next step after termination/truncation
        SAME_STEP: Reset immediately on same step
        DISABLED: No auto-reset
    """
    NEXT_STEP = "NextStep"
    SAME_STEP = "SameStep"
    DISABLED = "Disabled"
```

## Usage Examples

### Creating Vector Environments

```python
import gymnasium as gym
from gymnasium.vector import SyncVectorEnv, AsyncVectorEnv

# Create synchronous vector environment
def make_env():
    return gym.make('CartPole-v1')

env_fns = [make_env for _ in range(4)]
sync_vec_env = SyncVectorEnv(env_fns)

# Create asynchronous vector environment (better for complex environments)
async_vec_env = AsyncVectorEnv(env_fns)

# Using make_vec for convenience
vec_env = gym.make_vec('CartPole-v1', num_envs=4, vectorization_mode='async')
```

### Working with Vector Environments

```python
import numpy as np

# Create vector environment
vec_env = gym.make_vec('CartPole-v1', num_envs=4)

# Reset all environments
observations, infos = vec_env.reset(seed=42)
print(f"Observations shape: {observations.shape}")  # (4, 4) for CartPole

# Take actions in all environments
actions = vec_env.action_space.sample()  # Sample batch of actions
print(f"Actions shape: {actions.shape}")  # (4,)

# Step all environments
observations, rewards, terminations, truncations, infos = vec_env.step(actions)
print(f"Rewards: {rewards}")  # Array of 4 rewards
print(f"Terminations: {terminations}")  # Array of 4 boolean flags

# Handle individual environment resets automatically
for i in range(100):
    actions = vec_env.action_space.sample()
    observations, rewards, terminations, truncations, infos = vec_env.step(actions)

    # Vector environments automatically reset terminated/truncated environments
    # The observations array will contain reset observations for those environments

vec_env.close()
```

### Advanced Vector Environment Usage

```python
# Create environments with different configurations
def make_env(env_id, seed=None):
    def _make():
        env = gym.make(env_id)
        if seed is not None:
            env.reset(seed=seed)
        return env
    return _make

env_fns = [
    make_env('CartPole-v1', seed=i)
    for i in range(4)
]

vec_env = AsyncVectorEnv(env_fns, shared_memory=True)

# Call methods on all environments
max_steps = vec_env.call('spec')[0].max_episode_steps
print(f"Max episode steps: {max_steps}")

# Get attributes from all environments
action_spaces = vec_env.get_attr('action_space')
print(f"All action spaces are Discrete(2): {all(isinstance(space, gym.spaces.Discrete) and space.n == 2 for space in action_spaces)}")

# Set attributes on all environments (if supported)
vec_env.set_attr('render_mode', 'rgb_array')
```

### Asynchronous Operations

```python
# For AsyncVectorEnv, you can use async operations for better control
async_env = AsyncVectorEnv([make_env for _ in range(4)])

# Reset asynchronously
async_env.reset_async(seed=42)
observations, infos = async_env.reset_wait()

# Step asynchronously
actions = async_env.action_space.sample()
async_env.step_async(actions)

# Do other work here while environments are stepping...

# Wait for step to complete
observations, rewards, terminations, truncations, infos = async_env.step_wait()
```

### Vector Environment Wrappers

```python
import numpy as np
from gymnasium.vector import VectorObservationWrapper

class BatchedNormalizeObservation(VectorObservationWrapper):
    """Normalize observations across the batch."""

    def __init__(self, env):
        super().__init__(env)
        self.running_mean = np.zeros(env.single_observation_space.shape)
        self.running_var = np.ones(env.single_observation_space.shape)
        self.count = 0

    def observation(self, observations):
        # Update running statistics
        batch_mean = np.mean(observations, axis=0)
        batch_var = np.var(observations, axis=0)

        # Update running mean and variance (simplified)
        self.running_mean = self.running_mean * 0.99 + batch_mean * 0.01
        self.running_var = self.running_var * 0.99 + batch_var * 0.01

        # Normalize
        return (observations - self.running_mean) / np.sqrt(self.running_var + 1e-8)

# Apply wrapper
vec_env = gym.make_vec('CartPole-v1', num_envs=4)
vec_env = BatchedNormalizeObservation(vec_env)
```

### Error Handling and Cleanup

```python
import atexit

try:
    # Create vector environment
    vec_env = AsyncVectorEnv([make_env for _ in range(4)])

    # Register cleanup function
    atexit.register(vec_env.close)

    # Training loop
    observations, infos = vec_env.reset()

    for step in range(1000):
        actions = vec_env.action_space.sample()
        observations, rewards, terminations, truncations, infos = vec_env.step(actions)

        # Handle any exceptions reported by individual environments.
        # Vector infos are a single dict of batched values, so look up the
        # 'exception' key and inspect the per-environment entries.
        if 'exception' in infos:
            for i, exc in enumerate(infos['exception']):
                if exc is not None:
                    print(f"Environment {i} had exception: {exc}")

except KeyboardInterrupt:
    print("Training interrupted")
finally:
    vec_env.close()
```