# Vector Environments

Vector environments enable batched execution of multiple environment instances for improved performance when training with parallel environments. They provide the same interface as regular environments but operate on batches of observations, actions, and rewards.

## Capabilities

### Vector Environment Base Class

Base class that defines the interface for vectorized environments.

```python { .api }
class VectorEnv:
    """
    Base class for vectorized environments.

    Attributes:
        num_envs: Number of parallel environments
        single_observation_space: Observation space for individual environment
        single_action_space: Action space for individual environment
        observation_space: Batched observation space
        action_space: Batched action space
        closed: Whether environments are closed
    """

    def step(self, actions: ActType) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Execute actions in all environments.

        Args:
            actions: Batch of actions for all environments

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
            - observations: Batch of observations
            - rewards: Array of rewards
            - terminations: Array of termination flags
            - truncations: Array of truncation flags
            - infos: Dictionary of batched info values
        """

    def reset(self, seed: int | list[int] | None = None,
              options: dict | list[dict] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """
        Reset all environments.

        Args:
            seed: Random seed(s) for environments
            options: Environment options

        Returns:
            tuple: (observations, infos)
            - observations: Batch of initial observations
            - infos: Dictionary of batched info values
        """

    def close(self) -> None:
        """Close all environments."""

    def call(self, name: str, *args, **kwargs) -> list[Any]:
        """
        Call method on all environments.

        Args:
            name: Method name to call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            List of method results
        """

    def get_attr(self, name: str) -> list[Any]:
        """
        Get attribute from all environments.

        Args:
            name: Attribute name

        Returns:
            List of attribute values
        """

    def set_attr(self, name: str, values: Any | list[Any]) -> None:
        """
        Set attribute on all environments.

        Args:
            name: Attribute name
            values: Value(s) to set
        """
```

### Synchronous Vector Environment

Runs environments sequentially in the same process.

```python { .api }
class SyncVectorEnv(VectorEnv):
    """
    Synchronous vectorized environment.

    Runs environments sequentially in the same process.
    Simple but may be slower for computationally intensive environments.

    Args:
        env_fns: Iterator or sequence of functions that return environment instances
        copy: Whether to deepcopy observations
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for vector environment
    """

    def __init__(self, env_fns: Iterator[Callable[[], Env]] | Sequence[Callable[[], Env]],
                 copy: bool = True, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass
```

### Asynchronous Vector Environment

Runs environments in parallel processes for better performance.

```python { .api }
class AsyncVectorEnv(VectorEnv):
    """
    Asynchronous vectorized environment.

    Runs environments in parallel processes using multiprocessing.
    Better performance for computationally intensive environments.

    Args:
        env_fns: Sequence of functions that return environment instances
        shared_memory: Whether to use shared memory for observations
        copy: Whether to deepcopy observations
        context: Multiprocessing context ('spawn', 'fork', 'forkserver')
        daemon: Whether worker processes are daemonic
        worker: Custom worker function
        observation_mode: How to batch observation spaces ('same', 'different', or Space)
        autoreset_mode: Autoreset mode for vector environment
    """

    def __init__(self, env_fns: Sequence[Callable[[], Env]], shared_memory: bool = True,
                 copy: bool = True, context: str | None = None, daemon: bool = True,
                 worker: Callable | None = None, observation_mode: str | Space = "same",
                 autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP):
        pass

    def step_async(self, actions: ActType) -> None:
        """
        Asynchronously execute actions (non-blocking).

        Args:
            actions: Batch of actions
        """

    def step_wait(self) -> tuple[ObsType, ArrayType, ArrayType, ArrayType, dict[str, Any]]:
        """
        Wait for asynchronous step to complete.

        Returns:
            tuple: (observations, rewards, terminations, truncations, infos)
        """

    def reset_async(self, seed: int | list[int] | None = None,
                    options: dict | list[dict] | None = None) -> None:
        """
        Asynchronously reset environments (non-blocking).

        Args:
            seed: Random seed(s)
            options: Environment options
        """

    def reset_wait(self) -> tuple[ObsType, dict[str, Any]]:
        """
        Wait for asynchronous reset to complete.

        Returns:
            tuple: (observations, infos)
        """
```

### Vector Environment Wrappers

Wrappers for modifying vector environment behavior.

```python { .api }
class VectorWrapper(VectorEnv):
    """
    Base wrapper for vector environments.

    Args:
        env: Vector environment to wrap
    """

    def __init__(self, env: VectorEnv):
        pass

class VectorObservationWrapper(VectorWrapper):
    """
    Base class for vector observation wrappers.
    """

    def observation(self, observations: ObsType) -> ObsType:
        """
        Transform batch of observations.

        Args:
            observations: Batch of observations

        Returns:
            Transformed batch of observations
        """

class VectorActionWrapper(VectorWrapper):
    """
    Base class for vector action wrappers.
    """

    def action(self, actions: ActType) -> ActType:
        """
        Transform batch of actions.

        Args:
            actions: Batch of actions

        Returns:
            Transformed batch of actions
        """

class VectorRewardWrapper(VectorWrapper):
    """
    Base class for vector reward wrappers.
    """

    def reward(self, rewards: ArrayType) -> ArrayType:
        """
        Transform batch of rewards.

        Args:
            rewards: Batch of rewards

        Returns:
            Transformed batch of rewards
        """
```

### Utility Enums

Enumerations for vector environment configuration.

```python { .api }
class AutoresetMode(Enum):
    """
    Auto-reset modes for vector environments.

    Values:
        NEXT_STEP: Reset on next step after termination/truncation
        SAME_STEP: Reset immediately on same step
        DISABLED: No auto-reset
    """
    NEXT_STEP = "NextStep"
    SAME_STEP = "SameStep"
    DISABLED = "Disabled"
```

## Usage Examples

### Creating Vector Environments

```python
import gymnasium as gym
from gymnasium.vector import SyncVectorEnv, AsyncVectorEnv

# Create synchronous vector environment
def make_env():
    return gym.make('CartPole-v1')

env_fns = [make_env for _ in range(4)]
sync_vec_env = SyncVectorEnv(env_fns)

# Create asynchronous vector environment (better for complex environments)
async_vec_env = AsyncVectorEnv(env_fns)

# Using make_vec for convenience
vec_env = gym.make_vec('CartPole-v1', num_envs=4, vectorization_mode='async')
```

### Working with Vector Environments

```python
import numpy as np

# Create vector environment
vec_env = gym.make_vec('CartPole-v1', num_envs=4)

# Reset all environments
observations, infos = vec_env.reset(seed=42)
print(f"Observations shape: {observations.shape}")  # (4, 4) for CartPole

# Take actions in all environments
actions = vec_env.action_space.sample()  # Sample batch of actions
print(f"Actions shape: {actions.shape}")  # (4,)

# Step all environments
observations, rewards, terminations, truncations, infos = vec_env.step(actions)
print(f"Rewards: {rewards}")  # Array of 4 rewards
print(f"Terminations: {terminations}")  # Array of 4 boolean flags

# Handle individual environment resets automatically
for i in range(100):
    actions = vec_env.action_space.sample()
    observations, rewards, terminations, truncations, infos = vec_env.step(actions)

    # Vector environments automatically reset terminated/truncated environments
    # The observations array will contain reset observations for those environments

vec_env.close()
```

### Advanced Vector Environment Usage

```python
# Create environments with different configurations
def make_env(env_id, seed=None):
    def _make():
        env = gym.make(env_id)
        if seed is not None:
            env.reset(seed=seed)
        return env
    return _make

env_fns = [
    make_env('CartPole-v1', seed=i)
    for i in range(4)
]

vec_env = AsyncVectorEnv(env_fns, shared_memory=True)

# Call methods on all environments
max_steps = vec_env.call('spec')[0].max_episode_steps
print(f"Max episode steps: {max_steps}")

# Get attributes from all environments
action_spaces = vec_env.get_attr('action_space')
print(f"All action spaces are Discrete(2): {all(isinstance(space, gym.spaces.Discrete) and space.n == 2 for space in action_spaces)}")

# Set attributes on all environments (if supported)
vec_env.set_attr('render_mode', 'rgb_array')
```

### Asynchronous Operations

```python
# For AsyncVectorEnv, you can use async operations for better control
async_env = AsyncVectorEnv([make_env for _ in range(4)])

# Reset asynchronously
async_env.reset_async(seed=42)
observations, infos = async_env.reset_wait()

# Step asynchronously
actions = async_env.action_space.sample()
async_env.step_async(actions)

# Do other work here while environments are stepping...

# Wait for step to complete
observations, rewards, terminations, truncations, infos = async_env.step_wait()
```

### Vector Environment Wrappers

```python
import numpy as np
from gymnasium.vector import VectorObservationWrapper

class BatchedNormalizeObservation(VectorObservationWrapper):
    """Normalize observations across the batch."""

    def __init__(self, env):
        super().__init__(env)
        self.running_mean = np.zeros(env.single_observation_space.shape)
        self.running_var = np.ones(env.single_observation_space.shape)
        self.count = 0

    def observation(self, observations):
        # Update running statistics
        batch_mean = np.mean(observations, axis=0)
        batch_var = np.var(observations, axis=0)

        # Update running mean and variance (simplified)
        self.running_mean = self.running_mean * 0.99 + batch_mean * 0.01
        self.running_var = self.running_var * 0.99 + batch_var * 0.01

        # Normalize
        return (observations - self.running_mean) / np.sqrt(self.running_var + 1e-8)

# Apply wrapper
vec_env = gym.make_vec('CartPole-v1', num_envs=4)
vec_env = BatchedNormalizeObservation(vec_env)
```

### Error Handling and Cleanup

```python
import atexit

vec_env = None
try:
    # Create vector environment
    vec_env = AsyncVectorEnv([make_env for _ in range(4)])

    # Register cleanup function
    atexit.register(vec_env.close)

    # Training loop
    observations, infos = vec_env.reset()

    for step in range(1000):
        actions = vec_env.action_space.sample()
        observations, rewards, terminations, truncations, infos = vec_env.step(actions)

        # infos is a dictionary of batched values, not a list of per-env dicts
        if 'exception' in infos:
            print(f"Exceptions reported: {infos['exception']}")

except KeyboardInterrupt:
    print("Training interrupted")
finally:
    # Guarded: vec_env stays None if construction itself failed
    if vec_env is not None:
        vec_env.close()
```