# Core API

The core API provides the fundamental classes and interfaces that form the foundation of all Gymnasium environments and wrappers.

## Capabilities

### Environment Base Class

The main class for implementing reinforcement learning environments with the standard step/reset/render/close interface.

```python { .api }
class Env:
    """
    Base environment class for reinforcement learning.

    Attributes:
        action_space: The Space object for valid actions
        observation_space: The Space object for valid observations
        spec: Environment specification metadata
        metadata: Environment metadata dict
        render_mode: Current rendering mode
        np_random: Random number generator for the environment
    """

    def step(self, action) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """
        Execute one environment step.

        Args:
            action: Action to take in the environment

        Returns:
            tuple: (observation, reward, terminated, truncated, info)
                - observation: Agent's observation of environment
                - reward: Reward for taking the action
                - terminated: Whether episode ended due to terminal state
                - truncated: Whether episode ended due to time limit
                - info: Additional information dict
        """

    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[ObsType, dict[str, Any]]:
        """
        Reset environment to initial state.

        Args:
            seed (int, optional): Random seed for reproducibility
            options (dict, optional): Environment-specific options

        Returns:
            tuple: (observation, info)
                - observation: Initial observation
                - info: Additional information dict
        """

    def render(self) -> RenderFrame | list[RenderFrame] | None:
        """
        Render the environment for visualization.

        Returns:
            Rendered frame(s) or None depending on render_mode
        """

    def close(self) -> None:
        """Clean up environment resources."""

    @property
    def unwrapped(self):
        """Returns the base non-wrapped environment."""

    @property
    def np_random_seed(self) -> int:
        """Returns the environment's random seed."""

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the given attribute exists."""

    def get_wrapper_attr(self, name: str):
        """Gets the attribute from the environment."""

    def set_wrapper_attr(self, name: str, value, *, force: bool = True) -> bool:
        """Sets the attribute on the environment."""
```

### Wrapper Base Classes

Base classes for modifying environment behavior without changing the underlying environment.

```python { .api }
class Wrapper(Env):
    """
    Base wrapper class that delegates to wrapped environment.

    Attributes:
        env: The wrapped environment
        unwrapped: The base unwrapped environment
    """

    def __init__(self, env):
        """
        Initialize wrapper with environment to wrap.

        Args:
            env: Environment to wrap
        """
class ObservationWrapper(Wrapper):
    """Base class for wrappers that modify observations."""

    def observation(self, observation):
        """
        Transform observation (must be implemented by subclasses).

        Args:
            observation: Original observation

        Returns:
            Transformed observation
        """
class ActionWrapper(Wrapper):
    """Base class for wrappers that modify actions."""

    def action(self, action):
        """
        Transform action before passing to environment.

        Args:
            action: Action from agent

        Returns:
            Transformed action for environment
        """
class RewardWrapper(Wrapper):
    """Base class for wrappers that modify rewards."""

    def reward(self, reward):
        """
        Transform reward (must be implemented by subclasses).

        Args:
            reward: Original reward

        Returns:
            Transformed reward
        """
```

### Space Base Class

Base class for defining action and observation spaces.

```python { .api }
class Space:
    """
    Base class for action and observation spaces.

    Attributes:
        shape: Shape of space samples (tuple[int, ...] | None)
        dtype: Data type of space samples (np.dtype | None)
        np_random: Random number generator for the space
    """

    def sample(self, mask: Any | None = None, probability: Any | None = None) -> T_cov:
        """
        Sample a random element from the space.

        Args:
            mask: A mask used for random sampling
            probability: A probability mask used for sampling

        Returns:
            Random sample from the space
        """

    def contains(self, x) -> bool:
        """
        Check if x is contained in the space.

        Args:
            x: Element to check

        Returns:
            True if x is in the space, False otherwise
        """

    def seed(self, seed: int | None = None) -> int | list[int] | dict[str, int]:
        """
        Set random seed for sampling.

        Args:
            seed (int, optional): Random seed

        Returns:
            Seed values used for the PRNGs
        """

    @property
    def np_random(self) -> np.random.Generator:
        """Returns the random number generator for this space."""

    @property
    def is_np_flattenable(self) -> bool:
        """Checks whether this space can be flattened to a Box."""
```

## Usage Examples

### Creating a Custom Environment

```python
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class CustomEnv(gym.Env):
    """Minimal example environment with a discrete action space."""

    def __init__(self):
        super().__init__()

        # Define action and observation spaces
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(4,), dtype=np.float32
        )

        # Initialize state
        self.state = None

    def step(self, action):
        """Execute one step; returns the standard 5-tuple."""
        # Implement environment logic (placeholder: random observation)
        observation = self.observation_space.sample()
        reward = 1.0
        terminated = False
        truncated = False
        info = {}

        return observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        """Reset to an initial state; returns (observation, info)."""
        # super().reset seeds self.np_random for reproducibility
        super().reset(seed=seed)

        # Reset environment state
        self.state = self.observation_space.sample()
        observation = self.state
        info = {}

        return observation, info
```

### Creating a Custom Wrapper

```python
class LoggingWrapper(gym.Wrapper):
    """Wrapper that prints each step's action and reward."""

    def __init__(self, env):
        super().__init__(env)
        self.step_count = 0

    def step(self, action):
        """Delegate to the wrapped env, then log the transition."""
        observation, reward, terminated, truncated, info = self.env.step(action)
        self.step_count += 1
        print(f"Step {self.step_count}: action={action}, reward={reward}")
        return observation, reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Reset the step counter along with the wrapped env."""
        self.step_count = 0
        return self.env.reset(**kwargs)
# Usage
env = gym.make('CartPole-v1')
wrapped_env = LoggingWrapper(env)
```