0
# Storage Backends
1
2
emcee provides flexible storage backends for persisting MCMC chains and sampling results. Backends enable efficient storage, retrieval, and analysis of sampling data, supporting both in-memory and file-based storage with features like compression and resumable sampling.
3
4
## Capabilities
5
6
### Backend Base Class
7
8
The foundation for all storage backends, providing a common interface and in-memory storage.
9
10
```python { .api }
11
class Backend:
12
def __init__(self, dtype=None):
13
"""
14
Initialize backend.
15
16
Args:
17
dtype: Data type for stored arrays (default: np.float64)
18
"""
19
20
def reset(self, nwalkers: int, ndim: int):
21
"""
22
Clear backend state and prepare for new sampling.
23
24
Args:
25
nwalkers: Number of walkers in ensemble
26
ndim: Number of dimensions in parameter space
27
"""
28
29
def has_blobs(self):
30
"""
31
Check if backend stores blob data.
32
33
Returns:
34
bool: True if blobs are stored
35
"""
36
37
def get_chain(self, flat: bool = False, thin: int = 1, discard: int = 0):
38
"""
39
Retrieve stored MCMC chain.
40
41
Args:
42
flat: Flatten chain across ensemble dimension
43
thin: Keep only every `thin`-th step of the chain
44
discard: Number of initial steps to drop as burn-in
45
46
Returns:
47
ndarray: Chain data [steps, nwalkers, ndim] or [steps*nwalkers, ndim] if flat
48
"""
49
50
def get_log_prob(self, flat: bool = False, thin: int = 1, discard: int = 0):
51
"""
52
Retrieve log probability values.
53
54
Returns:
55
ndarray: Log probabilities [steps, nwalkers] or [steps*nwalkers] if flat
56
"""
57
58
def get_blobs(self, flat: bool = False, thin: int = 1, discard: int = 0):
59
"""
60
Retrieve blob data if available.
61
62
Returns:
63
ndarray or None: Blob data if stored
64
"""
65
66
def save_step(self, state, accepted):
67
"""
68
Store a sampling step.
69
70
Args:
71
state: Current ensemble state
72
accepted: Boolean array of accepted proposals
73
"""
74
```
75
76
### HDF5 Backend
77
78
File-based backend using HDF5 format for persistent storage with compression and metadata support.
79
80
```python { .api }
81
class HDFBackend(Backend):
82
def __init__(self, filename: str, name: str = "mcmc", read_only: bool = False):
83
"""
84
Initialize HDF5 backend.
85
86
Args:
87
filename: Path to HDF5 file
88
name: Group name within HDF5 file
89
read_only: Open file in read-only mode
90
"""
91
92
@property
93
def filename(self):
94
"""Get the HDF5 filename."""
95
96
@property
97
def name(self):
98
"""Get the group name."""
99
100
@property
101
def iteration(self):
102
"""Get current iteration count."""
103
104
@property
105
def shape(self):
106
"""Get chain shape (nwalkers, ndim)."""
107
108
def get_autocorr_time(self, **kwargs):
109
"""
110
Compute autocorrelation time from stored chain.
111
112
Returns:
113
ndarray: Autocorrelation times for each parameter
114
"""
115
116
class TempHDFBackend:
117
def __init__(self, **kwargs):
118
"""
119
Temporary HDF5 backend that creates a temporary file.
120
121
Args:
122
**kwargs: Arguments passed to HDFBackend
123
"""
124
```
125
126
### Backend Utilities
127
128
Helper function for enumerating the backends that are available for testing.
129
130
```python { .api }
131
def get_test_backends():
132
"""
133
Get list of available backends for testing.
134
135
Returns:
136
list: Available backend classes
137
"""
138
```
139
140
## Usage Examples
141
142
### In-Memory Backend (Default)
143
144
```python
145
import emcee
146
import numpy as np
147
148
def log_prob(theta):
149
return -0.5 * np.sum(theta**2)
150
151
# Default backend is in-memory
152
sampler = emcee.EnsembleSampler(32, 2, log_prob)
153
154
# Or explicitly specify
155
backend = emcee.backends.Backend()
156
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
157
158
# Run sampling
159
pos = np.random.randn(32, 2)
160
sampler.run_mcmc(pos, 1000)
161
162
# Access results
163
chain = sampler.get_chain()
164
log_prob_vals = sampler.get_log_prob()
165
```
166
167
### HDF5 Backend for Persistent Storage
168
169
```python
170
from emcee.backends import HDFBackend
171
172
# Create HDF5 backend
173
filename = "mcmc_results.h5"
174
backend = HDFBackend(filename)
175
176
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
177
178
# Run sampling - results saved to file
179
sampler.run_mcmc(pos, 1000)
180
181
# Results are automatically saved
182
print(f"Chain shape: {backend.shape}")
183
print(f"Iterations completed: {backend.iteration}")
184
```
185
186
### Resuming from HDF5 Backend
187
188
```python
189
# Resume sampling from existing file
190
backend = HDFBackend(filename, read_only=False)
191
192
# Check existing progress
193
print(f"Previous iterations: {backend.iteration}")
194
previous_chain = backend.get_chain()
195
196
# Resume from last state
197
if backend.iteration > 0:
198
last_state = backend.get_last_sample()
199
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
200
201
# Continue sampling
202
sampler.run_mcmc(last_state, 500) # Additional 500 steps
203
```
204
205
### Multiple Sampling Runs in Same File
206
207
```python
208
# Use different group names for multiple runs
209
backend1 = HDFBackend("results.h5", name="run1")
210
backend2 = HDFBackend("results.h5", name="run2")
211
212
# First run
213
sampler1 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend1)
214
sampler1.run_mcmc(pos, 1000)
215
216
# Second run (same settings in this example), stored under its own group
217
sampler2 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend2)
218
sampler2.run_mcmc(pos, 1000)
219
220
# Access results from specific runs
221
chain1 = backend1.get_chain()
222
chain2 = backend2.get_chain()
223
```
224
225
### Temporary HDF5 Backend
226
227
```python
228
from emcee.backends import TempHDFBackend
229
230
# Creates temporary file that's automatically cleaned up
231
with TempHDFBackend() as backend:
232
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
233
sampler.run_mcmc(pos, 1000)
234
235
# Use results while in context
236
chain = backend.get_chain()
237
# File is automatically deleted when context exits
238
```
239
240
### Backend with Blob Data
241
242
```python
243
def log_prob_with_blobs(theta):
244
log_p = -0.5 * np.sum(theta**2)
245
# Return additional metadata as blobs
246
blobs = {"energy": np.sum(theta**2), "step_size": np.linalg.norm(theta)}
247
return log_p, blobs
248
249
# Backend automatically handles blobs
250
backend = HDFBackend("results_with_blobs.h5")
251
sampler = emcee.EnsembleSampler(32, 2, log_prob_with_blobs, backend=backend)
252
253
sampler.run_mcmc(pos, 1000)
254
255
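# Access blob data (named fields appear in `blobs.dtype.names` only if the sampler was created with a structured `blobs_dtype`; otherwise it is None)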
256
blobs = backend.get_blobs()
257
print(f"Blob keys: {blobs.dtype.names}")
258
```
259
260
### Analyzing Stored Results
261
262
```python
263
# Load existing results for analysis
264
backend = HDFBackend("results.h5", read_only=True)
265
266
# Get chain with burn-in removal
267
chain = backend.get_chain(discard=200, flat=True)
268
log_prob_vals = backend.get_log_prob(discard=200, flat=True)
269
270
# Compute autocorrelation time
271
tau = backend.get_autocorr_time()
272
print(f"Autocorrelation time: {tau}")
273
274
# Thin chain based on autocorrelation
275
thin_factor = int(2 * np.max(tau))
276
thinned_chain = backend.get_chain(discard=200, thin=thin_factor, flat=True)
277
```
278
279
### Custom Backend Configuration
280
281
```python
282
# Backend with specific data type
283
backend = emcee.backends.Backend(dtype=np.float32)
284
285
# HDF5 with compression (requires h5py)
286
import h5py
287
backend = HDFBackend("compressed.h5")
288
# Note: compression is off by default; in emcee >= 3.1 pass e.g. compression="gzip" (optionally with compression_opts) to HDFBackend to enable it
289
```
290
291
### Backend Inspection
292
293
```python
294
# Check backend properties
295
backend = HDFBackend("results.h5")
296
297
print(f"Backend type: {type(backend).__name__}")
298
print(f"Has blobs: {backend.has_blobs()}")
299
print(f"Chain shape: {backend.shape}")
300
print(f"Iterations: {backend.iteration}")
301
302
# Access raw HDF5 file (advanced usage)
303
with h5py.File(backend.filename, 'r') as f:
304
print(f"HDF5 groups: {list(f.keys())}")
305
print(f"Chain dataset shape: {f[backend.name]['chain'].shape}")
306
```