Tessl Tile for pypi/emcee@3.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

autocorr.md backends.md ensemble-sampling.md index.md moves.md state.md

docs/backends.md

0
# Storage Backends
1

2
emcee provides flexible storage backends for persisting MCMC chains and sampling results. Backends enable efficient storage, retrieval, and analysis of sampling data, supporting both in-memory and file-based storage with features like compression and resumable sampling.
3

4
## Capabilities
5

6
### Backend Base Class
7

8
The foundation for all storage backends, providing a common interface and in-memory storage.
9

10
```python { .api }
11
class Backend:
12
    def __init__(self, dtype=None):
13
        """
14
        Initialize backend.
15
        
16
        Args:
17
            dtype: Data type for stored arrays (default: np.float64)
18
        """
19
    
20
    def reset(self, nwalkers: int, ndim: int):
21
        """
22
        Clear backend state and prepare for new sampling.
23
        
24
        Args:
25
            nwalkers: Number of walkers in ensemble
26
            ndim: Number of dimensions in parameter space
27
        """
28
    
29
    def has_blobs(self):
30
        """
31
        Check if backend stores blob data.
32
        
33
        Returns:
34
            bool: True if blobs are stored
35
        """
36
    
37
    def get_chain(self, flat: bool = False, thin: int = 1, discard: int = 0):
38
        """
39
        Retrieve stored MCMC chain.
40
        
41
        Args:
42
            flat: Flatten chain across ensemble dimension
43
            thin: Keep only every `thin`-th step of the chain
44
            discard: Number of initial steps to discard as burn-in
45
            
46
        Returns:
47
            ndarray: Chain data [steps, nwalkers, ndim] or [steps*nwalkers, ndim] if flat
48
        """
49
    
50
    def get_log_prob(self, flat: bool = False, thin: int = 1, discard: int = 0):
51
        """
52
        Retrieve log probability values.
53
        
54
        Returns:
55
            ndarray: Log probabilities [steps, nwalkers] or [steps*nwalkers] if flat
56
        """
57
    
58
    def get_blobs(self, flat: bool = False, thin: int = 1, discard: int = 0):
59
        """
60
        Retrieve blob data if available.
61
        
62
        Returns:
63
            ndarray or None: Blob data if stored
64
        """
65
    
66
    def save_step(self, state, accepted):
67
        """
68
        Store a sampling step.
69
        
70
        Args:
71
            state: Current ensemble state
72
            accepted: Boolean array of accepted proposals
73
        """
74
```
75

76
### HDF5 Backend
77

78
File-based backend using HDF5 format for persistent storage with compression and metadata support.
79

80
```python { .api }
81
class HDFBackend(Backend):
82
    def __init__(self, filename: str, name: str = "mcmc", read_only: bool = False):
83
        """
84
        Initialize HDF5 backend.
85
        
86
        Args:
87
            filename: Path to HDF5 file
88
            name: Group name within HDF5 file
89
            read_only: Open file in read-only mode
90
        """
91
    
92
    @property
93
    def filename(self):
94
        """Get the HDF5 filename."""
95
    
96
    @property
97
    def name(self):
98
        """Get the group name."""
99
    
100
    @property
101
    def iteration(self):
102
        """Get current iteration count."""
103
    
104
    @property
105
    def shape(self):
106
        """Get chain shape (nwalkers, ndim)."""
107
    
108
    def get_autocorr_time(self, **kwargs):
109
        """
110
        Compute autocorrelation time from stored chain.
111
        
112
        Returns:
113
            ndarray: Autocorrelation times for each parameter
114
        """
115

116
class TempHDFBackend:
117
    def __init__(self, **kwargs):
118
        """
119
        Temporary HDF5 backend that creates a temporary file.
120
        
121
        Args:
122
            **kwargs: Arguments passed to HDFBackend
123
        """
124
```
125

126
### Backend Utilities
127

128
Functions for working with multiple backends and testing.
129

130
```python { .api }
131
def get_test_backends():
132
    """
133
    Get list of available backends for testing.
134
    
135
    Returns:
136
        list: Available backend classes
137
    """
138
```
139

140
## Usage Examples
141

142
### In-Memory Backend (Default)
143

144
```python
145
import emcee
146
import numpy as np
147

148
def log_prob(theta):
149
    return -0.5 * np.sum(theta**2)
150

151
# Default backend is in-memory
152
sampler = emcee.EnsembleSampler(32, 2, log_prob)
153

154
# Or explicitly specify
155
backend = emcee.backends.Backend()
156
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
157

158
# Run sampling
159
pos = np.random.randn(32, 2)
160
sampler.run_mcmc(pos, 1000)
161

162
# Access results
163
chain = sampler.get_chain()
164
log_prob_vals = sampler.get_log_prob()
165
```
166

167
### HDF5 Backend for Persistent Storage
168

169
```python
170
from emcee.backends import HDFBackend
171

172
# Create HDF5 backend
173
filename = "mcmc_results.h5"
174
backend = HDFBackend(filename)
175

176
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
177

178
# Run sampling - results saved to file
179
sampler.run_mcmc(pos, 1000)
180

181
# Results are automatically saved
182
print(f"Chain shape: {backend.shape}")
183
print(f"Iterations completed: {backend.iteration}")
184
```
185

186
### Resuming from HDF5 Backend
187

188
```python
189
# Resume sampling from existing file
190
backend = HDFBackend(filename, read_only=False)
191

192
# Check existing progress
193
print(f"Previous iterations: {backend.iteration}")
194
previous_chain = backend.get_chain()
195

196
# Resume from last state
197
if backend.iteration > 0:
198
    last_state = backend.get_last_sample()
199
    sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
200
    
201
    # Continue sampling
202
    sampler.run_mcmc(last_state, 500)  # Additional 500 steps
203
```
204

205
### Multiple Sampling Runs in Same File
206

207
```python
208
# Use different group names for multiple runs
209
backend1 = HDFBackend("results.h5", name="run1")
210
backend2 = HDFBackend("results.h5", name="run2")
211

212
# First run
213
sampler1 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend1)
214
sampler1.run_mcmc(pos, 1000)
215

216
# Second run, stored under a separate group in the same file
217
sampler2 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend2)
218
sampler2.run_mcmc(pos, 1000)
219

220
# Access results from specific runs
221
chain1 = backend1.get_chain()
222
chain2 = backend2.get_chain()
223
```
224

225
### Temporary HDF5 Backend
226

227
```python
228
from emcee.backends import TempHDFBackend
229

230
# Creates temporary file that's automatically cleaned up
231
with TempHDFBackend() as backend:
232
    sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
233
    sampler.run_mcmc(pos, 1000)
234
    
235
    # Use results while in context
236
    chain = backend.get_chain()
237
    # File is automatically deleted when context exits
238
```
239

240
### Backend with Blob Data
241

242
```python
243
def log_prob_with_blobs(theta):
244
    log_p = -0.5 * np.sum(theta**2)
245
    # Return additional metadata as blobs
246
    blobs = {"energy": np.sum(theta**2), "step_size": np.linalg.norm(theta)}
247
    return log_p, blobs
248

249
# Backend automatically handles blobs
250
backend = HDFBackend("results_with_blobs.h5")
251
sampler = emcee.EnsembleSampler(32, 2, log_prob_with_blobs, backend=backend)
252

253
sampler.run_mcmc(pos, 1000)
254

255
# Access blob data
256
blobs = backend.get_blobs()
257
print(f"Blob keys: {blobs.dtype.names}")
258
```
259

260
### Analyzing Stored Results
261

262
```python
263
# Load existing results for analysis
264
backend = HDFBackend("results.h5", read_only=True)
265

266
# Get chain with burn-in removal
267
chain = backend.get_chain(discard=200, flat=True)
268
log_prob_vals = backend.get_log_prob(discard=200, flat=True)
269

270
# Compute autocorrelation time
271
tau = backend.get_autocorr_time()
272
print(f"Autocorrelation time: {tau}")
273

274
# Thin chain based on autocorrelation
275
thin_factor = int(2 * np.max(tau))
276
thinned_chain = backend.get_chain(discard=200, thin=thin_factor, flat=True)
277
```
278

279
### Custom Backend Configuration
280

281
```python
282
# Backend with specific data type
283
backend = emcee.backends.Backend(dtype=np.float32)
284

285
# HDF5 with compression (requires h5py)
286
import h5py
287
backend = HDFBackend("compressed.h5")
288
# Note: compression is not enabled by default; pass compression="gzip" (and optionally compression_opts) to HDFBackend to enable it
289
```
290

291
### Backend Inspection
292

293
```python
294
# Check backend properties
295
backend = HDFBackend("results.h5")
296

297
print(f"Backend type: {type(backend).__name__}")
298
print(f"Has blobs: {backend.has_blobs()}")
299
print(f"Chain shape: {backend.shape}")
300
print(f"Iterations: {backend.iteration}")
301

302
# Access raw HDF5 file (advanced usage)
303
with h5py.File(backend.filename, 'r') as f:
304
    print(f"HDF5 groups: {list(f.keys())}")
305
    print(f"Chain dataset shape: {f[backend.name]['chain'].shape}")
306
```

Version

Tile

Files

docs/backends.md

docs/backends.md