or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

autocorr.md backends.md ensemble-sampling.md index.md moves.md state.md

backends.md docs/

0

# Storage Backends

1

2

emcee provides flexible storage backends for persisting MCMC chains and sampling results. Backends enable efficient storage, retrieval, and analysis of sampling data, supporting both in-memory and file-based storage with features like compression and resumable sampling.

3

4

## Capabilities

5

6

### Backend Base Class

7

8

The foundation for all storage backends, providing a common interface and in-memory storage.

9

10

```python { .api }

11

class Backend:

12

def __init__(self, dtype=None):

13

"""

14

Initialize backend.

15

16

Args:

17

dtype: Data type for stored arrays (default: np.float64)

18

"""

19

20

def reset(self, nwalkers: int, ndim: int):

21

"""

22

Clear backend state and prepare for new sampling.

23

24

Args:

25

nwalkers: Number of walkers in ensemble

26

ndim: Number of dimensions in parameter space

27

"""

28

29

def has_blobs(self):

30

"""

31

Check if backend stores blob data.

32

33

Returns:

34

bool: True if blobs are stored

35

"""

36

37

def get_chain(self, flat: bool = False, thin: int = 1, discard: int = 0):

38

"""

39

Retrieve stored MCMC chain.

40

41

Args:

42

flat: Flatten chain across ensemble dimension

43

thin: Keep only every `thin`-th step of the chain

44

discard: Number of initial steps to discard as burn-in

45

46

Returns:

47

ndarray: Chain data [steps, nwalkers, ndim] or [steps*nwalkers, ndim] if flat

48

"""

49

50

def get_log_prob(self, flat: bool = False, thin: int = 1, discard: int = 0):

51

"""

52

Retrieve log probability values.

53

54

Returns:

55

ndarray: Log probabilities [steps, nwalkers] or [steps*nwalkers] if flat

56

"""

57

58

def get_blobs(self, flat: bool = False, thin: int = 1, discard: int = 0):

59

"""

60

Retrieve blob data if available.

61

62

Returns:

63

ndarray or None: Blob data if stored

64

"""

65

66

def save_step(self, state, accepted):

67

"""

68

Store a sampling step.

69

70

Args:

71

state: Current ensemble state

72

accepted: Boolean array of accepted proposals

73

"""

74

```

75

76

### HDF5 Backend

77

78

File-based backend using the HDF5 format for persistent storage, with compression and metadata support.

79

80

```python { .api }

81

class HDFBackend(Backend):

82

def __init__(self, filename: str, name: str = "mcmc", read_only: bool = False):

83

"""

84

Initialize HDF5 backend.

85

86

Args:

87

filename: Path to HDF5 file

88

name: Group name within HDF5 file

89

read_only: Open file in read-only mode

90

"""

91

92

@property

93

def filename(self):

94

"""Get the HDF5 filename."""

95

96

@property

97

def name(self):

98

"""Get the group name."""

99

100

@property

101

def iteration(self):

102

"""Get current iteration count."""

103

104

@property

105

def shape(self):

106

"""Get chain shape (nwalkers, ndim)."""

107

108

def get_autocorr_time(self, **kwargs):

109

"""

110

Compute autocorrelation time from stored chain.

111

112

Returns:

113

ndarray: Autocorrelation times for each parameter

114

"""

115

116

class TempHDFBackend:

117

def __init__(self, **kwargs):

118

"""

119

Temporary HDF5 backend that creates a temporary file.

120

121

Args:

122

**kwargs: Arguments passed to HDFBackend

123

"""

124

```

125

126

### Backend Utilities

127

128

Functions for working with multiple backends and testing.

129

130

```python { .api }

131

def get_test_backends():

132

"""

133

Get list of available backends for testing.

134

135

Returns:

136

list: Available backend classes

137

"""

138

```

139

140

## Usage Examples

141

142

### In-Memory Backend (Default)

143

144

```python

145

import emcee

146

import numpy as np

147

148

def log_prob(theta):

149

return -0.5 * np.sum(theta**2)

150

151

# Default backend is in-memory

152

sampler = emcee.EnsembleSampler(32, 2, log_prob)

153

154

# Or explicitly specify

155

backend = emcee.backends.Backend()

156

sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

157

158

# Run sampling

159

pos = np.random.randn(32, 2)

160

sampler.run_mcmc(pos, 1000)

161

162

# Access results

163

chain = sampler.get_chain()

164

log_prob_vals = sampler.get_log_prob()

165

```

166

167

### HDF5 Backend for Persistent Storage

168

169

```python

170

from emcee.backends import HDFBackend

171

172

# Create HDF5 backend

173

filename = "mcmc_results.h5"

174

backend = HDFBackend(filename)

175

176

sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

177

178

# Run sampling - results saved to file

179

sampler.run_mcmc(pos, 1000)

180

181

# Results are automatically saved

182

print(f"Chain shape: {backend.shape}")

183

print(f"Iterations completed: {backend.iteration}")

184

```

185

186

### Resuming from HDF5 Backend

187

188

```python

189

# Resume sampling from existing file

190

backend = HDFBackend(filename, read_only=False)

191

192

# Check existing progress

193

print(f"Previous iterations: {backend.iteration}")

194

previous_chain = backend.get_chain()

195

196

# Resume from last state

197

if backend.iteration > 0:

198

last_state = backend.get_last_sample()

199

sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

200

201

# Continue sampling

202

sampler.run_mcmc(last_state, 500) # Additional 500 steps

203

```

204

205

### Multiple Sampling Runs in Same File

206

207

```python

208

# Use different group names for multiple runs

209

backend1 = HDFBackend("results.h5", name="run1")

210

backend2 = HDFBackend("results.h5", name="run2")

211

212

# First run

213

sampler1 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend1)

214

sampler1.run_mcmc(pos, 1000)

215

216

# Second run with different parameters

217

sampler2 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend2)

218

sampler2.run_mcmc(pos, 1000)

219

220

# Access results from specific runs

221

chain1 = backend1.get_chain()

222

chain2 = backend2.get_chain()

223

```

224

225

### Temporary HDF5 Backend

226

227

```python

228

from emcee.backends import TempHDFBackend

229

230

# Creates temporary file that's automatically cleaned up

231

with TempHDFBackend() as backend:

232

sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

233

sampler.run_mcmc(pos, 1000)

234

235

# Use results while in context

236

chain = backend.get_chain()

237

# File is automatically deleted when context exits

238

```

239

240

### Backend with Blob Data

241

242

```python

243

def log_prob_with_blobs(theta):

244

log_p = -0.5 * np.sum(theta**2)

245

# Return additional metadata as blobs

246

blobs = {"energy": np.sum(theta**2), "step_size": np.linalg.norm(theta)}

247

return log_p, blobs

248

249

# Backend automatically handles blobs

250

backend = HDFBackend("results_with_blobs.h5")

251

sampler = emcee.EnsembleSampler(32, 2, log_prob_with_blobs, backend=backend)

252

253

sampler.run_mcmc(pos, 1000)

254

255

# Access blob data

256

blobs = backend.get_blobs()

257

print(f"Blob keys: {blobs.dtype.names}")

258

```

259

260

### Analyzing Stored Results

261

262

```python

263

# Load existing results for analysis

264

backend = HDFBackend("results.h5", read_only=True)

265

266

# Get chain with burn-in removal

267

chain = backend.get_chain(discard=200, flat=True)

268

log_prob_vals = backend.get_log_prob(discard=200, flat=True)

269

270

# Compute autocorrelation time

271

tau = backend.get_autocorr_time()

272

print(f"Autocorrelation time: {tau}")

273

274

# Thin chain based on autocorrelation

275

thin_factor = int(2 * np.max(tau))

276

thinned_chain = backend.get_chain(discard=200, thin=thin_factor, flat=True)

277

```

278

279

### Custom Backend Configuration

280

281

```python

282

# Backend with specific data type

283

backend = emcee.backends.Backend(dtype=np.float32)

284

285

# HDF5 with compression (requires h5py)

286

import h5py

287

backend = HDFBackend("compressed.h5")

288

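# NOTE: compression is opt-in, not automatic. Recent emcee releases accept compression="gzip" (and compression_opts) on HDFBackend; by default data is stored uncompressed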

289

```

290

291

### Backend Inspection

292

293

```python

294

# Check backend properties

295

backend = HDFBackend("results.h5")

296

297

print(f"Backend type: {type(backend).__name__}")

298

print(f"Has blobs: {backend.has_blobs()}")

299

print(f"Chain shape: {backend.shape}")

300

print(f"Iterations: {backend.iteration}")

301

302

# Access raw HDF5 file (advanced usage)

303

with h5py.File(backend.filename, 'r') as f:

304

print(f"HDF5 groups: {list(f.keys())}")

305

print(f"Chain dataset shape: {f[backend.name]['chain'].shape}")

306

```