emcee — the Python ensemble sampling toolkit for affine-invariant MCMC

emcee provides flexible storage backends for persisting MCMC chains and sampling results. Backends enable efficient storage, retrieval, and analysis of sampling data, supporting both in-memory and file-based storage, with features such as compression and resumable sampling.
The foundation for all storage backends, providing common interface and in-memory storage.
class Backend:
    """In-memory storage backend; the common interface all backends implement."""

    def __init__(self, dtype=None):
        """
        Initialize backend.

        Args:
            dtype: Data type for stored arrays (default: np.float64)
        """

    def reset(self, nwalkers: int, ndim: int):
        """
        Clear backend state and prepare for new sampling.

        Args:
            nwalkers: Number of walkers in ensemble
            ndim: Number of dimensions in parameter space
        """

    def has_blobs(self):
        """
        Check if backend stores blob data.

        Returns:
            bool: True if blobs are stored
        """

    def get_chain(self, flat: bool = False, thin: int = 1, discard: int = 0):
        """
        Retrieve stored MCMC chain.

        Args:
            flat: Flatten chain across ensemble dimension
            thin: Take every ``thin``-th step
            discard: Discard the first ``discard`` steps as burn-in

        Returns:
            ndarray: Chain data [steps, nwalkers, ndim] or [steps*nwalkers, ndim] if flat
        """

    def get_log_prob(self, flat: bool = False, thin: int = 1, discard: int = 0):
        """
        Retrieve log probability values.

        Returns:
            ndarray: Log probabilities [steps, nwalkers] or [steps*nwalkers] if flat
        """

    def get_blobs(self, flat: bool = False, thin: int = 1, discard: int = 0):
        """
        Retrieve blob data if available.

        Returns:
            ndarray or None: Blob data if stored
        """

    def save_step(self, state, accepted):
        """
        Store a sampling step.

        Args:
            state: Current ensemble state
            accepted: Boolean array of accepted proposals
        """

# File-based backend using HDF5 format for persistent storage with compression and metadata support.
class HDFBackend(Backend):
    """File-based backend persisting chains to an HDF5 file."""

    def __init__(self, filename: str, name: str = "mcmc", read_only: bool = False):
        """
        Initialize HDF5 backend.

        Args:
            filename: Path to HDF5 file
            name: Group name within HDF5 file
            read_only: Open file in read-only mode
        """

    @property
    def filename(self):
        """Get the HDF5 filename."""

    @property
    def name(self):
        """Get the group name."""

    @property
    def iteration(self):
        """Get current iteration count."""

    @property
    def shape(self):
        """Get chain shape (nwalkers, ndim)."""

    def get_autocorr_time(self, **kwargs):
        """
        Compute autocorrelation time from stored chain.

        Returns:
            ndarray: Autocorrelation times for each parameter
        """
class TempHDFBackend:
    """HDF5 backend backed by an automatically managed temporary file."""

    def __init__(self, **kwargs):
        """
        Temporary HDF5 backend that creates a temporary file.

        Args:
            **kwargs: Arguments passed to HDFBackend
        """

# Functions for working with multiple backends and testing.
def get_test_backends():
    """
    Get list of available backends for testing.

    Returns:
        list: Available backend classes
    """

import emcee
import numpy as np


def log_prob(theta):
    """Standard-normal log-probability used throughout the examples."""
    return -0.5 * np.sum(theta**2)


# Default backend is in-memory
sampler = emcee.EnsembleSampler(32, 2, log_prob)

# Or explicitly specify
backend = emcee.backends.Backend()
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

# Run sampling
pos = np.random.randn(32, 2)
sampler.run_mcmc(pos, 1000)

# Access results
chain = sampler.get_chain()
log_prob_vals = sampler.get_log_prob()

from emcee.backends import HDFBackend
# Create HDF5 backend
filename = "mcmc_results.h5"
backend = HDFBackend(filename)
sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

# Run sampling - results saved to file
sampler.run_mcmc(pos, 1000)

# Results are automatically saved
print(f"Chain shape: {backend.shape}")
print(f"Iterations completed: {backend.iteration}")

# Resume sampling from existing file
backend = HDFBackend(filename, read_only=False)

# Check existing progress
print(f"Previous iterations: {backend.iteration}")
previous_chain = backend.get_chain()

# Resume from last state
if backend.iteration > 0:
    last_state = backend.get_last_sample()

sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)

# Continue sampling
sampler.run_mcmc(last_state, 500)  # Additional 500 steps

# Use different group names for multiple runs
backend1 = HDFBackend("results.h5", name="run1")
backend2 = HDFBackend("results.h5", name="run2")

# First run
sampler1 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend1)
sampler1.run_mcmc(pos, 1000)

# Second run with different parameters
sampler2 = emcee.EnsembleSampler(32, 2, log_prob, backend=backend2)
sampler2.run_mcmc(pos, 1000)

# Access results from specific runs
chain1 = backend1.get_chain()
chain2 = backend2.get_chain()

from emcee.backends import TempHDFBackend
# Creates temporary file that's automatically cleaned up
with TempHDFBackend() as backend:
    sampler = emcee.EnsembleSampler(32, 2, log_prob, backend=backend)
    sampler.run_mcmc(pos, 1000)

    # Use results while in context
    chain = backend.get_chain()

# File is automatically deleted when context exits


def log_prob_with_blobs(theta):
    """Log-probability that also returns per-step metadata as blobs."""
    log_p = -0.5 * np.sum(theta**2)
    # Return additional metadata as blobs
    blobs = {"energy": np.sum(theta**2), "step_size": np.linalg.norm(theta)}
    return log_p, blobs
# Backend automatically handles blobs
backend = HDFBackend("results_with_blobs.h5")
sampler = emcee.EnsembleSampler(32, 2, log_prob_with_blobs, backend=backend)
sampler.run_mcmc(pos, 1000)

# Access blob data
blobs = backend.get_blobs()
print(f"Blob keys: {blobs.dtype.names}")

# Load existing results for analysis
backend = HDFBackend("results.h5", read_only=True)

# Get chain with burn-in removal
chain = backend.get_chain(discard=200, flat=True)
log_prob_vals = backend.get_log_prob(discard=200, flat=True)

# Compute autocorrelation time
tau = backend.get_autocorr_time()
print(f"Autocorrelation time: {tau}")

# Thin chain based on autocorrelation
thin_factor = int(2 * np.max(tau))
thinned_chain = backend.get_chain(discard=200, thin=thin_factor, flat=True)

# Backend with specific data type
backend = emcee.backends.Backend(dtype=np.float32)

# HDF5 with compression (requires h5py)
import h5py

backend = HDFBackend("compressed.h5")
# HDF5 compression is automatically applied when available

# Check backend properties
backend = HDFBackend("results.h5")
print(f"Backend type: {type(backend).__name__}")
print(f"Has blobs: {backend.has_blobs()}")
print(f"Chain shape: {backend.shape}")
print(f"Iterations: {backend.iteration}")

# Access raw HDF5 file (advanced usage)
with h5py.File(backend.filename, 'r') as f:
    print(f"HDF5 groups: {list(f.keys())}")
    print(f"Chain dataset shape: {f[backend.name]['chain'].shape}")

# Install with Tessl CLI
npx tessl i tessl/pypi-emcee