The Python ensemble sampling toolkit for affine-invariant MCMC
—
emcee provides statistical tools for assessing MCMC chain convergence through autocorrelation analysis. These functions help determine when chains have converged and how many effective samples have been obtained, which is crucial for reliable Bayesian inference.
Estimate the integrated autocorrelation time, which measures how correlated successive samples are in the chain.
def integrated_time(x, c: int = 5, tol: int = 50, quiet: bool = False,
has_walkers: bool = True):
"""
Estimate integrated autocorrelation time of time series.
Args:
x: Input time series [steps, ...] or [steps, nwalkers, ndim]
c: Window factor for automatic windowing (default: 5)
tol: Minimum chain length as multiple of autocorr time (default: 50)
quiet: Suppress convergence warnings (default: False)
has_walkers: Whether input has walker dimension (default: True)
Returns:
ndarray: Autocorrelation time for each parameter
Raises:
AutocorrError: If autocorrelation time cannot be reliably estimated
"""Compute the normalized autocorrelation function for 1D time series.
def function_1d(x):
"""
Estimate normalized autocorrelation function of 1D series.
Args:
x: 1D time series as numpy array
Returns:
ndarray: Autocorrelation function of the time series
Raises:
ValueError: If input is not 1D
"""Exception raised when autocorrelation analysis fails or is unreliable.
class AutocorrError(Exception):
"""
Raised when autocorrelation time estimation fails.
This typically occurs when:
- Chain is too short relative to autocorrelation time
- Chain has not converged
- Numerical issues in autocorrelation computation
"""import emcee
import numpy as np
def log_prob(theta):
return -0.5 * np.sum(theta**2)
# Run MCMC
sampler = emcee.EnsembleSampler(32, 2, log_prob)
pos = np.random.randn(32, 2)
sampler.run_mcmc(pos, 1000)
# Compute autocorrelation time
try:
tau = sampler.get_autocorr_time()
print(f"Autocorrelation time: {tau}")
print(f"Mean tau: {np.mean(tau):.1f}")
except emcee.autocorr.AutocorrError as e:
print(f"Autocorr analysis failed: {e}")# Check if chain is long enough
chain = sampler.get_chain()
nsteps = chain.shape[0]
try:
tau = sampler.get_autocorr_time(tol=20) # Less strict tolerance
# Rule of thumb: chain should be >> 50 * tau
min_length = 50 * np.max(tau)
if nsteps > min_length:
print(f"Chain length ({nsteps}) is sufficient (need > {min_length:.0f})")
else:
print(f"Chain too short! Need > {min_length:.0f} steps, have {nsteps}")
except emcee.autocorr.AutocorrError:
print("Chain too short for reliable autocorr estimation")# Monitor autocorrelation during sampling
def monitor_autocorr(sampler, nsteps=100, check_interval=50):
pos = np.random.randn(sampler.nwalkers, sampler.ndim)
for i, state in enumerate(sampler.sample(pos, iterations=nsteps)):
if i % check_interval == 0 and i > 0:
try:
tau = sampler.get_autocorr_time(tol=10, quiet=True)
print(f"Step {i}: tau = {np.mean(tau):.1f}")
# Check convergence criterion
if i > 50 * np.max(tau):
print(f"Converged at step {i}")
break
except emcee.autocorr.AutocorrError:
print(f"Step {i}: tau estimation failed")
sampler = emcee.EnsembleSampler(32, 2, log_prob)
monitor_autocorr(sampler, nsteps=2000)# Calculate effective number of independent samples
chain = sampler.get_chain(flat=True)
nsamples = chain.shape[0]
try:
tau = sampler.get_autocorr_time()
# Effective sample size
n_eff = nsamples / (2 * tau)
print(f"Total samples: {nsamples}")
print(f"Effective samples per parameter: {n_eff}")
print(f"Minimum effective samples: {np.min(n_eff):.0f}")
except emcee.autocorr.AutocorrError:
print("Cannot compute effective sample size - autocorr failed")from emcee.autocorr import function_1d
# Get single parameter chain
chain = sampler.get_chain(flat=True)
param_chain = chain[:, 0] # First parameter
# Compute autocorrelation function
acf = function_1d(param_chain)
# Plot autocorrelation function
import matplotlib.pyplot as plt
plt.figure()
plt.plot(acf[:100]) # First 100 lags
plt.axhline(y=0, color='k', linestyle='--', alpha=0.3)
plt.xlabel('Lag')
plt.ylabel('Autocorrelation')
plt.title('Autocorrelation Function')
plt.show()
# Find where ACF drops to 1/e
import numpy as np
cutoff = 1/np.e
lag_1e = np.argmax(acf < cutoff)
print(f"Autocorr drops to 1/e at lag: {lag_1e}")# Use autocorrelation to determine burn-in
def estimate_burnin(sampler, min_length=100):
chain = sampler.get_chain()
nsteps = chain.shape[0]
# Try different burn-in lengths
burnin_candidates = np.arange(min_length, nsteps//2, 50)
tau_estimates = []
for burnin in burnin_candidates:
try:
# Compute tau for post-burn-in chain
post_burnin_chain = chain[burnin:]
tau = emcee.autocorr.integrated_time(post_burnin_chain, quiet=True)
tau_estimates.append(np.mean(tau))
except emcee.autocorr.AutocorrError:
tau_estimates.append(np.inf)
# Find where tau stabilizes
tau_estimates = np.array(tau_estimates)
stable_idx = np.argmin(np.abs(tau_estimates - np.median(tau_estimates[:-10])))
recommended_burnin = burnin_candidates[stable_idx]
print(f"Recommended burn-in: {recommended_burnin} steps")
return recommended_burnin
burnin = estimate_burnin(sampler)# Thin chain to get approximately independent samples
try:
tau = sampler.get_autocorr_time()
thin_factor = int(2 * np.max(tau)) # Thin by ~2x autocorr time
print(f"Autocorrelation time: {np.max(tau):.1f}")
print(f"Recommended thinning factor: {thin_factor}")
# Get thinned chain
thinned_chain = sampler.get_chain(discard=burnin, thin=thin_factor, flat=True)
print(f"Thinned chain shape: {thinned_chain.shape}")
# Effective sample size after thinning
n_eff_thinned = thinned_chain.shape[0]
print(f"Effective independent samples: {n_eff_thinned}")
except emcee.autocorr.AutocorrError:
print("Using default thinning factor of 10")
thinned_chain = sampler.get_chain(discard=burnin, thin=10, flat=True)def convergence_diagnostics(sampler, target_n_eff=1000):
"""Comprehensive convergence assessment."""
chain = sampler.get_chain()
nsteps, nwalkers, ndim = chain.shape
print(f"Chain shape: {nsteps} steps × {nwalkers} walkers × {ndim} dims")
print(f"Total samples: {nsteps * nwalkers}")
try:
# Autocorrelation analysis
tau = sampler.get_autocorr_time(tol=20)
max_tau = np.max(tau)
print(f"\nAutocorrelation times: {tau}")
print(f"Maximum tau: {max_tau:.1f}")
# Chain length assessment
min_required = 50 * max_tau
if nsteps > min_required:
print(f"✓ Chain length sufficient ({nsteps} > {min_required:.0f})")
else:
print(f"✗ Chain too short (need {min_required:.0f}, have {nsteps})")
# Effective sample size
n_eff = nsteps * nwalkers / (2 * tau)
min_n_eff = np.min(n_eff)
print(f"Effective samples per param: {n_eff}")
print(f"Minimum effective samples: {min_n_eff:.0f}")
if min_n_eff > target_n_eff:
print(f"✓ Sufficient effective samples ({min_n_eff:.0f} > {target_n_eff})")
else:
print(f"✗ Need more samples (want {target_n_eff}, have {min_n_eff:.0f})")
# Acceptance fraction
acc_frac = np.mean(sampler.acceptance_fraction)
print(f"\nAcceptance fraction: {acc_frac:.3f}")
if 0.2 <= acc_frac <= 0.5:
print("✓ Acceptance fraction in good range")
else:
print("⚠ Acceptance fraction outside optimal range (0.2-0.5)")
except emcee.autocorr.AutocorrError as e:
print(f"✗ Autocorrelation analysis failed: {e}")
print("Chain likely too short or not converged")
# Run diagnostics
convergence_diagnostics(sampler)Install with Tessl CLI
npx tessl i tessl/pypi-emcee