tessl/pypi-emcee

The Python ensemble sampling toolkit for affine-invariant MCMC

—

Pending

Overview

Eval results

Files

Convergence Analysis

Name: tessl/pypi-emcee
Author: tessl

emcee provides statistical tools for assessing MCMC chain convergence through autocorrelation analysis. These functions help determine when chains have converged and how many effective samples have been obtained, which is crucial for reliable Bayesian inference.

Capabilities

Autocorrelation Time Estimation

Estimate the integrated autocorrelation time, which measures how correlated successive samples are in the chain.

def integrated_time(x, c: int = 5, tol: int = 50, quiet: bool = False, 
                   has_walkers: bool = True):
    """
    Estimate integrated autocorrelation time of time series.
    
    Args:
        x: Input time series [steps, ...] or [steps, nwalkers, ndim]
        c: Window factor for automatic windowing (default: 5)
        tol: Minimum chain length as multiple of autocorr time (default: 50)
        quiet: Suppress convergence warnings (default: False)  
        has_walkers: Whether input has walker dimension (default: True)
        
    Returns:
        ndarray: Autocorrelation time for each parameter
        
    Raises:
        AutocorrError: If autocorrelation time cannot be reliably estimated
    """

Autocorrelation Function

Compute the normalized autocorrelation function for 1D time series.

def function_1d(x):
    """
    Estimate normalized autocorrelation function of 1D series.
    
    Args:
        x: 1D time series as numpy array
        
    Returns:
        ndarray: Autocorrelation function of the time series
        
    Raises:
        ValueError: If input is not 1D
    """

Autocorrelation Exceptions

Exception raised when autocorrelation analysis fails or is unreliable.

class AutocorrError(Exception):
    """
    Raised when autocorrelation time estimation fails.
    
    This typically occurs when:
    - Chain is too short relative to autocorrelation time
    - Chain has not converged
    - Numerical issues in autocorrelation computation
    """

Usage Examples

Basic Autocorrelation Analysis

import emcee
import numpy as np

def log_prob(theta):
    return -0.5 * np.sum(theta**2)

# Run MCMC
sampler = emcee.EnsembleSampler(32, 2, log_prob)
pos = np.random.randn(32, 2)
sampler.run_mcmc(pos, 1000)

# Compute autocorrelation time
try:
    tau = sampler.get_autocorr_time()
    print(f"Autocorrelation time: {tau}")
    print(f"Mean tau: {np.mean(tau):.1f}")
except emcee.autocorr.AutocorrError as e:
    print(f"Autocorr analysis failed: {e}")

Chain Length Assessment

# Check if chain is long enough
chain = sampler.get_chain()
nsteps = chain.shape[0]

try:
    tau = sampler.get_autocorr_time(tol=20)  # Less strict tolerance
    
    # Rule of thumb: chain should be >> 50 * tau
    min_length = 50 * np.max(tau)
    if nsteps > min_length:
        print(f"Chain length ({nsteps}) is sufficient (need > {min_length:.0f})")
    else:
        print(f"Chain too short! Need > {min_length:.0f} steps, have {nsteps}")
        
except emcee.autocorr.AutocorrError:
    print("Chain too short for reliable autocorr estimation")

Progressive Autocorrelation Monitoring

# Monitor autocorrelation during sampling
def monitor_autocorr(sampler, nsteps=100, check_interval=50):
    pos = np.random.randn(sampler.nwalkers, sampler.ndim)
    
    for i, state in enumerate(sampler.sample(pos, iterations=nsteps)):
        if i % check_interval == 0 and i > 0:
            try:
                tau = sampler.get_autocorr_time(tol=10, quiet=True)
                print(f"Step {i}: tau = {np.mean(tau):.1f}")
                
                # Check convergence criterion
                if i > 50 * np.max(tau):
                    print(f"Converged at step {i}")
                    break
                    
            except emcee.autocorr.AutocorrError:
                print(f"Step {i}: tau estimation failed")

sampler = emcee.EnsembleSampler(32, 2, log_prob)
monitor_autocorr(sampler, nsteps=2000)

Effective Sample Size

# Calculate effective number of independent samples
chain = sampler.get_chain(flat=True)
nsamples = chain.shape[0]

try:
    tau = sampler.get_autocorr_time()
    
    # Effective sample size
    n_eff = nsamples / (2 * tau)
    print(f"Total samples: {nsamples}")
    print(f"Effective samples per parameter: {n_eff}")
    print(f"Minimum effective samples: {np.min(n_eff):.0f}")
    
except emcee.autocorr.AutocorrError:
    print("Cannot compute effective sample size - autocorr failed")

1D Autocorrelation Function

from emcee.autocorr import function_1d

# Get single parameter chain
chain = sampler.get_chain(flat=True)
param_chain = chain[:, 0]  # First parameter

# Compute autocorrelation function
acf = function_1d(param_chain)

# Plot autocorrelation function
import matplotlib.pyplot as plt
plt.figure()
plt.plot(acf[:100])  # First 100 lags
plt.axhline(y=0, color='k', linestyle='--', alpha=0.3)
plt.xlabel('Lag')
plt.ylabel('Autocorrelation')
plt.title('Autocorrelation Function')
plt.show()

# Find where ACF drops to 1/e
import numpy as np
cutoff = 1/np.e
lag_1e = np.argmax(acf < cutoff)
print(f"Autocorr drops to 1/e at lag: {lag_1e}")

Burn-in Determination

# Use autocorrelation to determine burn-in
def estimate_burnin(sampler, min_length=100):
    chain = sampler.get_chain()
    nsteps = chain.shape[0]
    
    # Try different burn-in lengths
    burnin_candidates = np.arange(min_length, nsteps//2, 50)
    tau_estimates = []
    
    for burnin in burnin_candidates:
        try:
            # Compute tau for post-burn-in chain
            post_burnin_chain = chain[burnin:]
            tau = emcee.autocorr.integrated_time(post_burnin_chain, quiet=True)
            tau_estimates.append(np.mean(tau))
        except emcee.autocorr.AutocorrError:
            tau_estimates.append(np.inf)
    
    # Find where tau stabilizes
    tau_estimates = np.array(tau_estimates)
    stable_idx = np.argmin(np.abs(tau_estimates - np.median(tau_estimates[:-10])))
    
    recommended_burnin = burnin_candidates[stable_idx]
    print(f"Recommended burn-in: {recommended_burnin} steps")
    
    return recommended_burnin

burnin = estimate_burnin(sampler)

Thinning Based on Autocorrelation

# Thin chain to get approximately independent samples
try:
    tau = sampler.get_autocorr_time()
    thin_factor = int(2 * np.max(tau))  # Thin by ~2x autocorr time
    
    print(f"Autocorrelation time: {np.max(tau):.1f}")
    print(f"Recommended thinning factor: {thin_factor}")
    
    # Get thinned chain
    thinned_chain = sampler.get_chain(discard=burnin, thin=thin_factor, flat=True)
    print(f"Thinned chain shape: {thinned_chain.shape}")
    
    # Effective sample size after thinning
    n_eff_thinned = thinned_chain.shape[0]
    print(f"Effective independent samples: {n_eff_thinned}")
    
except emcee.autocorr.AutocorrError:
    print("Using default thinning factor of 10")
    thinned_chain = sampler.get_chain(discard=burnin, thin=10, flat=True)

Convergence Diagnostics

def convergence_diagnostics(sampler, target_n_eff=1000):
    """Comprehensive convergence assessment."""
    
    chain = sampler.get_chain()
    nsteps, nwalkers, ndim = chain.shape
    
    print(f"Chain shape: {nsteps} steps × {nwalkers} walkers × {ndim} dims")
    print(f"Total samples: {nsteps * nwalkers}")
    
    try:
        # Autocorrelation analysis
        tau = sampler.get_autocorr_time(tol=20)
        max_tau = np.max(tau)
        
        print(f"\nAutocorrelation times: {tau}")
        print(f"Maximum tau: {max_tau:.1f}")
        
        # Chain length assessment
        min_required = 50 * max_tau
        if nsteps > min_required:
            print(f"✓ Chain length sufficient ({nsteps} > {min_required:.0f})")
        else:
            print(f"✗ Chain too short (need {min_required:.0f}, have {nsteps})")
        
        # Effective sample size
        n_eff = nsteps * nwalkers / (2 * tau)
        min_n_eff = np.min(n_eff)
        
        print(f"Effective samples per param: {n_eff}")
        print(f"Minimum effective samples: {min_n_eff:.0f}")
        
        if min_n_eff > target_n_eff:
            print(f"✓ Sufficient effective samples ({min_n_eff:.0f} > {target_n_eff})")
        else:
            print(f"✗ Need more samples (want {target_n_eff}, have {min_n_eff:.0f})")
        
        # Acceptance fraction
        acc_frac = np.mean(sampler.acceptance_fraction)
        print(f"\nAcceptance fraction: {acc_frac:.3f}")
        
        if 0.2 <= acc_frac <= 0.5:
            print("✓ Acceptance fraction in good range")
        else:
            print("⚠ Acceptance fraction outside optimal range (0.2-0.5)")
            
    except emcee.autocorr.AutocorrError as e:
        print(f"✗ Autocorrelation analysis failed: {e}")
        print("Chain likely too short or not converged")

# Run diagnostics
convergence_diagnostics(sampler)

Install with Tessl CLI