Python interface to CmdStan that provides comprehensive access to the Stan compiler and all Bayesian inference algorithms.
—
Quality: Pending — a best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Functions for data formatting, CSV file handling, and interoperability with the Stan ecosystem. These utilities support data preparation, result persistence, and integration with other analysis tools.
Convert Python data structures to Stan-compatible JSON format for model input.
def write_stan_json(data, filename=None):
    """
    Write data to Stan-compatible JSON format.

    Scalars, lists/tuples, and any object exposing ``.tolist()`` (e.g. numpy
    arrays) are serialized. Booleans are converted to 0/1 because Stan has no
    boolean type.

    Parameters:
    - data (dict): Dictionary mapping variable names to values
    - filename (str or PathLike, optional): Output filename

    Returns:
    str: JSON string if filename not provided, otherwise filename

    Raises:
    ValueError: If data contains unsupported types
    """
    import json  # local import: this page has no top-level import block

    def _encode(value):
        # Duck-type numpy arrays/scalars via .tolist() so numpy is not a
        # hard dependency of this helper.
        if hasattr(value, "tolist"):
            return _encode(value.tolist())
        # bool must be checked before int: bool is a subclass of int.
        if isinstance(value, bool):
            return int(value)
        if isinstance(value, (int, float)):
            return value
        if isinstance(value, (list, tuple)):
            return [_encode(item) for item in value]
        raise ValueError(f"Unsupported type for Stan JSON: {type(value)!r}")

    if not isinstance(data, dict):
        raise ValueError("data must be a dict mapping variable names to values")
    payload = {str(name): _encode(value) for name, value in data.items()}
    text = json.dumps(payload)
    if filename is None:
        return text
    with open(filename, "w") as out:
        out.write(text)
    return filename
# Usage Examples:
import cmdstanpy as csp
import numpy as np

# Prepare data for Stan model
data = {
    "N": 100,
    "K": 3,
    "x": np.random.normal(0, 1, 100),
    "y": np.random.normal(0, 1, 100),
    "group": np.random.randint(1, 4, 100),
}

# Write to file
csp.write_stan_json(data, "model_data.json")

# Get JSON string
json_str = csp.write_stan_json(data)
print(json_str[:100])  # Preview first 100 characters
# Load Stan CSV output files back into fit objects for analysis and reproducibility.
def from_csv(path=None, method=None):
    """
    Instantiate CmdStan fit object from Stan CSV files.

    Parameters:
    - path (str, list, or PathLike): Path(s) to CSV files, directory, or glob pattern
    - method (str, optional): Expected method type for validation
      ("sample", "optimize", "variational", "pathfinder", "laplace")

    Returns:
    CmdStanMCMC, CmdStanMLE, CmdStanVB, CmdStanPathfinder, CmdStanLaplace, or None

    Raises:
    ValueError: If files not found or invalid format
    """
    # NOTE(review): documentation stub — the real implementation ships with
    # cmdstanpy; this page records only the public signature and contract.
# Usage Examples:
import cmdstanpy as csp

# Load from directory
fit = csp.from_csv("./mcmc_output/")

# Load specific files
fit = csp.from_csv([
    "chain_1.csv",
    "chain_2.csv",
    "chain_3.csv",
    "chain_4.csv",
])

# Load with glob pattern
fit = csp.from_csv("results/chain_*.csv")

# Load with method validation
fit = csp.from_csv("./results/", method="sample")

# Access loaded results
print(f"Loaded {fit.chains} chains")
print(fit.summary())
# Display comprehensive system and dependency information for debugging and reproducibility.
def show_versions(output=True):
    """
    Display system and dependency information for debugging.

    Collects interpreter and OS details via the standard library; package
    version details beyond that are provided by the installed cmdstanpy.

    Parameters:
    - output (bool): Whether to print to console

    Returns:
    str: Formatted version information
    """
    import platform
    import sys

    lines = [
        "INSTALLED VERSIONS",
        "------------------",
        f"python: {sys.version}",
        f"python-bits: {64 if sys.maxsize > 2**32 else 32}",
        f"OS: {platform.system()}",
        f"OS-release: {platform.release()}",
        f"machine: {platform.machine()}",
    ]
    info = "\n".join(lines)
    if output:
        print(info)
    # Always return the string so callers can log it (see usage below).
    return info
# Usage Example:
import cmdstanpy as csp

# Print version information
csp.show_versions()

# Get as string for logging
version_info = csp.show_versions(output=False)
with open("session_info.txt", "w") as f:
    f.write(version_info)
# Prepare complex nested data for Stan
data = {
    # Scalars
    "N": 100,
    "K": 5,
    # Vectors
    "y": np.random.normal(0, 1, 100),
    "weights": np.ones(100),
    # Matrices
    "X": np.random.normal(0, 1, (100, 5)),
    # Arrays
    "group_data": np.random.normal(0, 1, (10, 5, 3)),
    # Integer arrays
    "indices": np.arange(1, 101),  # Stan uses 1-based indexing
    # Boolean (converted to int)
    "include_intercept": 1,
}

# Validate data types
for key, value in data.items():
    if isinstance(value, np.ndarray):
        print(f"{key}: {value.dtype} shape {value.shape}")
    else:
        print(f"{key}: {type(value)} = {value}")
# Write to JSON
csp.write_stan_json(data, "complex_data.json")

def validate_stan_data(data):
    """Custom function to validate data for Stan compatibility.

    Mutates *data* in place (bool arrays are converted to int) and returns it.
    """
    for key, value in data.items():
        if isinstance(value, np.ndarray):
            # Check for NaN or infinite values
            if np.any(~np.isfinite(value)):
                raise ValueError(f"Non-finite values in {key}")
            # Ensure proper data types
            if value.dtype == np.bool_:
                data[key] = value.astype(int)
                print(f"Converted {key} from bool to int")
            # Check for proper indexing (1-based for Stan).
            # NOTE: this inspects the original `value`, not the converted
            # array — preserved from the original example.
            if "index" in key.lower() and np.any(value <= 0):
                print(f"Warning: {key} contains non-positive indices")
    return data

# Use validation before fitting
validated_data = validate_stan_data(data)
fit = model.sample(data=validated_data)

import os
from datetime import datetime

# Create organized directory structure
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"analysis_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Run analysis with organized outputs
fit = model.sample(
    data=data,
    output_dir=output_dir,
    chains=4,
)

# Save additional outputs
fit.save_csvfiles(dir=f"{output_dir}/chains")
csp.write_stan_json(data, f"{output_dir}/data.json")

# Save metadata
with open(f"{output_dir}/session_info.txt", "w") as f:
    f.write(csp.show_versions(output=False))
print(f"Analysis saved to {output_dir}")

import glob
from pathlib import Path

# Process multiple datasets
data_files = glob.glob("datasets/*.json")
results_dir = Path("batch_results")
results_dir.mkdir(exist_ok=True)

for data_file in data_files:
    dataset_name = Path(data_file).stem
    print(f"Processing {dataset_name}...")

    # Load data (would need custom JSON loader for complex types)
    # NOTE(review): this snippet calls json.load but does not import json
    # itself — it assumes an earlier `import json`; confirm when reusing.
    with open(data_file, 'r') as f:
        data = json.load(f)

    # Run analysis
    fit = model.sample(data=data, chains=4)

    # Save results
    output_subdir = results_dir / dataset_name
    output_subdir.mkdir(exist_ok=True)
    fit.save_csvfiles(dir=str(output_subdir))

    # Save summary
    summary = fit.summary()
    summary.to_csv(output_subdir / "summary.csv")
    print(f"Completed {dataset_name}")

import json
import pickle
from pathlib import Path

def save_analysis_archive(fit, data, model_file, output_dir):
    """Save complete analysis archive for reproducibility.

    Writes CSVs, input data, the Stan model file, a pickled fit object,
    JSON metadata, and system info under *output_dir*.
    """
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)
    # Save CSV files
    fit.save_csvfiles(dir=str(output_path / "csvs"))
    # Save data
    csp.write_stan_json(data, output_path / "data.json")
    # Copy Stan model file
    import shutil
    shutil.copy2(model_file, output_path / "model.stan")
    # Save Python objects
    # NOTE(review): pickled fit objects are tied to the cmdstanpy version
    # that wrote them; reload with a matching environment.
    with open(output_path / "fit.pkl", "wb") as f:
        pickle.dump(fit, f)
    # Save metadata
    metadata = {
        "cmdstanpy_version": csp.__version__,
        "cmdstan_version": csp.cmdstan_version(),
        "timestamp": datetime.now().isoformat(),
        "chains": fit.chains,
        "draws_per_chain": fit.num_draws_sampling,
    }
    with open(output_path / "metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)
    # Save system info
    with open(output_path / "system_info.txt", "w") as f:
        f.write(csp.show_versions(output=False))
    print(f"Analysis archived to {output_path}")

# Use archive function
save_analysis_archive(
    fit=mcmc_fit,
    data=model_data,
    model_file="my_model.stan",
    output_dir="analysis_archive",
)

def load_analysis_archive(archive_dir):
    """Load archived analysis results.

    Returns a (fit, metadata) tuple read from *archive_dir*.
    """
    archive_path = Path(archive_dir)
    # Load fit object
    # SECURITY NOTE: pickle.load executes arbitrary code — only load
    # archives you created yourself.
    with open(archive_path / "fit.pkl", "rb") as f:
        fit = pickle.load(f)
    # Load metadata
    with open(archive_path / "metadata.json", "r") as f:
        metadata = json.load(f)
    print(f"Loaded analysis from {metadata['timestamp']}")
    print(f"CmdStanPy version: {metadata['cmdstanpy_version']}")
    return fit, metadata

# Restore archived results
restored_fit, meta = load_analysis_archive("analysis_archive")
print(restored_fit.summary())
# Install with Tessl CLI
npx tessl i tessl/pypi-cmdstanpy