or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-variational.mddata-io-utilities.mdgenerated-quantities.mdindex.mdinstallation-setup.mdmcmc-results.mdmodel-compilation.mdmodel-interface.mdoptimization-results.mdvariational-results.md

data-io-utilities.mddocs/

# Data and I/O Utilities

Functions for data formatting, CSV file handling, and interoperability with the Stan ecosystem. These utilities support data preparation, result persistence, and integration with other analysis tools.

## Capabilities

### JSON Data Writing

Convert Python data structures to Stan-compatible JSON format for model input.

```python { .api }
def write_stan_json(data, filename=None):
    """
    Write data to Stan-compatible JSON format.

    Parameters:
    - data (dict): Dictionary mapping variable names to values
    - filename (str or PathLike, optional): Output filename

    Returns:
    str: JSON string if filename not provided, otherwise filename

    Raises:
    ValueError: If data contains unsupported types
    """
```

**Usage Examples:**

```python
import cmdstanpy as csp
import numpy as np

# Prepare data for Stan model
data = {
    "N": 100,
    "K": 3,
    "x": np.random.normal(0, 1, 100),
    "y": np.random.normal(0, 1, 100),
    "group": np.random.randint(1, 4, 100)
}

# Write to file
csp.write_stan_json(data, "model_data.json")

# Get JSON string
json_str = csp.write_stan_json(data)
print(json_str[:100])  # Preview first 100 characters
```

### CSV File Loading

Load Stan CSV output files back into fit objects for analysis and reproducibility.

```python { .api }
def from_csv(path=None, method=None):
    """
    Instantiate CmdStan fit object from Stan CSV files.

    Parameters:
    - path (str, list, or PathLike): Path(s) to CSV files, directory, or glob pattern
    - method (str, optional): Expected method type for validation
      ("sample", "optimize", "variational", "pathfinder", "laplace")

    Returns:
    CmdStanMCMC, CmdStanMLE, CmdStanVB, CmdStanPathfinder, CmdStanLaplace, or None

    Raises:
    ValueError: If files not found or invalid format
    """
```

**Usage Examples:**

```python
import cmdstanpy as csp

# Load from directory
fit = csp.from_csv("./mcmc_output/")

# Load specific files
fit = csp.from_csv([
    "chain_1.csv",
    "chain_2.csv",
    "chain_3.csv",
    "chain_4.csv"
])

# Load with glob pattern
fit = csp.from_csv("results/chain_*.csv")

# Load with method validation
fit = csp.from_csv("./results/", method="sample")

# Access loaded results
print(f"Loaded {fit.chains} chains")
print(fit.summary())
```

### System Information

Display comprehensive system and dependency information for debugging and reproducibility.

```python { .api }
def show_versions(output=True):
    """
    Display system and dependency information for debugging.

    Parameters:
    - output (bool): Whether to print to console

    Returns:
    str: Formatted version information
    """
```

**Usage Example:**

```python
import cmdstanpy as csp

# Print version information
csp.show_versions()

# Get as string for logging
version_info = csp.show_versions(output=False)
with open("session_info.txt", "w") as f:
    f.write(version_info)
```

## Data Preparation Patterns

### Complex Data Structures

```python
import cmdstanpy as csp
import numpy as np

# Prepare complex nested data for Stan
data = {
    # Scalars
    "N": 100,
    "K": 5,

    # Vectors
    "y": np.random.normal(0, 1, 100),
    "weights": np.ones(100),

    # Matrices
    "X": np.random.normal(0, 1, (100, 5)),

    # Arrays
    "group_data": np.random.normal(0, 1, (10, 5, 3)),

    # Integer arrays
    "indices": np.arange(1, 101),  # Stan uses 1-based indexing

    # Boolean (converted to int)
    "include_intercept": 1
}

# Validate data types
for key, value in data.items():
    if isinstance(value, np.ndarray):
        print(f"{key}: {value.dtype} shape {value.shape}")
    else:
        print(f"{key}: {type(value)} = {value}")

# Write to JSON
csp.write_stan_json(data, "complex_data.json")
```

### Data Validation

```python
def validate_stan_data(data):
    """Custom function to validate data for Stan compatibility."""
    for key, value in data.items():
        if isinstance(value, np.ndarray):
            # Check for NaN or infinite values
            if np.any(~np.isfinite(value)):
                raise ValueError(f"Non-finite values in {key}")

            # Ensure proper data types
            if value.dtype == np.bool_:
                data[key] = value.astype(int)
                print(f"Converted {key} from bool to int")

            # Check for proper indexing (1-based for Stan)
            if "index" in key.lower() and np.any(value <= 0):
                print(f"Warning: {key} contains non-positive indices")

    return data

# Use validation before fitting
validated_data = validate_stan_data(data)
fit = model.sample(data=validated_data)
```

## File Management Patterns

### Organized Output Directories

```python
import os
from datetime import datetime

# Create organized directory structure
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"analysis_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Run analysis with organized outputs
fit = model.sample(
    data=data,
    output_dir=output_dir,
    chains=4
)

# Save additional outputs
fit.save_csvfiles(dir=f"{output_dir}/chains")
csp.write_stan_json(data, f"{output_dir}/data.json")

# Save metadata
with open(f"{output_dir}/session_info.txt", "w") as f:
    f.write(csp.show_versions(output=False))

print(f"Analysis saved to {output_dir}")
```

### Batch Processing

```python
import glob
import json
from pathlib import Path

# Process multiple datasets
data_files = glob.glob("datasets/*.json")
results_dir = Path("batch_results")
results_dir.mkdir(exist_ok=True)

for data_file in data_files:
    dataset_name = Path(data_file).stem
    print(f"Processing {dataset_name}...")

    # Load data (would need custom JSON loader for complex types)
    with open(data_file, 'r') as f:
        data = json.load(f)

    # Run analysis
    fit = model.sample(data=data, chains=4)

    # Save results
    output_subdir = results_dir / dataset_name
    output_subdir.mkdir(exist_ok=True)

    fit.save_csvfiles(dir=str(output_subdir))

    # Save summary
    summary = fit.summary()
    summary.to_csv(output_subdir / "summary.csv")

    print(f"Completed {dataset_name}")
```

### Archive and Reproducibility

```python
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

def save_analysis_archive(fit, data, model_file, output_dir):
    """Save complete analysis archive for reproducibility."""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    # Save CSV files
    fit.save_csvfiles(dir=str(output_path / "csvs"))

    # Save data
    csp.write_stan_json(data, output_path / "data.json")

    # Copy Stan model file
    shutil.copy2(model_file, output_path / "model.stan")

    # Save Python objects
    with open(output_path / "fit.pkl", "wb") as f:
        pickle.dump(fit, f)

    # Save metadata
    metadata = {
        "cmdstanpy_version": csp.__version__,
        "cmdstan_version": csp.cmdstan_version(),
        "timestamp": datetime.now().isoformat(),
        "chains": fit.chains,
        "draws_per_chain": fit.num_draws_sampling
    }

    with open(output_path / "metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    # Save system info
    with open(output_path / "system_info.txt", "w") as f:
        f.write(csp.show_versions(output=False))

    print(f"Analysis archived to {output_path}")

# Use archive function
save_analysis_archive(
    fit=mcmc_fit,
    data=model_data,
    model_file="my_model.stan",
    output_dir="analysis_archive"
)
```

### Loading Archived Results

```python
def load_analysis_archive(archive_dir):
    """Load archived analysis results."""
    archive_path = Path(archive_dir)

    # Load fit object
    with open(archive_path / "fit.pkl", "rb") as f:
        fit = pickle.load(f)

    # Load metadata
    with open(archive_path / "metadata.json", "r") as f:
        metadata = json.load(f)

    print(f"Loaded analysis from {metadata['timestamp']}")
    print(f"CmdStanPy version: {metadata['cmdstanpy_version']}")

    return fit, metadata

# Restore archived results
restored_fit, meta = load_analysis_archive("analysis_archive")
print(restored_fit.summary())
```