# Data and I/O Utilities

Functions for data formatting, CSV file handling, and interoperability with the Stan ecosystem. These utilities support data preparation, result persistence, and integration with other analysis tools.

## Capabilities

### JSON Data Writing

Convert Python data structures to Stan-compatible JSON format for model input.

```python { .api }
def write_stan_json(path, data):
    """
    Write data to a Stan-compatible JSON file.

    Parameters:
    - path (str): Output file path
    - data (dict): Dictionary mapping variable names to values
      (scalars, sequences, or numpy arrays)

    Returns:
    None

    Raises:
    ValueError: If data contains unsupported types
    """
```

**Usage Examples:**

```python
import cmdstanpy as csp
import numpy as np

# Prepare data for Stan model
data = {
    "N": 100,
    "K": 3,
    "x": np.random.normal(0, 1, 100),
    "y": np.random.normal(0, 1, 100),
    "group": np.random.randint(1, 4, 100)
}

# Write to file
csp.write_stan_json("model_data.json", data)

# Inspect the generated JSON
with open("model_data.json") as f:
    print(f.read()[:100])  # Preview first 100 characters
```
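
Because numpy arrays are serialized as plain JSON arrays, the output can be round-tripped with the standard `json` module. A minimal sketch (the `check.json` filename is arbitrary):

```python
import json

import cmdstanpy as csp
import numpy as np

data = {"N": 3, "y": np.array([0.1, -0.4, 2.0])}
csp.write_stan_json("check.json", data)

# Read the file back: numpy arrays come back as plain Python lists
with open("check.json") as f:
    loaded = json.load(f)

print(loaded["y"])  # [0.1, -0.4, 2.0]
```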

### CSV File Loading

Load Stan CSV output files back into fit objects for analysis and reproducibility.

```python { .api }
def from_csv(path=None, method=None):
    """
    Instantiate a CmdStan fit object from Stan CSV files.

    Parameters:
    - path (str, list, or PathLike): Path(s) to CSV files, a directory, or a glob pattern
    - method (str, optional): Expected method type for validation
      ("sample", "optimize", "variational", "pathfinder", "laplace")

    Returns:
    CmdStanMCMC, CmdStanMLE, CmdStanVB, CmdStanPathfinder, CmdStanLaplace, or None

    Raises:
    ValueError: If files are not found or have an invalid format
    """
```

**Usage Examples:**

```python
import cmdstanpy as csp

# Load from directory
fit = csp.from_csv("./mcmc_output/")

# Load specific files
fit = csp.from_csv([
    "chain_1.csv",
    "chain_2.csv",
    "chain_3.csv",
    "chain_4.csv"
])

# Load with glob pattern
fit = csp.from_csv("results/chain_*.csv")

# Load with method validation
fit = csp.from_csv("./results/", method="sample")

# Access loaded results
print(f"Loaded {fit.chains} chains")
print(fit.summary())
```
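
Once loaded, a fit behaves like one returned directly by the corresponding run method. For an MCMC fit, for example, the draws can be pulled into a pandas DataFrame (a sketch assuming the CSVs under `./results/` came from `sample`):

```python
import cmdstanpy as csp

# Rebuild the fit, then extract posterior draws as a pandas DataFrame
fit = csp.from_csv("./results/", method="sample")
draws = fit.draws_pd()
print(draws["lp__"].describe())  # summary of the log-posterior column
```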

### System Information

Display comprehensive system and dependency information for debugging and reproducibility.

```python { .api }
def show_versions(output=True):
    """
    Display system and dependency information for debugging.

    Parameters:
    - output (bool): Whether to print to console

    Returns:
    str: Formatted version information
    """
```

**Usage Example:**

```python
import cmdstanpy as csp

# Print version information
csp.show_versions()

# Get as string for logging
version_info = csp.show_versions(output=False)
with open("session_info.txt", "w") as f:
    f.write(version_info)
```
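
The same string can also be routed through the standard `logging` module so environment details land in a run's log file. A minimal sketch (the logger name and log filename are arbitrary):

```python
import logging

import cmdstanpy as csp

# Record the environment at the start of a run in the analysis log
logging.basicConfig(filename="analysis.log", level=logging.INFO)
logging.getLogger("analysis").info(
    "Session environment:\n%s", csp.show_versions(output=False)
)
```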

## Data Preparation Patterns

### Complex Data Structures

```python
import cmdstanpy as csp
import numpy as np

# Prepare complex nested data for Stan
data = {
    # Scalars
    "N": 100,
    "K": 5,

    # Vectors
    "y": np.random.normal(0, 1, 100),
    "weights": np.ones(100),

    # Matrices
    "X": np.random.normal(0, 1, (100, 5)),

    # Arrays
    "group_data": np.random.normal(0, 1, (10, 5, 3)),

    # Integer arrays
    "indices": np.arange(1, 101),  # Stan uses 1-based indexing

    # Boolean flags (passed as int)
    "include_intercept": 1
}

# Inspect data types before writing
for key, value in data.items():
    if isinstance(value, np.ndarray):
        print(f"{key}: {value.dtype} shape {value.shape}")
    else:
        print(f"{key}: {type(value)} = {value}")

# Write to JSON
csp.write_stan_json("complex_data.json", data)
```

### Data Validation

```python
import numpy as np

def validate_stan_data(data):
    """Custom function to validate data for Stan compatibility."""
    for key, value in data.items():
        if isinstance(value, np.ndarray):
            # Check for NaN or infinite values
            if np.any(~np.isfinite(value)):
                raise ValueError(f"Non-finite values in {key}")

            # Ensure proper data types
            if value.dtype == np.bool_:
                data[key] = value.astype(int)
                print(f"Converted {key} from bool to int")

            # Check for proper indexing (1-based for Stan)
            if "index" in key.lower() and np.any(value <= 0):
                print(f"Warning: {key} contains non-positive indices")

    return data

# Use validation before fitting
validated_data = validate_stan_data(data)
fit = model.sample(data=validated_data)
```
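
To see the non-finite check fire, feed the validator a deliberately broken array (hypothetical example data):

```python
import numpy as np

# Hypothetical bad input: the validator should reject the NaN
bad_data = {"y": np.array([1.0, np.nan, 2.0])}
try:
    validate_stan_data(bad_data)
except ValueError as err:
    print(err)  # Non-finite values in y
```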

## File Management Patterns

### Organized Output Directories

```python
import os
from datetime import datetime

# Create organized directory structure
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"analysis_{timestamp}"
os.makedirs(output_dir, exist_ok=True)

# Run analysis with organized outputs
fit = model.sample(
    data=data,
    output_dir=output_dir,
    chains=4
)

# Save additional outputs
fit.save_csvfiles(dir=f"{output_dir}/chains")
csp.write_stan_json(f"{output_dir}/data.json", data)

# Save metadata
with open(f"{output_dir}/session_info.txt", "w") as f:
    f.write(csp.show_versions(output=False))

print(f"Analysis saved to {output_dir}")
```

### Batch Processing

```python
import glob
from pathlib import Path

# Process multiple datasets
data_files = glob.glob("datasets/*.json")
results_dir = Path("batch_results")
results_dir.mkdir(exist_ok=True)

for data_file in data_files:
    dataset_name = Path(data_file).stem
    print(f"Processing {dataset_name}...")

    # CmdStanPy accepts a JSON file path directly as the data argument
    fit = model.sample(data=data_file, chains=4)

    # Save results
    output_subdir = results_dir / dataset_name
    output_subdir.mkdir(exist_ok=True)
    fit.save_csvfiles(dir=str(output_subdir))

    # Save summary
    summary = fit.summary()
    summary.to_csv(output_subdir / "summary.csv")

    print(f"Completed {dataset_name}")
```
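
The per-dataset summaries can then be stacked into one comparison table with pandas. A sketch, assuming the loop above has populated `batch_results/`:

```python
from pathlib import Path

import pandas as pd

# Collect each dataset's summary.csv into a single labeled DataFrame
results_dir = Path("batch_results")
frames = []
for subdir in sorted(results_dir.iterdir()):
    summary_file = subdir / "summary.csv"
    if summary_file.exists():
        df = pd.read_csv(summary_file, index_col=0)
        df["dataset"] = subdir.name
        frames.append(df)

combined = pd.concat(frames)
combined.to_csv(results_dir / "combined_summary.csv")
```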

### Archive and Reproducibility

```python
import json
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import cmdstanpy as csp

def save_analysis_archive(fit, data, model_file, output_dir):
    """Save a complete analysis archive for reproducibility."""
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True)

    # Save CSV files
    fit.save_csvfiles(dir=str(output_path / "csvs"))

    # Save data
    csp.write_stan_json(str(output_path / "data.json"), data)

    # Copy Stan model file
    shutil.copy2(model_file, output_path / "model.stan")

    # Save Python objects; fit objects reference their CSV files,
    # so keep the pickle and the csvs directory together
    with open(output_path / "fit.pkl", "wb") as f:
        pickle.dump(fit, f)

    # Save metadata
    metadata = {
        "cmdstanpy_version": csp.__version__,
        "cmdstan_version": csp.cmdstan_version(),
        "timestamp": datetime.now().isoformat(),
        "chains": fit.chains,
        "draws_per_chain": fit.num_draws_sampling
    }
    with open(output_path / "metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    # Save system info
    with open(output_path / "system_info.txt", "w") as f:
        f.write(csp.show_versions(output=False))

    print(f"Analysis archived to {output_path}")

# Use archive function
save_analysis_archive(
    fit=mcmc_fit,
    data=model_data,
    model_file="my_model.stan",
    output_dir="analysis_archive"
)
```

### Loading Archived Results

```python
import json
import pickle
from pathlib import Path

def load_analysis_archive(archive_dir):
    """Load archived analysis results."""
    archive_path = Path(archive_dir)

    # Load fit object
    with open(archive_path / "fit.pkl", "rb") as f:
        fit = pickle.load(f)

    # Load metadata
    with open(archive_path / "metadata.json", "r") as f:
        metadata = json.load(f)

    print(f"Loaded analysis from {metadata['timestamp']}")
    print(f"CmdStanPy version: {metadata['cmdstanpy_version']}")

    return fit, metadata

# Restore archived results
restored_fit, meta = load_analysis_archive("analysis_archive")
print(restored_fit.summary())
```