0
# Data Operations
1
2
ArviZ's data operations cover loading, converting, and manipulating inference results across multiple Bayesian frameworks and file formats. Use them to create, transform, and manage `InferenceData` objects, load the built-in example datasets, and read or write results in NetCDF, JSON, and Zarr formats.
3
4
## Core Data Structure
5
6
```python { .api }
7
class InferenceData:
8
"""
9
Main data container for Bayesian inference results.
10
11
NetCDF-based data structure that stores each group as an xarray Dataset, organizing
12
posterior samples, prior samples, observed data, diagnostics,
13
and metadata from Bayesian inference.
14
15
Groups:
16
posterior: MCMC samples from posterior distribution
17
prior: Samples from prior distribution
18
observed_data: Observed/input data used in model
19
posterior_predictive: Samples from posterior predictive distribution
20
sample_stats: MCMC diagnostics and metadata
21
log_likelihood: Log likelihood evaluations for model comparison
22
"""
23
```
24
25
## Data Loading and Management
26
27
### Built-in Datasets
28
29
```python { .api }
30
def load_arviz_data(dataset: str, data_home: str = None, **kwargs) -> InferenceData:
31
"""
32
Load built-in example datasets for testing and learning.
33
34
Args:
35
dataset (str): Name of dataset to load ('centered_eight', 'non_centered_eight', etc.)
36
data_home (str, optional): Directory to cache datasets
37
**kwargs: Additional parameters for data loading
38
39
Returns:
40
InferenceData: Loaded example dataset
41
"""
42
43
def list_datasets() -> list:
44
"""
45
List all available built-in datasets.
46
47
Returns:
48
list: Names of available datasets
49
"""
50
51
def clear_data_home(data_home: str = None):
52
"""
53
Clear cached datasets from local storage.
54
55
Args:
56
data_home (str, optional): Directory containing cached data
57
"""
58
```
59
60
### Basic Usage
61
62
```python
63
import arviz as az
64
65
# Load example dataset
66
idata = az.load_arviz_data("centered_eight")
67
68
# List available datasets
69
datasets = az.list_datasets()
70
print(datasets)
71
72
# Clear cache
73
az.clear_data_home()
74
```
75
76
## Data Manipulation
77
78
### Concatenation and Extraction
79
80
```python { .api }
81
def concat(*args, dim: str, copy: bool = True, inplace: bool = False, reset_dim: bool = True) -> InferenceData:
82
"""
83
Concatenate multiple InferenceData objects along specified dimension.
84
85
Args:
86
*args: InferenceData objects to concatenate
87
dim (str): Dimension to concatenate along ('chain', 'draw', etc.)
88
copy (bool): Whether to copy data (default True)
89
inplace (bool): Whether to modify first object in-place (default False)
90
reset_dim (bool): Whether to reset dimension coordinates (default True)
91
92
Returns:
93
InferenceData: Concatenated inference data
94
"""
95
96
def extract(data: InferenceData, *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> InferenceData:
97
"""
98
Extract subset of data from InferenceData object.
99
100
Args:
101
data (InferenceData): Source inference data
102
var_names (list, optional): Variables to extract
103
groups (list, optional): Groups to extract ('posterior', 'prior', etc.)
104
num_samples (int, optional): Number of samples to extract
105
**kwargs: Additional extraction parameters
106
107
Returns:
108
InferenceData: Extracted subset of data
109
"""
110
111
def extract_dataset(data: InferenceData, *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> dict:
112
"""
113
Extract xarray datasets from InferenceData object.
114
115
Args:
116
data (InferenceData): Source inference data
117
var_names (list, optional): Variables to extract
118
groups (list, optional): Groups to extract
119
num_samples (int, optional): Number of samples to extract
120
**kwargs: Additional extraction parameters
121
122
Returns:
123
dict: Dictionary mapping group names to xarray datasets
124
"""
125
```
126
127
### Usage Examples
128
129
```python
130
# Concatenate multiple inference runs
131
idata1 = az.load_arviz_data("centered_eight")
132
idata2 = az.load_arviz_data("non_centered_eight")
133
combined = az.concat(idata1, idata2, dim="chain")
134
135
# Extract subset of variables
136
subset = az.extract(idata1, var_names=["mu", "tau"], num_samples=1000)
137
138
# Extract specific groups as datasets
139
datasets = az.extract_dataset(idata1, groups=["posterior", "sample_stats"])
140
```
141
142
## Data Conversion
143
144
### General Conversion Functions
145
146
```python { .api }
147
def convert_to_inference_data(obj, *, group: str = None, coords: dict = None, dims: dict = None, **kwargs) -> InferenceData:
148
"""
149
Convert various objects to InferenceData format.
150
151
Args:
152
obj: Object to convert (dict, xarray Dataset, numpy array, etc.)
153
group (str, optional): Target group name ('posterior', 'prior', etc.)
154
coords (dict, optional): Coordinate specifications
155
dims (dict, optional): Dimension specifications
156
**kwargs: Additional conversion parameters
157
158
Returns:
159
InferenceData: Converted inference data object
160
"""
161
162
def convert_to_dataset(obj, *, group: str = None, coords: dict = None, dims: dict = None) -> xr.Dataset:
162
"""
163
Convert objects to xarray Dataset format.
164
165
Args:
166
obj: Object to convert
167
group (str, optional): Target group name
168
coords (dict, optional): Coordinate specifications
169
dims (dict, optional): Dimension specifications
170
171
Returns:
172
xr.Dataset: Converted dataset
174
"""
175
```
176
177
### Specialized Conversion Functions
178
179
```python { .api }
180
def numpy_to_data_array(ary: np.ndarray, *, var_name: str = "x", coords: dict = None, dims: list = None) -> xr.DataArray:
181
"""
182
Convert numpy array to xarray DataArray.
183
184
Args:
185
ary (np.ndarray): Input numpy array
186
var_name (str): Variable name (default 'x')
187
coords (dict, optional): Coordinate specifications
188
dims (list, optional): Dimension names
189
190
Returns:
191
xr.DataArray: Converted data array
192
"""
193
194
def dict_to_dataset(data: dict, *, coords: dict = None, dims: dict = None, **kwargs) -> xr.Dataset:
195
"""
196
Convert dictionary to xarray Dataset.
197
198
Args:
199
data (dict): Dictionary containing variable data
200
coords (dict, optional): Coordinate specifications
201
dims (dict, optional): Dimension specifications
202
**kwargs: Additional parameters
203
204
Returns:
205
xr.Dataset: Converted dataset
206
"""
207
208
def pytree_to_dataset(data, *, var_names: list = None, coords: dict = None, dims: dict = None) -> xr.Dataset:
209
"""
210
Convert pytree structure to xarray Dataset.
211
212
Args:
213
data: Pytree structure (JAX, PyTorch, etc.)
214
var_names (list, optional): Variable names
215
coords (dict, optional): Coordinate specifications
216
dims (dict, optional): Dimension specifications
217
218
Returns:
219
xr.Dataset: Converted dataset
220
"""
221
```
222
223
### Usage Examples
224
225
```python
226
import numpy as np
227
228
# Convert numpy array to InferenceData
229
samples = np.random.normal(0, 1, (4, 1000, 10)) # 4 chains, 1000 draws, 10 parameters
230
idata = az.convert_to_inference_data(samples, group="posterior")
231
232
# Convert dictionary to InferenceData
233
data_dict = {
234
"mu": np.random.normal(0, 1, (4, 1000)),
235
"sigma": np.random.lognormal(0, 1, (4, 1000))
236
}
237
idata = az.convert_to_inference_data(data_dict, group="posterior")
238
239
# Convert numpy array to DataArray
240
arr = np.random.normal(0, 1, (100, 50))
241
da = az.numpy_to_data_array(arr, var_name="theta", dims=["draw", "parameter"])
242
```
243
244
## File I/O Operations
245
246
### NetCDF Format
247
248
```python { .api }
249
def from_netcdf(filename: str, *, engine: str = None, group_kwargs: dict = None, regex: str = None) -> InferenceData:
250
"""
251
Load InferenceData from NetCDF file.
252
253
Args:
254
filename (str): Path to NetCDF file
255
engine (str, optional): NetCDF engine to use
256
group_kwargs (dict, optional): Group-specific loading arguments
257
regex (str, optional): Regular expression to filter groups
258
259
Returns:
260
InferenceData: Loaded inference data
261
"""
262
263
def to_netcdf(data: InferenceData, filename: str, *, groups: list = None, **kwargs):
264
"""
265
Save InferenceData to NetCDF file.
266
267
Args:
268
data (InferenceData): Inference data to save
269
filename (str): Output file path
270
groups (list, optional): Groups to save
271
**kwargs: Additional saving parameters
272
"""
273
```
274
275
### JSON Format
276
277
```python { .api }
278
def from_json(filename: str) -> InferenceData:
279
"""
280
Load InferenceData from JSON file.
281
282
Args:
283
filename (str): Path to JSON file
284
285
Returns:
286
InferenceData: Loaded inference data
287
"""
288
289
def to_json(data: InferenceData, filename: str, *, groups: list = None, **kwargs):
290
"""
291
Save InferenceData to JSON file.
292
293
Args:
294
data (InferenceData): Inference data to save
295
filename (str): Output file path
296
groups (list, optional): Groups to save
297
**kwargs: Additional saving parameters
298
"""
299
```
300
301
### Zarr Format
302
303
```python { .api }
304
def from_zarr(store, *, groups: list = None, **kwargs) -> InferenceData:
305
"""
306
Load InferenceData from Zarr store.
307
308
Args:
309
store: Zarr store path or object
310
groups (list, optional): Groups to load
311
**kwargs: Additional loading parameters
312
313
Returns:
314
InferenceData: Loaded inference data
315
"""
316
317
def to_zarr(data: InferenceData, store, *, groups: list = None, **kwargs):
318
"""
319
Save InferenceData to Zarr store.
320
321
Args:
322
data (InferenceData): Inference data to save
323
store: Zarr store path or object
324
groups (list, optional): Groups to save
325
**kwargs: Additional saving parameters
326
"""
327
```
328
329
### DataTree Integration
330
331
```python { .api }
332
def from_datatree(datatree) -> InferenceData:
333
"""
334
Convert xarray DataTree to InferenceData.
335
336
Args:
337
datatree: xarray DataTree object
338
339
Returns:
340
InferenceData: Converted inference data
341
"""
342
343
def to_datatree(data: InferenceData) -> object:
344
"""
345
Convert InferenceData to xarray DataTree.
346
347
Args:
348
data (InferenceData): Inference data to convert
349
350
Returns:
351
DataTree: Converted datatree object
352
"""
353
```
354
355
### Usage Examples
356
357
```python
358
# Save and load NetCDF
359
az.to_netcdf(idata, "my_analysis.nc")
360
loaded_idata = az.from_netcdf("my_analysis.nc")
361
362
# Save and load JSON
363
az.to_json(idata, "my_analysis.json")
364
loaded_idata = az.from_json("my_analysis.json")
365
366
# Save and load Zarr
367
az.to_zarr(idata, "my_analysis.zarr")
368
loaded_idata = az.from_zarr("my_analysis.zarr")
369
```
370
371
## Framework Integration Functions
372
373
### Dictionary and Basic Conversion
374
375
```python { .api }
376
def from_dict(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, log_prior=None, sample_stats=None, sample_stats_prior=None, **kwargs) -> InferenceData:
377
"""
378
Convert Python dictionaries to InferenceData format.
379
380
Args:
381
posterior (dict, optional): Posterior samples dictionary
382
posterior_predictive (dict, optional): Posterior predictive samples
383
predictions (dict, optional): Out of sample predictions
384
prior (dict, optional): Prior samples dictionary
385
prior_predictive (dict, optional): Prior predictive samples
386
observed_data (dict, optional): Observed data dictionary
387
constant_data (dict, optional): Model constants dictionary
388
predictions_constant_data (dict, optional): Constants for predictions
389
log_likelihood (dict, optional): Log likelihood evaluations
390
log_prior (dict, optional): Log prior evaluations
391
sample_stats (dict, optional): MCMC sample statistics
392
sample_stats_prior (dict, optional): Prior sample statistics
393
**kwargs: Additional conversion parameters (coords, dims, etc.)
394
395
Returns:
396
InferenceData: Converted inference data object
397
"""
398
```
399
400
### Stan Integration
401
402
```python { .api }
403
def from_cmdstan(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, save_warmup=False, **kwargs) -> InferenceData:
404
"""
405
Convert CmdStan output files to InferenceData.
406
407
Args:
408
posterior (str or list): Path(s) to posterior CSV files
409
posterior_predictive (str or list, optional): Path(s) to posterior predictive CSV files
410
predictions (str or list, optional): Path(s) to predictions CSV files
411
prior (str or list, optional): Path(s) to prior CSV files
412
prior_predictive (str or list, optional): Path(s) to prior predictive CSV files
413
observed_data (dict, optional): Observed data dictionary
414
constant_data (dict, optional): Model constants dictionary
415
predictions_constant_data (dict, optional): Constants for predictions
416
log_likelihood (dict, optional): Log likelihood evaluations
417
save_warmup (bool): Whether to save warmup samples
418
**kwargs: Additional conversion parameters
419
420
Returns:
421
InferenceData: Converted inference data
422
"""
423
424
def from_cmdstanpy(fit, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, **kwargs) -> InferenceData:
425
"""
426
Convert CmdStanPy fit results to InferenceData.
427
428
Args:
429
fit: CmdStanPy fit object
430
posterior_predictive (str or array, optional): Posterior predictive samples
431
predictions (str or array, optional): Out of sample predictions
432
prior (str or array, optional): Prior samples
433
prior_predictive (str or array, optional): Prior predictive samples
434
observed_data (dict, optional): Observed data dictionary
435
constant_data (dict, optional): Model constants dictionary
436
predictions_constant_data (dict, optional): Constants for predictions
437
log_likelihood (dict, optional): Log likelihood evaluations
438
**kwargs: Additional conversion parameters
439
440
Returns:
441
InferenceData: Converted inference data
442
"""
443
444
def from_pystan(fit, *, posterior_predictive=None, observed_data=None, constant_data=None, predictions=None, log_likelihood=None, coords=None, dims=None, **kwargs) -> InferenceData:
445
"""
446
Convert PyStan fit results to InferenceData.
447
448
Args:
449
fit: PyStan fit object (PyStan 2.x or 3.x)
450
posterior_predictive (str or array, optional): Posterior predictive samples
451
observed_data (dict, optional): Observed data dictionary
452
constant_data (dict, optional): Model constants dictionary
453
predictions (str or array, optional): Out of sample predictions
454
log_likelihood (dict, optional): Log likelihood evaluations
455
coords (dict, optional): Coordinate specifications
456
dims (dict, optional): Dimension specifications
457
**kwargs: Additional conversion parameters
458
459
Returns:
460
InferenceData: Converted inference data
461
"""
462
```
463
464
### PyMC Integration
465
466
```python { .api }
467
def from_pymc(trace=None, *, prior=None, posterior_predictive=None, log_likelihood=None, coords=None, dims=None, model=None, save_warmup=False, **kwargs) -> InferenceData:
468
"""
469
Convert PyMC trace to InferenceData.
470
471
Args:
472
trace: PyMC MultiTrace or InferenceData object
473
prior (dict, optional): Prior samples
474
posterior_predictive (dict, optional): Posterior predictive samples
475
log_likelihood (dict, optional): Log likelihood evaluations
476
coords (dict, optional): Coordinate specifications
477
dims (dict, optional): Dimension specifications
478
model: PyMC model object
479
save_warmup (bool): Whether to save warmup samples
480
**kwargs: Additional conversion parameters
481
482
Returns:
483
InferenceData: Converted inference data
484
"""
485
```
486
487
### JAX/NumPyro Integration
488
489
```python { .api }
490
def from_numpyro(posterior=None, *, prior=None, posterior_predictive=None, predictions=None, constant_data=None, predictions_constant_data=None, observed_data=None, **kwargs) -> InferenceData:
491
"""
492
Convert NumPyro MCMC results to InferenceData.
493
494
Args:
495
posterior: NumPyro MCMC object or posterior samples dict
496
prior (dict, optional): Prior samples dictionary
497
posterior_predictive (dict, optional): Posterior predictive samples
498
predictions (dict, optional): Out of sample predictions
499
constant_data (dict, optional): Model constants dictionary
500
predictions_constant_data (dict, optional): Constants for predictions
501
observed_data (dict, optional): Observed data dictionary
502
**kwargs: Additional conversion parameters
503
504
Returns:
505
InferenceData: Converted inference data
506
"""
507
508
def from_pyro(posterior=None, *, prior=None, posterior_predictive=None, **kwargs) -> InferenceData:
509
"""
510
Convert Pyro MCMC results to InferenceData.
511
512
Args:
513
posterior: Pyro MCMC object or posterior samples dict
514
prior (dict, optional): Prior samples dictionary
515
posterior_predictive (dict, optional): Posterior predictive samples
516
**kwargs: Additional conversion parameters
517
518
Returns:
519
InferenceData: Converted inference data
520
"""
521
522
def from_pytree(posterior, *, prior=None, posterior_predictive=None, sample_stats=None, observed_data=None, **kwargs) -> InferenceData:
523
"""
524
Convert PyTree structures (JAX, etc.) to InferenceData.
525
526
Args:
527
posterior: PyTree with posterior samples
528
prior (dict, optional): Prior samples pytree
529
posterior_predictive (dict, optional): Posterior predictive samples pytree
530
sample_stats (dict, optional): Sample statistics pytree
531
observed_data (dict, optional): Observed data dictionary
532
**kwargs: Additional conversion parameters
533
534
Returns:
535
InferenceData: Converted inference data
536
"""
537
```
538
539
### Other Framework Integration
540
541
```python { .api }
542
def from_emcee(sampler, *, var_names=None, slices=None, coords=None, dims=None, **kwargs) -> InferenceData:
543
"""
544
Convert emcee sampler results to InferenceData.
545
546
Args:
547
sampler: emcee EnsembleSampler object
548
var_names (list, optional): Variable names for samples
549
slices (dict, optional): Slices for multi-dimensional parameters
550
coords (dict, optional): Coordinate specifications
551
dims (dict, optional): Dimension specifications
552
**kwargs: Additional conversion parameters
553
554
Returns:
555
InferenceData: Converted inference data
556
"""
557
558
def from_pyjags(fit, *, var_names=None, coords=None, dims=None, **kwargs) -> InferenceData:
559
"""
560
Convert PyJAGS results to InferenceData.
561
562
Args:
563
fit: PyJAGS fit object
564
var_names (list, optional): Variable names to extract
565
coords (dict, optional): Coordinate specifications
566
dims (dict, optional): Dimension specifications
567
**kwargs: Additional conversion parameters
568
569
Returns:
570
InferenceData: Converted inference data
571
"""
572
573
def from_beanmachine(samples, *, prior=None, posterior_predictive=None, **kwargs) -> InferenceData:
574
"""
575
Convert Meta's Bean Machine samples to InferenceData.
576
577
Args:
578
samples: Bean Machine posterior samples
579
prior (dict, optional): Prior samples
580
posterior_predictive (dict, optional): Posterior predictive samples
581
**kwargs: Additional conversion parameters
582
583
Returns:
584
InferenceData: Converted inference data
585
"""
586
```
587
588
### Framework Integration Usage Examples
589
590
```python
591
# Stan integration
592
idata = az.from_cmdstanpy(fit, observed_data={"y": y_obs})
593
594
# PyMC integration
595
with model:
596
trace = pm.sample(1000)
597
idata = az.from_pymc(trace, model=model)
598
599
# NumPyro integration
600
mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000)
601
mcmc.run(rng_key, **data)
602
idata = az.from_numpyro(mcmc)
603
604
# Dictionary conversion
605
posterior_dict = {"mu": samples_mu, "sigma": samples_sigma}
606
idata = az.from_dict(posterior=posterior_dict, observed_data={"y": y_obs})
607
```
608
609
## Type Definitions
610
611
```python { .api }
612
CoordSpec = Dict[str, List[Any]]
613
"""Type alias for coordinate specifications in data conversion."""
614
615
DimSpec = Dict[str, List[str]]
616
"""Type alias for dimension specifications in data conversion."""
617
```