# Performance and Utilities

Performance optimization utilities including Numba JIT compilation, Dask parallelization, and interactive backend management for Jupyter environments.

## JIT Compilation with Numba

```python { .api }
class Numba:
    """
    Numba JIT compilation utilities for performance optimization.

    Enables Just-In-Time compilation of critical ArviZ functions
    for significant performance improvements, especially with large datasets.
    """

    numba_flag: bool
    """Current state of Numba JIT compilation (True if enabled)."""

    @classmethod
    def enable_numba(cls):
        """
        Enable Numba JIT compilation for supported ArviZ functions.

        Improves performance for computationally intensive operations
        like statistical calculations and data transformations.
        """

    @classmethod
    def disable_numba(cls):
        """
        Disable Numba JIT compilation and fall back to pure Python/NumPy.

        Useful for debugging or when Numba installation issues occur.
        """
```

### Usage Examples

```python
import arviz as az

# Check current Numba status
print(f"Numba enabled: {az.Numba.numba_flag}")

# Enable Numba acceleration
az.Numba.enable_numba()

# Compute statistics with JIT acceleration
idata = az.load_arviz_data("centered_eight")
summary = az.summary(idata)  # Faster with Numba
rhat = az.rhat(idata)  # Accelerated convergence diagnostics
ess = az.ess(idata)  # Faster ESS computation

# Disable if needed (e.g., for debugging)
az.Numba.disable_numba()
```

## Parallel Computation with Dask

```python { .api }
class Dask:
    """
    Dask parallel computation utilities for distributed processing.

    Enables parallel execution of ArviZ computations across multiple
    cores or distributed clusters for improved performance on large datasets.
    """

    dask_flag: bool
    """Current state of Dask parallelization (True if enabled)."""

    dask_kwargs: dict
    """Dictionary of Dask configuration parameters."""

    @classmethod
    def enable_dask(cls, dask_kwargs: dict = None):
        """
        Enable Dask parallel computation for supported ArviZ functions.

        Args:
            dask_kwargs (dict, optional): Dask scheduler and worker configuration
                Example: {"scheduler": "threads", "num_workers": 4}
        """

    @classmethod
    def disable_dask(cls):
        """
        Disable Dask parallelization and use single-threaded computation.
        """
```

### Usage Examples

```python
# Check current Dask status
print(f"Dask enabled: {az.Dask.dask_flag}")
print(f"Dask config: {az.Dask.dask_kwargs}")

# Enable Dask with custom configuration
dask_config = {
    "scheduler": "threads",  # or "processes", "distributed"
    "num_workers": 4         # number of parallel workers
}
az.Dask.enable_dask(dask_config)

# Computations now run in parallel
large_idata = az.load_arviz_data("rugby")
summary = az.summary(large_idata)  # Parallel summary computation
loo_result = az.loo(large_idata)  # Parallel LOO-CV computation

# Disable Dask
az.Dask.disable_dask()
```

### Advanced Dask Configuration

```python
# Distributed computing setup
distributed_config = {
    "scheduler": "distributed",
    "address": "scheduler-address:8786",  # Dask scheduler address
    "num_workers": 8
}
az.Dask.enable_dask(distributed_config)

# Process-based parallelism (for CPU-bound tasks)
process_config = {
    "scheduler": "processes",
    "num_workers": 4,
    "threads_per_worker": 2
}
az.Dask.enable_dask(process_config)

# Thread-based parallelism (for I/O-bound tasks)
thread_config = {
    "scheduler": "threads",
    "num_workers": 8
}
az.Dask.enable_dask(thread_config)
```

## Interactive Backend Management

```python { .api }
class interactive_backend:
    """
    Context manager for interactive plotting backends in Jupyter environments.

    Manages switching between inline static plots and interactive plots
    that can be displayed in separate windows or embedded widgets.
    """

    def __init__(self, backend: str = ""):
        """
        Initialize interactive backend context manager.

        Args:
            backend (str, optional): Interactive backend to use
                Options: "notebook", "lab", "colab", "kaggle"
                If empty, automatically detects environment
        """

    def __enter__(self):
        """Enter interactive plotting mode."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit interactive mode and restore previous settings."""
```

### Usage Examples

```python
# Basic interactive plotting
with az.interactive_backend():
    az.plot_trace(idata)  # Opens in interactive window
    az.plot_posterior(idata)  # Interactive plot with zoom/pan

# Specific backend for Jupyter Lab
with az.interactive_backend("lab"):
    az.plot_pair(idata)  # Interactive pair plot in JupyterLab

# Auto-detect environment
with az.interactive_backend():
    # Automatically uses appropriate backend:
    # - "notebook" for Jupyter Notebook
    # - "lab" for JupyterLab
    # - "colab" for Google Colab
    # - "kaggle" for Kaggle Notebooks
    az.plot_forest(idata)
```

## Performance Optimization Strategies

### Combining Numba and Dask

```python
# Optimal configuration for large-scale analysis
def setup_high_performance():
    """Configure ArviZ for maximum performance."""

    # Enable JIT compilation
    az.Numba.enable_numba()

    # Enable parallel processing
    az.Dask.enable_dask({
        "scheduler": "threads",
        "num_workers": 4
    })

    print("High-performance mode enabled")

# Use for computationally intensive tasks
setup_high_performance()

# Large dataset processing
large_models = {f"model_{i}": large_idata_list[i] for i in range(10)}
comparison = az.compare(large_models)  # Fast parallel model comparison
```

### Memory-Efficient Processing

```python
# Configuration for memory-constrained environments
def setup_memory_efficient():
    """Configure ArviZ for memory efficiency."""

    # Use lazy loading
    az.rcParams["data.load"] = "lazy"

    # Disable warmup saving to reduce memory
    az.rcParams["data.save_warmup"] = False

    # Enable Numba for faster processing (less memory overhead)
    az.Numba.enable_numba()

    # Use process-based parallelism to avoid memory sharing
    az.Dask.enable_dask({
        "scheduler": "processes",
        "num_workers": 2  # Fewer workers to conserve memory
    })

setup_memory_efficient()
```

### Benchmark Performance

```python
import time

def benchmark_configuration():
    """Compare performance with different configurations."""

    # Load test data
    idata = az.load_arviz_data("rugby")

    # Baseline (no optimization)
    az.Numba.disable_numba()
    az.Dask.disable_dask()

    start = time.time()
    summary1 = az.summary(idata)
    baseline_time = time.time() - start

    # With Numba
    az.Numba.enable_numba()

    start = time.time()
    summary2 = az.summary(idata)
    numba_time = time.time() - start

    # With Numba + Dask
    az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})

    start = time.time()
    summary3 = az.summary(idata)
    combined_time = time.time() - start

    print(f"Baseline: {baseline_time:.2f}s")
    print(f"Numba: {numba_time:.2f}s ({baseline_time/numba_time:.1f}x speedup)")
    print(f"Numba+Dask: {combined_time:.2f}s ({baseline_time/combined_time:.1f}x speedup)")

benchmark_configuration()
```

## Environment Detection

```python
def detect_environment():
    """Detect current computational environment and optimize accordingly."""

    import sys

    # Detect Jupyter environments
    if 'ipykernel' in sys.modules:
        if 'google.colab' in sys.modules:
            print("Google Colab detected")
            # Colab-specific optimizations
            az.rcParams["plot.backend"] = "matplotlib"
            az.Numba.enable_numba()

        elif 'ipywidgets' in sys.modules:
            print("JupyterLab detected")
            # JupyterLab optimizations
            az.rcParams["plot.backend"] = "bokeh"
            az.Numba.enable_numba()
            az.Dask.enable_dask({"scheduler": "threads", "num_workers": 2})

        else:
            print("Jupyter Notebook detected")
            az.rcParams["plot.backend"] = "matplotlib"
            az.Numba.enable_numba()

    else:
        print("Script/CLI environment detected")
        # Command-line optimizations
        az.Numba.enable_numba()
        az.Dask.enable_dask({"scheduler": "processes", "num_workers": 4})

# Auto-configure based on environment
detect_environment()
```

## Troubleshooting Performance Issues

### Numba Installation Issues

```python
try:
    az.Numba.enable_numba()
    print("Numba enabled successfully")
except ImportError:
    print("Numba not available. Install with: pip install numba")
except Exception as e:
    print(f"Numba error: {e}")
    print("Falling back to pure Python implementation")
    az.Numba.disable_numba()
```

### Dask Configuration Problems

```python
try:
    az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})
    print("Dask enabled successfully")
except ImportError:
    print("Dask not available. Install with: pip install dask")
except Exception as e:
    print(f"Dask error: {e}")
    print("Using single-threaded computation")
    az.Dask.disable_dask()
```

### Memory Issues

```python
def handle_memory_constraints():
    """Configure ArviZ for memory-constrained environments."""

    import psutil

    # Check available memory
    available_gb = psutil.virtual_memory().available / (1024**3)

    if available_gb < 4:
        print("Limited memory detected. Using conservative settings.")
        az.rcParams["data.load"] = "lazy"
        az.rcParams["data.save_warmup"] = False
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 1})

    elif available_gb < 8:
        print("Moderate memory available. Using balanced settings.")
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 2})
        az.Numba.enable_numba()

    else:
        print("Sufficient memory available. Using high-performance settings.")
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})
        az.Numba.enable_numba()

handle_memory_constraints()
```

## Additional Utility Functions

```python { .api }
def flatten_inference_data_to_dict(data: InferenceData, *, var_names: list = None, groups: list = None, dimensions: dict = None, group_info: bool = False, var_name_format: str = None, index_origin: int = None) -> dict:
    """
    Flatten InferenceData to dictionary format for external use.

    Converts ArviZ InferenceData objects to flat dictionary structures
    that can be used with other libraries or data analysis tools.

    Args:
        data (InferenceData): Input inference data to flatten
        var_names (list, optional): Variables to include in output
        groups (list, optional): Groups to include (default: all)
        dimensions (dict, optional): Dimension specifications
        group_info (bool): Whether to include group information (default False)
        var_name_format (str, optional): Format string for variable names
        index_origin (int, optional): Starting index for array indexing

    Returns:
        dict: Flattened dictionary with data and metadata
    """
```