# Performance and Utilities

Performance optimization utilities including Numba JIT compilation, Dask parallelization, and interactive backend management for Jupyter environments.

## JIT Compilation with Numba

```python { .api }
class Numba:
    """
    Numba JIT compilation utilities for performance optimization.

    Enables just-in-time compilation of critical ArviZ functions
    for significant performance improvements, especially with large datasets.
    """

    numba_flag: bool
    """Current state of Numba JIT compilation (True if enabled)."""

    @classmethod
    def enable_numba(cls):
        """
        Enable Numba JIT compilation for supported ArviZ functions.

        Improves performance for computationally intensive operations
        like statistical calculations and data transformations.
        """

    @classmethod
    def disable_numba(cls):
        """
        Disable Numba JIT compilation and fall back to pure Python/NumPy.

        Useful for debugging or when Numba installation issues occur.
        """
```

### Usage Examples

```python
import arviz as az

# Check current Numba status
print(f"Numba enabled: {az.Numba.numba_flag}")

# Enable Numba acceleration
az.Numba.enable_numba()

# Compute statistics with JIT acceleration
idata = az.load_arviz_data("centered_eight")
summary = az.summary(idata)  # Faster with Numba
rhat = az.rhat(idata)        # Accelerated convergence diagnostics
ess = az.ess(idata)          # Faster ESS computation

# Disable if needed (e.g., for debugging)
az.Numba.disable_numba()
```

## Parallel Computation with Dask

```python { .api }
class Dask:
    """
    Dask parallel computation utilities for distributed processing.

    Enables parallel execution of ArviZ computations across multiple
    cores or distributed clusters for improved performance on large datasets.
    """

    dask_flag: bool
    """Current state of Dask parallelization (True if enabled)."""

    dask_kwargs: dict
    """Dictionary of Dask configuration parameters."""

    @classmethod
    def enable_dask(cls, dask_kwargs: dict = None):
        """
        Enable Dask parallel computation for supported ArviZ functions.

        Args:
            dask_kwargs (dict, optional): Dask scheduler and worker configuration.
                Example: {"scheduler": "threads", "num_workers": 4}
        """

    @classmethod
    def disable_dask(cls):
        """
        Disable Dask parallelization and use single-threaded computation.
        """
```

### Usage Examples

```python
# Check current Dask status
print(f"Dask enabled: {az.Dask.dask_flag}")
print(f"Dask config: {az.Dask.dask_kwargs}")

# Enable Dask with custom configuration
dask_config = {
    "scheduler": "threads",  # or "processes", "distributed"
    "num_workers": 4         # number of parallel workers
}
az.Dask.enable_dask(dask_config)

# Computations now run in parallel
large_idata = az.load_arviz_data("rugby")
summary = az.summary(large_idata)  # Parallel summary computation
loo_result = az.loo(large_idata)   # Parallel LOO-CV computation

# Disable Dask
az.Dask.disable_dask()
```

### Advanced Dask Configuration

```python
# Distributed computing setup
distributed_config = {
    "scheduler": "distributed",
    "address": "scheduler-address:8786",  # Dask scheduler address
    "num_workers": 8
}
az.Dask.enable_dask(distributed_config)

# Process-based parallelism (for CPU-bound tasks)
process_config = {
    "scheduler": "processes",
    "num_workers": 4,
    "threads_per_worker": 2
}
az.Dask.enable_dask(process_config)

# Thread-based parallelism (for I/O-bound tasks)
thread_config = {
    "scheduler": "threads",
    "num_workers": 8
}
az.Dask.enable_dask(thread_config)
```
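
The configurations above hard-code worker counts. As a small sketch (the `enable_dask_for_machine` helper below is illustrative, not part of ArviZ), the thread pool can instead be sized from the machine's CPU count:

```python
import os

import arviz as az

def enable_dask_for_machine(reserve: int = 1) -> int:
    """Illustrative helper: size the Dask thread pool from the CPU count,
    leaving `reserve` cores free for the rest of the system."""
    workers = max(1, (os.cpu_count() or 1) - reserve)
    az.Dask.enable_dask({"scheduler": "threads", "num_workers": workers})
    return workers

print(f"Dask enabled with {enable_dask_for_machine()} workers")
```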

## Interactive Backend Management

```python { .api }
class interactive_backend:
    """
    Context manager for interactive plotting backends in Jupyter environments.

    Manages switching between inline static plots and interactive plots
    that can be displayed in separate windows or embedded widgets.
    """

    def __init__(self, backend: str = ""):
        """
        Initialize the interactive backend context manager.

        Args:
            backend (str, optional): Interactive backend to use.
                Options: "notebook", "lab", "colab", "kaggle".
                If empty, the environment is detected automatically.
        """

    def __enter__(self):
        """Enter interactive plotting mode."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit interactive mode and restore previous settings."""
```

### Usage Examples

```python
# Basic interactive plotting
idata = az.load_arviz_data("centered_eight")

with az.interactive_backend():
    az.plot_trace(idata)      # Opens in interactive window
    az.plot_posterior(idata)  # Interactive plot with zoom/pan

# Specific backend for JupyterLab
with az.interactive_backend("lab"):
    az.plot_pair(idata)  # Interactive pair plot in JupyterLab

# Auto-detect environment
with az.interactive_backend():
    # Automatically uses the appropriate backend:
    # - "notebook" for Jupyter Notebook
    # - "lab" for JupyterLab
    # - "colab" for Google Colab
    # - "kaggle" for Kaggle Notebooks
    az.plot_forest(idata)
```

## Performance Optimization Strategies

### Combining Numba and Dask

```python
# Optimal configuration for large-scale analysis
def setup_high_performance():
    """Configure ArviZ for maximum performance."""

    # Enable JIT compilation
    az.Numba.enable_numba()

    # Enable parallel processing
    az.Dask.enable_dask({
        "scheduler": "threads",
        "num_workers": 4
    })

    print("High-performance mode enabled")

# Use for computationally intensive tasks
setup_high_performance()

# Large dataset processing (large_idata_list is a placeholder for your own
# list of InferenceData objects)
large_models = {f"model_{i}": large_idata_list[i] for i in range(10)}
comparison = az.compare(large_models)  # Fast parallel model comparison
```

### Memory-Efficient Processing

```python
# Configuration for memory-constrained environments
def setup_memory_efficient():
    """Configure ArviZ for memory efficiency."""

    # Use lazy loading
    az.rcParams["data.load"] = "lazy"

    # Disable warmup saving to reduce memory
    az.rcParams["data.save_warmup"] = False

    # Enable Numba for faster processing (little memory overhead)
    az.Numba.enable_numba()

    # Use process-based parallelism to avoid memory sharing
    az.Dask.enable_dask({
        "scheduler": "processes",
        "num_workers": 2  # Fewer workers to conserve memory
    })

setup_memory_efficient()
```

### Benchmark Performance

```python
import time

def benchmark_configuration():
    """Compare performance under different configurations."""

    # Load test data
    idata = az.load_arviz_data("rugby")

    # Baseline (no optimization)
    az.Numba.disable_numba()
    az.Dask.disable_dask()

    start = time.perf_counter()
    summary1 = az.summary(idata)
    baseline_time = time.perf_counter() - start

    # With Numba (warm-up call first so JIT compilation cost is not timed)
    az.Numba.enable_numba()
    az.summary(idata)

    start = time.perf_counter()
    summary2 = az.summary(idata)
    numba_time = time.perf_counter() - start

    # With Numba + Dask
    az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})

    start = time.perf_counter()
    summary3 = az.summary(idata)
    combined_time = time.perf_counter() - start

    print(f"Baseline: {baseline_time:.2f}s")
    print(f"Numba: {numba_time:.2f}s ({baseline_time/numba_time:.1f}x speedup)")
    print(f"Numba+Dask: {combined_time:.2f}s ({baseline_time/combined_time:.1f}x speedup)")

benchmark_configuration()
```

## Environment Detection

```python
def detect_environment():
    """Detect the current computational environment and optimize accordingly."""

    import sys

    # Detect Jupyter environments (these module checks are heuristics)
    if 'ipykernel' in sys.modules:
        if 'google.colab' in sys.modules:
            print("Google Colab detected")
            # Colab-specific optimizations
            az.rcParams["plot.backend"] = "matplotlib"
            az.Numba.enable_numba()

        elif 'ipywidgets' in sys.modules:
            print("JupyterLab detected")
            # JupyterLab optimizations
            az.rcParams["plot.backend"] = "bokeh"
            az.Numba.enable_numba()
            az.Dask.enable_dask({"scheduler": "threads", "num_workers": 2})

        else:
            print("Jupyter Notebook detected")
            az.rcParams["plot.backend"] = "matplotlib"
            az.Numba.enable_numba()

    else:
        print("Script/CLI environment detected")
        # Command-line optimizations
        az.Numba.enable_numba()
        az.Dask.enable_dask({"scheduler": "processes", "num_workers": 4})

# Auto-configure based on environment
detect_environment()
```

## Troubleshooting Performance Issues

### Numba Installation Issues

```python
try:
    az.Numba.enable_numba()
    print("Numba enabled successfully")
except ImportError:
    print("Numba not available. Install with: pip install numba")
except Exception as e:
    print(f"Numba error: {e}")
    print("Falling back to pure Python implementation")
    az.Numba.disable_numba()
```

### Dask Configuration Problems

```python
try:
    az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})
    print("Dask enabled successfully")
except ImportError:
    print("Dask not available. Install with: pip install dask")
except Exception as e:
    print(f"Dask error: {e}")
    print("Using single-threaded computation")
    az.Dask.disable_dask()
```

### Memory Issues

```python
def handle_memory_constraints():
    """Configure ArviZ for memory-constrained environments."""

    import psutil

    # Check available memory
    available_gb = psutil.virtual_memory().available / (1024**3)

    if available_gb < 4:
        print("Limited memory detected. Using conservative settings.")
        az.rcParams["data.load"] = "lazy"
        az.rcParams["data.save_warmup"] = False
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 1})

    elif available_gb < 8:
        print("Moderate memory available. Using balanced settings.")
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 2})
        az.Numba.enable_numba()

    else:
        print("Sufficient memory available. Using high-performance settings.")
        az.Dask.enable_dask({"scheduler": "threads", "num_workers": 4})
        az.Numba.enable_numba()

handle_memory_constraints()
```

## Additional Utility Functions

```python { .api }
def flatten_inference_data_to_dict(
    data: InferenceData,
    *,
    var_names: list = None,
    groups: list = None,
    dimensions: dict = None,
    group_info: bool = False,
    var_name_format: str = None,
    index_origin: int = None,
) -> dict:
    """
    Flatten InferenceData to dictionary format for external use.

    Converts ArviZ InferenceData objects to flat dictionary structures
    that can be used with other libraries or data analysis tools.

    Args:
        data (InferenceData): Input inference data to flatten
        var_names (list, optional): Variables to include in output
        groups (list, optional): Groups to include (default: all)
        dimensions (dict, optional): Dimension specifications
        group_info (bool): Whether to include group information (default: False)
        var_name_format (str, optional): Format string for variable names
        index_origin (int, optional): Starting index for array indexing

    Returns:
        dict: Flattened dictionary with data and metadata
    """
```
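
### Usage Example

A minimal sketch of flattening a bundled dataset. The import path is an assumption (in recent ArviZ releases the function lives in `arviz.utils` rather than the top-level namespace), and the printed keys depend on the dataset:

```python
import arviz as az
# Import path is an assumption; adjust if your ArviZ version exposes it elsewhere.
from arviz.utils import flatten_inference_data_to_dict

idata = az.load_arviz_data("centered_eight")

# Flatten only the posterior group, keeping group information in the keys
flat = flatten_inference_data_to_dict(
    idata,
    var_names=["mu", "tau"],
    groups=["posterior"],
    group_info=True,
)

# Inspect a few of the flattened keys
for key in list(flat)[:5]:
    print(key, type(flat[key]))
```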