or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-loading.md dataflow.md events.md expressions.md index.md parsing.md scales.md scenegraph.md statistics.md time.md utilities.md view.md

docs/statistics.md

0

# Statistical Functions

1

2

Vega's comprehensive statistical analysis library provides distribution functions, regression analysis, binning, bootstrap methods, and random number generation for data analysis and visualization.

3

4

## Capabilities

5

6

### Distribution Functions

7

8

Core statistical distribution analysis and data binning.

9

10

```typescript { .api }

11

/**

12

* Calculate the optimal bandwidth for kernel density estimation using the Normal Reference Distribution (NRD) rule

13

* @param values - Array of numeric values

14

* @returns Optimal bandwidth value

15

*/

16

function bandwidthNRD(values: number[]): number;

17

18

/**

19

* Create histogram bins for data

20

* @param options - Binning configuration options

21

* @returns Binning function

22

*/

23

function bin(options: BinOptions): (values: number[]) => Bin[];

24

25

/**

26

* Create dot plot bins with specified parameters

27

* @param options - Dot binning options

28

* @returns Dot binning function

29

*/

30

function dotbin(options: DotBinOptions): (values: number[]) => DotBin[];

31

32

/**

33

* Calculate data quantiles

34

* @param values - Array of numeric values

35

* @param p - Array of quantile probabilities (0-1)

36

* @returns Array of quantile values

37

*/

38

function quantiles(values: number[], p: number[]): number[];

39

40

/**

41

* Calculate data quartiles (25th, 50th, 75th percentiles)

42

* @param values - Array of numeric values

43

* @returns Tuple of [Q1, Q2, Q3] values

44

*/

45

function quartiles(values: number[]): [number, number, number];

46

47

interface BinOptions {

48

/** Minimum bin boundary */

49

min?: number;

50

51

/** Maximum bin boundary */

52

max?: number;

53

54

/** Maximum number of bins */

55

maxbins?: number;

56

57

/** Bin step size */

58

step?: number;

59

60

/** Allowable step sizes to choose from */

61

steps?: number[];

62

63

/** Nice bin boundaries */

64

nice?: boolean;

65

}

66

67

interface Bin {

68

/** Bin start value */

69

x0: number;

70

71

/** Bin end value */

72

x1: number;

73

74

/** Count of values in bin */

75

count: number;

76

}

77

78

interface DotBinOptions {

79

/** Dot size */

80

size?: number;

81

82

/** Spacing between dots */

83

spacing?: number;

84

85

/** Grouping field */

86

groupby?: string;

87

}

88

89

interface DotBin {

90

/** Bin position */

91

bin: number;

92

93

/** Values in bin */

94

values: any[];

95

96

/** Bin count */

97

count: number;

98

}

99

```

100

101

### Random Number Generation

102

103

Comprehensive random number generation with multiple distribution support.

104

105

```typescript { .api }

106

/**

107

* Set global random number generator

108

* @param rng - Random number generator function

109

*/

110

function setRandom(rng: () => number): void;

111

112

/**

113

* Get global random number generator

114

* @returns Current random number generator

115

*/

116

function random(): () => number;

117

118

/**

119

* Linear congruential generator for reproducible random numbers

120

* @param seed - Optional seed value

121

* @returns Random number generator function

122

*/

123

function randomLCG(seed?: number): () => number;

124

125

/**

126

* Random integer generator

127

* @param min - Minimum value (inclusive)

128

* @param max - Maximum value (exclusive)

129

* @returns Random integer generator function

130

*/

131

function randomInteger(min: number, max: number): () => number;

132

133

/**

134

* Kernel density estimation random sampling

135

* @param data - Data points for KDE

136

* @param bandwidth - KDE bandwidth

137

* @returns Random sampler function

138

*/

139

function randomKDE(data: number[], bandwidth?: number): () => number;

140

141

/**

142

* Random sampling from mixture distributions

143

* @param distributions - Array of distribution objects

144

* @param weights - Optional weights for each distribution

145

* @returns Random sampler function

146

*/

147

function randomMixture(distributions: Distribution[], weights?: number[]): () => number;

148

149

interface Distribution {

150

/** Distribution sampler function */

151

sample: () => number;

152

153

/** Distribution weight */

154

weight?: number;

155

}

156

```

157

158

### Normal Distribution

159

160

Complete normal distribution functions including sampling, density, and quantiles.

161

162

```typescript { .api }

163

/**

164

* Random normal distribution generator

165

* @param mu - Mean (default: 0)

166

* @param sigma - Standard deviation (default: 1)

167

* @returns Random normal sampler function

168

*/

169

function randomNormal(mu?: number, sigma?: number): () => number;

170

171

/**

172

* Sample from normal distribution

173

* @param mu - Mean

174

* @param sigma - Standard deviation

175

* @returns Random sample

176

*/

177

function sampleNormal(mu: number, sigma: number): number;

178

179

/**

180

* Normal distribution probability density function

181

* @param x - Value to evaluate

182

* @param mu - Mean

183

* @param sigma - Standard deviation

184

* @returns Density value

185

*/

186

function densityNormal(x: number, mu: number, sigma: number): number;

187

188

/**

189

* Normal distribution cumulative distribution function

190

* @param x - Value to evaluate

191

* @param mu - Mean

192

* @param sigma - Standard deviation

193

* @returns Cumulative probability

194

*/

195

function cumulativeNormal(x: number, mu: number, sigma: number): number;

196

197

/**

198

* Normal distribution quantile function (inverse CDF)

199

* @param p - Probability (0-1)

200

* @param mu - Mean

201

* @param sigma - Standard deviation

202

* @returns Quantile value

203

*/

204

function quantileNormal(p: number, mu: number, sigma: number): number;

205

```

206

207

### Log-Normal Distribution

208

209

Log-normal distribution functions for skewed data analysis.

210

211

```typescript { .api }

212

/**

213

* Random log-normal distribution generator

214

* @param mu - Log-scale mean (default: 0)

215

* @param sigma - Log-scale standard deviation (default: 1)

216

* @returns Random log-normal sampler function

217

*/

218

function randomLogNormal(mu?: number, sigma?: number): () => number;

219

220

/**

221

* Sample from log-normal distribution

222

* @param mu - Log-scale mean

223

* @param sigma - Log-scale standard deviation

224

* @returns Random sample

225

*/

226

function sampleLogNormal(mu: number, sigma: number): number;

227

228

/**

229

* Log-normal distribution probability density function

230

* @param x - Value to evaluate

231

* @param mu - Log-scale mean

232

* @param sigma - Log-scale standard deviation

233

* @returns Density value

234

*/

235

function densityLogNormal(x: number, mu: number, sigma: number): number;

236

237

/**

238

* Log-normal distribution cumulative distribution function

239

* @param x - Value to evaluate

240

* @param mu - Log-scale mean

241

* @param sigma - Log-scale standard deviation

242

* @returns Cumulative probability

243

*/

244

function cumulativeLogNormal(x: number, mu: number, sigma: number): number;

245

246

/**

247

* Log-normal distribution quantile function

248

* @param p - Probability (0-1)

249

* @param mu - Log-scale mean

250

* @param sigma - Log-scale standard deviation

251

* @returns Quantile value

252

*/

253

function quantileLogNormal(p: number, mu: number, sigma: number): number;

254

```

255

256

### Uniform Distribution

257

258

Uniform distribution functions for random sampling within ranges.

259

260

```typescript { .api }

261

/**

262

* Random uniform distribution generator

263

* @param min - Minimum value (default: 0)

264

* @param max - Maximum value (default: 1)

265

* @returns Random uniform sampler function

266

*/

267

function randomUniform(min?: number, max?: number): () => number;

268

269

/**

270

* Sample from uniform distribution

271

* @param min - Minimum value

272

* @param max - Maximum value

273

* @returns Random sample

274

*/

275

function sampleUniform(min: number, max: number): number;

276

277

/**

278

* Uniform distribution probability density function

279

* @param x - Value to evaluate

280

* @param min - Minimum value

281

* @param max - Maximum value

282

* @returns Density value

283

*/

284

function densityUniform(x: number, min: number, max: number): number;

285

286

/**

287

* Uniform distribution cumulative distribution function

288

* @param x - Value to evaluate

289

* @param min - Minimum value

290

* @param max - Maximum value

291

* @returns Cumulative probability

292

*/

293

function cumulativeUniform(x: number, min: number, max: number): number;

294

295

/**

296

* Uniform distribution quantile function

297

* @param p - Probability (0-1)

298

* @param min - Minimum value

299

* @param max - Maximum value

300

* @returns Quantile value

301

*/

302

function quantileUniform(p: number, min: number, max: number): number;

303

```

304

305

### Regression Analysis

306

307

Comprehensive regression methods for trend analysis and curve fitting.

308

309

```typescript { .api }

310

/**

311

* Constant regression (horizontal line)

312

* @param data - Array of [x, y] data points

313

* @returns Regression result with constant value

314

*/

315

function regressionConstant(data: [number, number][]): RegressionResult;

316

317

/**

318

* Linear regression (y = ax + b)

319

* @param data - Array of [x, y] data points

320

* @returns Linear regression result

321

*/

322

function regressionLinear(data: [number, number][]): RegressionResult;

323

324

/**

325

* Logarithmic regression (y = a * log(x) + b)

326

* @param data - Array of [x, y] data points

327

* @returns Logarithmic regression result

328

*/

329

function regressionLog(data: [number, number][]): RegressionResult;

330

331

/**

332

* Exponential regression (y = a * e^(b * x))

333

* @param data - Array of [x, y] data points

334

* @returns Exponential regression result

335

*/

336

function regressionExp(data: [number, number][]): RegressionResult;

337

338

/**

339

* Power regression (y = a * x^b)

340

* @param data - Array of [x, y] data points

341

* @returns Power regression result

342

*/

343

function regressionPow(data: [number, number][]): RegressionResult;

344

345

/**

346

* Quadratic regression (y = ax^2 + bx + c)

347

* @param data - Array of [x, y] data points

348

* @returns Quadratic regression result

349

*/

350

function regressionQuad(data: [number, number][]): RegressionResult;

351

352

/**

353

* Polynomial regression of specified degree

354

* @param data - Array of [x, y] data points

355

* @param order - Polynomial degree

356

* @returns Polynomial regression result

357

*/

358

function regressionPoly(data: [number, number][], order: number): RegressionResult;

359

360

/**

361

* LOESS (locally weighted regression) smoothing

362

* @param data - Array of [x, y] data points

363

* @param options - LOESS configuration options

364

* @returns LOESS regression result

365

*/

366

function regressionLoess(data: [number, number][], options?: LoessOptions): RegressionResult;

367

368

interface RegressionResult {

369

/** Regression coefficients */

370

coef: number[];

371

372

/** R-squared value */

373

rSquared: number;

374

375

/** Prediction function */

376

predict: (x: number) => number;

377

378

/** Generate curve points */

379

points: (n?: number) => [number, number][];

380

}

381

382

interface LoessOptions {

383

/** Bandwidth parameter (0-1) */

384

bandwidth?: number;

385

386

/** Number of robustness iterations */

387

robustnessIterations?: number;

388

389

/** Accuracy threshold */

390

accuracy?: number;

391

}

392

```

393

394

### Bootstrap Methods

395

396

Bootstrap resampling for confidence interval estimation, plus a helper for sampling points along a curve.

397

398

```typescript { .api }

399

/**

400

* Bootstrap confidence interval calculation

401

* @param data - Original dataset

402

* @param statistic - Statistic function to bootstrap

403

* @param options - Bootstrap configuration

404

* @returns Bootstrap confidence interval

405

*/

406

function bootstrapCI(

407

data: any[],

408

statistic: (sample: any[]) => number,

409

options?: BootstrapOptions

410

): BootstrapResult;

411

412

/**

413

* Sample points along a curve

414

* @param curve - Curve function or data points

415

* @param options - Sampling options

416

* @returns Array of sampled points

417

*/

418

function sampleCurve(

419

curve: ((x: number) => number) | [number, number][],

420

options?: SampleCurveOptions

421

): [number, number][];

422

423

interface BootstrapOptions {

424

/** Number of bootstrap samples */

425

samples?: number;

426

427

/** Confidence level (default: 0.95) */

428

confidence?: number;

429

430

/** Random number generator */

431

random?: () => number;

432

}

433

434

interface BootstrapResult {

435

/** Lower confidence bound */

436

lo: number;

437

438

/** Upper confidence bound */

439

hi: number;

440

441

/** Bootstrap samples */

442

samples?: number[];

443

}

444

445

interface SampleCurveOptions {

446

/** Number of sample points */

447

steps?: number;

448

449

/** X-axis range */

450

extent?: [number, number];

451

452

/** Minimum X value */

453

min?: number;

454

455

/** Maximum X value */

456

max?: number;

457

}

458

```

459

460

## Usage Examples

461

462

### Data Binning

463

464

```typescript

465

import { bin, quantiles } from "vega";

466

467

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

468

469

// Create histogram bins

470

const binFunc = bin({ maxbins: 5, nice: true });

471

const bins = binFunc(data);

472

console.log(bins);

473

// [{x0: 0, x1: 2, count: 1}, {x0: 2, x1: 4, count: 2}, ...]

474

475

// Calculate quantiles

476

const q = quantiles(data, [0.25, 0.5, 0.75]);

477

console.log(q); // [3.25, 5.5, 7.75]

478

```

479

480

### Random Sampling

481

482

```typescript

483

import { randomNormal, randomUniform, setRandom, randomLCG } from "vega";

484

485

// Use reproducible random generator

486

setRandom(randomLCG(42));

487

488

// Generate normal random numbers

489

const normal = randomNormal(0, 1);

490

const samples = Array.from({length: 1000}, () => normal());

491

492

// Generate uniform random numbers

493

const uniform = randomUniform(0, 100);

494

const uniformSamples = Array.from({length: 100}, () => uniform());

495

```

496

497

### Regression Analysis

498

499

```typescript

500

import { regressionLinear, regressionLoess, sampleCurve } from "vega";

501

502

const data = [

503

[1, 2], [2, 4], [3, 7], [4, 9], [5, 12]

504

];

505

506

// Linear regression

507

const linear = regressionLinear(data);

508

console.log(linear.coef); // [intercept, slope]

509

console.log(linear.rSquared); // R-squared value

510

511

// Generate prediction points

512

const predictions = linear.points(50);

513

514

// LOESS smoothing

515

const loess = regressionLoess(data, {

516

bandwidth: 0.3,

517

robustnessIterations: 2

518

});

519

520

const smoothed = loess.points(100);

521

```

522

523

### Distribution Functions

524

525

```typescript

526

import {

527

densityNormal,

528

cumulativeNormal,

529

quantileNormal,

530

sampleNormal

531

} from "vega";

532

533

// Calculate normal distribution values

534

const x = 1.5;

535

const mu = 0, sigma = 1;

536

537

const density = densityNormal(x, mu, sigma);

538

const cumulative = cumulativeNormal(x, mu, sigma);

539

const quantile = quantileNormal(0.95, mu, sigma);

540

541

console.log({ density, cumulative, quantile });

542

543

// Generate samples

544

const samples = Array.from({length: 1000}, () => sampleNormal(mu, sigma));

545

```

546

547

### Bootstrap Confidence Intervals

548

549

```typescript

550

import { bootstrapCI } from "vega";

551

552

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

553

554

// Bootstrap mean confidence interval

555

const meanCI = bootstrapCI(data, (sample) => {

556

return sample.reduce((a, b) => a + b) / sample.length;

557

}, {

558

samples: 1000,

559

confidence: 0.95

560

});

561

562

console.log(`Mean 95% CI: [${meanCI.lo}, ${meanCI.hi}]`);

563

564

// Bootstrap median confidence interval

565

const medianCI = bootstrapCI(data, (sample) => {

566

const sorted = sample.slice().sort((a, b) => a - b);

567

const mid = Math.floor(sorted.length / 2);

568

return sorted.length % 2 ? sorted[mid] : (sorted[mid-1] + sorted[mid]) / 2;

569

});

570

571

console.log(`Median 95% CI: [${medianCI.lo}, ${medianCI.hi}]`);

572

```

573

574

### Kernel Density Estimation

575

576

```typescript

577

import { randomKDE, bandwidthNRD } from "vega";

578

579

const observations = [1, 2, 2.5, 3, 3.5, 4, 4.5, 5];

580

581

// Calculate optimal bandwidth

582

const bandwidth = bandwidthNRD(observations);

583

584

// Create KDE sampler

585

const kdesampler = randomKDE(observations, bandwidth);

586

587

// Generate samples from estimated distribution

588

const samples = Array.from({length: 1000}, () => kdesampler());

589

```

590

591

### Advanced Binning

592

593

```typescript

594

import { dotbin } from "vega";

595

596

const values = [1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5];

597

598

// Create dot plot bins

599

const dotBinFunc = dotbin({

600

size: 1,

601

spacing: 0.1

602

});

603

604

const dots = dotBinFunc(values);

605

console.log(dots);

606

// Array of dot bins with positions and counts

607

```