or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

arithmetic.md data-types.md expressions.md index.md matrices.md probability.md statistics.md trigonometry.md units.md

statistics.mddocs/

0

# Statistics Functions

1

2

This document covers Math.js's statistical analysis capabilities, including descriptive statistics, data analysis functions, and statistical measures for arrays, matrices, and datasets.

3

4

## Import

5

6

```typescript

7

import {

8

// Central tendency

9

mean, median, mode,

10

// Variability

11

std, variance, mad,

12

// Distribution

13

min, max, range as mathRange, quantileSeq,

14

// Aggregation

15

sum, prod, count, cumsum,

16

// Correlation

17

corr,

18

// Utility

19

sort, partitionSelect

20

} from 'mathjs'

21

```

22

23

## Central Tendency Measures

24

25

### Mean (Average)

26

27

```typescript

28

mean(...values: MathType[]): MathType

29

mean(values: MathCollection, dim?: number): MathType | MathCollection

30

```

31

{ .api }

32

33

```typescript

34

// Array input

35

mean([1, 2, 3, 4, 5]) // 3

36

37

// Variadic arguments

38

mean(1, 2, 3, 4, 5) // 3

39

40

// Matrix operations

41

const data = matrix([[1, 2, 3], [4, 5, 6]])

42

mean(data) // 3.5 (overall mean)

43

mean(data, 0) // [2.5, 3.5, 4.5] (column means)

44

mean(data, 1) // [2, 5] (row means)

45

46

// With different number types

47

mean([bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]) // BigNumber(2.2)

48

49

// Complex numbers

50

mean([complex(1, 2), complex(3, 4)]) // Complex(2, 3)

51

```

52

53

### Median

54

55

```typescript

56

median(...values: MathType[]): MathType

57

```

58

{ .api }

59

60

```typescript

61

// Odd number of elements

62

median([1, 3, 5, 7, 9]) // 5 (middle value)

63

64

// Even number of elements

65

median([1, 2, 3, 4]) // 2.5 (average of two middle values)

66

67

// Variadic input

68

median(3, 1, 4, 1, 5, 9) // 3.5

69

70

// With duplicates

71

median([1, 1, 2, 3, 3, 3]) // 2.5

72

73

// Works with any comparable type

74

median([bignumber('1'), bignumber('2'), bignumber('3')]) // BigNumber(2)

75

```

76

77

### Mode

78

79

```typescript

80

mode(...values: MathType[]): MathType[]

81

```

82

{ .api }

83

84

```typescript

85

// Single mode

86

mode([1, 2, 2, 3, 4]) // [2]

87

88

// Multiple modes (bimodal)

89

mode([1, 1, 2, 2, 3]) // [1, 2]

90

91

// No mode (all unique)

92

mode([1, 2, 3, 4, 5]) // [1, 2, 3, 4, 5]

93

94

// String data

95

mode(['a', 'b', 'b', 'c']) // ['b']

96

97

// Variadic input

98

mode(1, 2, 2, 3, 2) // [2]

99

```

100

101

## Variability Measures

102

103

### Standard Deviation

104

105

```typescript

106

std(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType

107

```

108

{ .api }

109

110

```typescript

111

const data = [1, 2, 3, 4, 5]

112

113

// Default: unbiased (N-1 denominator)

114

std(data) // ~1.58 (sample standard deviation)

115

116

// Biased (N+1 denominator)

117

std(data, 'biased') // ~1.29

118

119

// Uncorrected (N denominator)

120

std(data, 'uncorrected') // ~1.41 (population standard deviation)

121

122

// Matrix operations

123

const matrix2d = matrix([[1, 2, 3], [4, 5, 6]])

124

std(matrix2d) // Standard deviation of all elements

125

126

// Column-wise std: pass the dimension to reduce along as the second argument.
// Dimension 0 collapses the rows, producing one value per column.
const colStds = std(matrix2d, 0) // [~2.12, ~2.12, ~2.12]

130

```

131

132

### Variance

133

134

```typescript

135

variance(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType

136

```

137

{ .api }

138

139

```typescript

140

const data = [1, 2, 3, 4, 5]

141

142

// Unbiased variance (sample variance)

143

variance(data) // 2.5 (N-1 denominator)

144

145

// Biased variance (N+1 denominator)

146

variance(data, 'biased') // ~1.67; use 'uncorrected' for the population variance: 2 (N denominator)

147

148

// Relationship: std = sqrt(variance)

149

sqrt(variance(data)) === std(data) // true

150

151

// With BigNumbers for high precision

152

const bigData = [bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]

153

variance(bigData) // BigNumber result

154

```

155

156

### Median Absolute Deviation

157

158

```typescript

159

mad(array: MathCollection): MathType

160

```

161

{ .api }

162

163

```typescript

164

// Robust measure of variability

165

const data = [1, 2, 3, 4, 100] // Contains outlier

166

mad(data) // ~1 (robust to outlier)

167

std(data) // ~43.6 (sensitive to outlier)

168

169

// MAD = median(|x_i - median(x)|)

170

const medianValue = median(data)

171

const deviations = data.map(x => abs(subtract(x, medianValue)))

172

mad(data) === median(deviations) // true

173

```

174

175

## Distribution Properties

176

177

### Minimum and Maximum

178

179

```typescript

180

min(...args: MathType[]): MathType
min(values: MathCollection, dim?: number): MathType | MathCollection

181

max(...args: MathType[]): MathType
max(values: MathCollection, dim?: number): MathType | MathCollection

182

```

183

{ .api }

184

185

```typescript

186

// Single array

187

min([3, 1, 4, 1, 5]) // 1

188

max([3, 1, 4, 1, 5]) // 5

189

190

// Variadic arguments

191

min(3, 1, 4, 1, 5) // 1

192

max(3, 1, 4, 1, 5) // 5

193

194

// Matrix operations

195

const data = matrix([[1, 5, 3], [2, 4, 6]])

196

min(data) // 1 (global minimum)

197

max(data) // 6 (global maximum)

198

199

// Dimension-wise operations

200

min(data, 0) // [1, 4, 3] (column minimums)

201

max(data, 0) // [2, 5, 6] (column maximums)

202

min(data, 1) // [1, 2] (row minimums)

203

max(data, 1) // [5, 6] (row maximums)

204

205

// With units

206

min([unit('5 m'), unit('300 cm'), unit('0.002 km')]) // unit('2 m')

207

```

208

209

### Range

210

211

```typescript

212

// Note: range() creates sequences; use subtract(max(), min()) for statistical range

213

```

214

215

```typescript

216

const data = [1, 3, 7, 2, 9, 4]

217

const dataRange = subtract(max(data), min(data)) // 9 - 1 = 8

218

219

// Interquartile range (IQR)

220

/**
 * Interquartile range: the difference between the 75th and 25th percentiles.
 *
 * @param data One-dimensional collection of comparable values.
 * @returns `Q3 - Q1` as computed by `quantileSeq`.
 */
function iqr(data) {
  // Sort once, then flag the data as sorted so quantileSeq can skip its own ordering step.
  const sorted = sort(data)
  const q1 = quantileSeq(sorted, 0.25, true)
  const q3 = quantileSeq(sorted, 0.75, true)
  return subtract(q3, q1)
}

227

```

228

229

### Quantiles

230

231

```typescript

232

quantileSeq(array: MathCollection, prob: MathType | MathCollection, sorted?: boolean): MathType

233

```

234

{ .api }

235

236

```typescript

237

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

238

239

// Single quantile

240

quantileSeq(data, 0.5) // 5.5 (median, 50th percentile)

241

quantileSeq(data, 0.25) // 3.25 (1st quartile)

242

quantileSeq(data, 0.75) // 7.75 (3rd quartile)

243

244

// Multiple quantiles

245

quantileSeq(data, [0.25, 0.5, 0.75]) // [3.25, 5.5, 7.75]

246

247

// Pre-sorted data (more efficient)

248

const sortedData = sort(data)

249

quantileSeq(sortedData, 0.9, true) // 9.1 (90th percentile)

250

251

// Common percentiles

252

const percentiles = [0.1, 0.25, 0.5, 0.75, 0.9]

253

quantileSeq(data, percentiles) // Deciles and quartiles

254

```

255

256

## Aggregation Functions

257

258

### Sum

259

260

```typescript

261

sum(...args: MathType[]): MathType
sum(values: MathCollection, dim?: number): MathType | MathCollection

262

```

263

{ .api }

264

265

```typescript

266

// Array sum

267

sum([1, 2, 3, 4, 5]) // 15

268

269

// Variadic arguments

270

sum(1, 2, 3, 4, 5) // 15

271

272

// Matrix operations

273

const data = matrix([[1, 2, 3], [4, 5, 6]])

274

sum(data) // 21 (total sum)

275

sum(data, 0) // [5, 7, 9] (column sums)

276

sum(data, 1) // [6, 15] (row sums)

277

278

// With different types

279

sum([fraction(1, 2), fraction(1, 3), fraction(1, 6)]) // Fraction(1, 1) = 1

280

sum([complex(1, 2), complex(3, 4)]) // Complex(4, 6)

281

```

282

283

### Product

284

285

```typescript

286

prod(...args: MathType[]): MathType

287

```

288

{ .api }

289

290

```typescript

291

// Array product

292

prod([1, 2, 3, 4, 5]) // 120

293

294

// Variadic arguments

295

prod(2, 3, 4) // 24

296

297

// Factorial using prod and range

298

prod(range(1, 6)) // 120 (5!)

299

300

// With fractions

301

prod([fraction(1, 2), fraction(2, 3), fraction(3, 4)]) // Fraction(1, 4)

302

```

303

304

### Count

305

306

```typescript

307

count(x: MathCollection): number

308

```

309

{ .api }

310

311

```typescript

312

// Count elements

313

count([1, 2, 3, 4, 5]) // 5

314

count([[1, 2], [3, 4], [5, 6]]) // 6 (total elements)

315

316

// Count non-zero elements (use filter)

317

const data = [1, 0, 3, 0, 5]

318

count(filter(data, x => !equal(x, 0))) // 3

319

320

// Count specific values

321

count(filter(data, x => equal(x, 0))) // 2 (zeros)

322

```

323

324

### Cumulative Sum

325

326

```typescript

327

cumsum(array: MathCollection, dim?: number): MathCollection

328

```

329

{ .api }

330

331

```typescript

332

// Running sum

333

cumsum([1, 2, 3, 4, 5]) // [1, 3, 6, 10, 15]

334

335

// Matrix operations

336

const data = matrix([[1, 2], [3, 4]])

337

cumsum(data, 0) // [[1, 2], [4, 6]] (accumulates down each column)

338

cumsum(data, 1) // [[1, 3], [3, 7]] (accumulates across each row)

339

340

// Financial applications: running total of simple (additive) returns — this is not a compounded return

341

const returns = [0.1, -0.05, 0.08, 0.03]

342

const cumulativeReturns = cumsum(returns) // [0.1, 0.05, 0.13, 0.16]

343

```

344

345

## Correlation Analysis

346

347

### Correlation Coefficient

348

349

```typescript

350

corr(x: MathCollection, y: MathCollection): MathType

351

```

352

{ .api }

353

354

```typescript

355

// Pearson correlation coefficient

356

const x = [1, 2, 3, 4, 5]

357

const y = [2, 4, 6, 8, 10] // Perfect positive correlation

358

corr(x, y) // 1

359

360

const z = [10, 8, 6, 4, 2] // Perfect negative correlation

361

corr(x, z) // -1

362

363

const w = [1, 3, 2, 5, 4] // Some correlation

364

corr(x, w) // ~0.8

365

366

// Weak correlation

367

const random1 = [1, 5, 3, 2, 4]

368

const random2 = [2, 1, 4, 5, 3]

369

corr(random1, random2) // -0.4 (weak negative correlation)

370

371

// Correlation matrix (manual implementation)

372

/**
 * Build the matrix of pairwise correlation coefficients between the columns of `data`.
 *
 * @param data Two-dimensional collection; `size(data)` is read as `[rows, columns]`,
 *             with each column treated as one variable.
 * @returns An n-by-n matrix (n = number of columns) whose entry `[i, j]` is
 *          `corr(column i, column j)`.
 */
function corrMatrix(data) {
  const dims = size(data)
  const rowCount = dims[0]
  const n = dims[1] // number of variables

  // Extract every column once up front; the pairwise loop below would
  // otherwise repeat the same subset() call n times per column.
  const allRows = range(0, rowCount)
  const columns = []
  for (let k = 0; k < n; k++) {
    columns.push(subset(data, index(allRows, k)))
  }

  const C = zeros(n, n)
  for (let i = 0; i < n; i++) {
    for (let j = 0; j < n; j++) {
      C.set([i, j], corr(columns[i], columns[j]))
    }
  }

  return C
}

386

```

387

388

## Advanced Statistical Functions

389

390

### Descriptive Statistics Summary

391

392

```typescript

393

// Create comprehensive summary statistics

394

/**
 * Summarise a one-dimensional dataset with common descriptive statistics.
 *
 * @param data One-dimensional collection of numeric values.
 * @returns An object with count, mean, std, min, quartiles ('25%', '50%', '75%'),
 *          max, range, iqr, mad, variance, skewness and kurtosis.
 *          `skewness` and `kurtosis` are the third and fourth standardized moments
 *          (kurtosis is NOT excess kurtosis; subtract 3 to get it).
 */
function describe(data) {
  const sorted = sort(data)

  // Compute shared quantities once and reuse them for the derived fields.
  const q1 = quantileSeq(sorted, 0.25, true)
  const q3 = quantileSeq(sorted, 0.75, true)
  const lowest = min(data)
  const highest = max(data)

  return {
    count: count(data),
    mean: mean(data),
    std: std(data),
    min: lowest,
    '25%': q1,
    '50%': median(data),
    '75%': q3,
    max: highest,
    range: subtract(highest, lowest),
    iqr: subtract(q3, q1),
    mad: mad(data),
    variance: variance(data),
    skewness: standardizedMoment(data, 3),
    kurtosis: standardizedMoment(data, 4)
  }
}

/**
 * k-th standardized moment: the mean of ((x - mean) / sigma)^k, where sigma is
 * the 'uncorrected' (N denominator) standard deviation.
 * When every value is identical sigma is 0, so the division is 0/0 and the
 * result is not a meaningful number.
 */
function standardizedMoment(data, order) {
  const mu = mean(data)
  const sigma = std(data, 'uncorrected')
  return mean(map(data, x => pow(divide(subtract(x, mu), sigma), order)))
}

415

416

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

417

const summary = describe(data)

418

```

419

420

### Z-Score Standardization

421

422

```typescript

423

/**
 * Standardize each value as (value - mean) / std, using the default
 * normalization of `std`.
 *
 * @param data Collection exposing `.map`.
 * @returns A collection of the same shape holding the z-scores.
 */
function zscore(data) {
  const center = mean(data)
  const spread = std(data)
  const standardize = value => divide(subtract(value, center), spread)
  return data.map(standardize)
}

428

429

const data = [1, 2, 3, 4, 5]

430

const standardized = zscore(data) // Mean ≈ 0, std ≈ 1

431

```

432

433

### Moving Statistics

434

435

```typescript

436

// Moving average

437

/**
 * Simple moving average over a sliding window.
 *
 * @param data Array of values.
 * @param window Number of consecutive values per average; must be a positive integer.
 * @returns One mean per full window (`data.length - window + 1` entries), or an
 *          empty array when the data is shorter than the window.
 * @throws RangeError when `window` is not a positive integer.
 */
function movingAverage(data, window) {
  // A zero, negative or fractional window would slice the wrong elements
  // (or hand mean() an empty array), so reject it up front.
  if (!Number.isInteger(window) || window < 1) {
    throw new RangeError('window must be a positive integer')
  }

  const result = []
  for (let end = window; end <= data.length; end++) {
    result.push(mean(data.slice(end - window, end)))
  }
  return result
}

445

446

// Exponential moving average

447

/**
 * Exponential moving average.
 *
 * The first output equals the first input; every later output is
 * `alpha * data[i] + (1 - alpha) * previousOutput`.
 *
 * @param data Array of values.
 * @param alpha Smoothing factor applied to the newest observation.
 * @returns Array of the same length as `data` (empty for empty input).
 */
function ema(data, alpha) {
  // Without this guard an empty input would produce [undefined].
  if (data.length === 0) return []

  const carry = subtract(1, alpha) // weight of the previous average; loop-invariant
  const result = [data[0]]
  for (let i = 1; i < data.length; i++) {
    result.push(add(multiply(alpha, data[i]), multiply(carry, result[i - 1])))
  }
  return result
}

458

459

const prices = [10, 12, 11, 13, 15, 14, 16]

460

const sma = movingAverage(prices, 3) // Simple moving average

461

const emaData = ema(prices, 0.3) // Exponential moving average

462

```

463

464

### Rank and Percentile Rank

465

466

```typescript

467

/**
 * Assign 1-based ranks to the values of `data` (smallest value gets rank 1).
 *
 * @param data Array of comparable values.
 * @param method How tied values are ranked:
 *   - 'average' (default): the mean of the ranks the tied values span
 *   - 'min': the lowest of those ranks
 *   - 'max': the highest of those ranks
 *   Without ties all three methods give the same result.
 * @returns Array of ranks aligned with `data`.
 * @throws RangeError when `method` is not one of the supported names.
 */
function rank(data, method = 'average') {
  if (method !== 'average' && method !== 'min' && method !== 'max') {
    throw new RangeError(`Unknown ranking method: ${method}`)
  }

  return data.map(value => {
    let below = 0 // values strictly smaller than `value`
    let ties = 0 // values equal to `value`, including itself
    for (const other of data) {
      if (smaller(other, value)) below++
      else if (equal(other, value)) ties++
    }

    // The tied values occupy ranks below + 1 .. below + ties.
    if (method === 'min') return below + 1
    if (method === 'max') return below + ties
    return below + (ties + 1) / 2
  })
}

474

475

/**
 * Percentage of entries in `data` that are strictly smaller than `value`.
 *
 * @param data Array of comparable values.
 * @param value Value to locate within the data.
 * @returns A number on a 0-100 scale.
 */
function percentileRank(data, value) {
  const below = data.filter(entry => smaller(entry, value))
  const share = divide(below.length, data.length)
  return share * 100
}

479

480

const scores = [85, 90, 78, 92, 88]

481

const ranks = rank(scores) // [2, 4, 1, 5, 3]

482

const pRank = percentileRank(scores, 88) // 40 (88 is higher than 40% of the scores)

483

```

484

485

## Frequency Analysis

486

487

### Histogram (Binning)

488

489

```typescript

490

/**
 * Bin numeric data into `bins` equal-width intervals spanning [min, max].
 *
 * @param data Array of numeric values.
 * @param bins Number of bins; must be a positive integer (default 10).
 * @returns `counts` (one entry per bin), `edges` (bins + 1 boundaries) and `binWidth`.
 *          The last bin is closed on the right, so the maximum value is counted in it.
 *          When every value is identical the bin width is 0 and all values are
 *          counted in the first bin.
 * @throws RangeError when `bins` is not a positive integer.
 */
function histogram(data, bins = 10) {
  if (!Number.isInteger(bins) || bins < 1) {
    throw new RangeError('bins must be a positive integer')
  }

  const dataMin = min(data)
  const dataMax = max(data)
  const binWidth = divide(subtract(dataMax, dataMin), bins)

  const edges = []
  for (let i = 0; i <= bins; i++) {
    edges.push(add(dataMin, multiply(i, binWidth)))
  }

  const counts = new Array(bins).fill(0)
  // A zero-width range would make the index computation divide by zero.
  const zeroWidth = equal(dataMin, dataMax)

  data.forEach(value => {
    const rawIndex = zeroWidth
      ? 0
      : Number(floor(divide(subtract(value, dataMin), binWidth)))
    // value === dataMax lands exactly on the last edge; fold it into the last bin.
    const binIndex = Math.min(rawIndex, bins - 1)
    counts[binIndex]++
  })

  return { counts, edges, binWidth }
}

510

511

const data = [1.1, 1.5, 2.3, 2.8, 3.2, 3.9, 4.1, 4.7, 5.2, 5.8]

512

const hist = histogram(data, 5)

513

```

514

515

### Frequency Table

516

517

```typescript

518

/**
 * Count how often each value occurs.
 *
 * @param data Collection exposing `.forEach`.
 * @returns A Map from the `string(value)` representation of each value to its
 *          number of occurrences, in first-seen order.
 */
function frequencyTable(data) {
  const tally = new Map()

  data.forEach(item => {
    // Values are keyed by their string form so equal values share one entry.
    const label = string(item)
    const seenSoFar = tally.get(label) || 0
    tally.set(label, seenSoFar + 1)
  })

  return tally
}

528

529

const categories = ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'C']

530

const freqTable = frequencyTable(categories)

531

// Map: { 'A' => 3, 'B' => 2, 'C' => 3 }

532

```

533

534

## Working with Missing Data

535

536

```typescript

537

// Filter out NaN/null/undefined values

538

/**
 * Drop missing entries (null, undefined, NaN) from a collection.
 *
 * @param data Collection accepted by `filter`.
 * @returns A new collection containing only the present values.
 */
function cleanData(data) {
  // Rule out null/undefined first so isNaN is only ever asked about real values.
  return filter(data, value =>
    value !== null &&
    value !== undefined &&
    !isNaN(value)
  )
}

545

546

// Replace missing values with mean

547

/**
 * Replace missing entries (null, undefined, NaN) with a fill value.
 *
 * @param data Collection exposing `.map`.
 * @param fillValue Replacement for missing entries. When omitted (or null), the
 *                  mean of the non-missing values is used.
 * @returns A new collection with the same length as `data`.
 */
function fillMissing(data, fillValue = null) {
  // Check null/undefined before isNaN so isNaN only sees real values.
  const isMissing = value =>
    value === null || value === undefined || isNaN(value)

  // Only pay for the cleaning pass when the mean is actually needed.
  const replacement = fillValue !== null ? fillValue : mean(cleanData(data))

  return data.map(value => (isMissing(value) ? replacement : value))
}

557

558

const dataWithMissing = [1, 2, NaN, 4, 5, null, 7]

559

const filled = fillMissing(dataWithMissing) // [1, 2, 3.8, 4, 5, 3.8, 7]

560

```

561

562

## Performance Optimization

563

564

### Large Dataset Processing

565

566

```typescript

567

/**
 * Summarise a large dataset in fixed-size batches.
 *
 * Every value is first converted with `number()`, then the data is cut into
 * consecutive batches and each batch is summarised independently.
 *
 * @param data Array of values convertible with `number()`.
 * @param batchSize Values per batch; must be a positive integer (default 1000).
 * @returns One `{ mean, std, min, max }` record per batch, in input order.
 *          The final batch may be shorter than `batchSize`.
 * @throws RangeError when `batchSize` is not a positive integer.
 */
function processLargeDataset(data, batchSize = 1000) {
  // A step of zero (or less) would keep the batching loop from ever advancing.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError('batchSize must be a positive integer')
  }

  const numericData = data.map(x => number(x))
  const results = []

  for (let start = 0; start < numericData.length; start += batchSize) {
    const batch = numericData.slice(start, start + batchSize)
    results.push({
      mean: mean(batch),
      std: std(batch),
      min: min(batch),
      max: max(batch)
    })
  }

  return results
}

588

```

589

590

### Streaming Statistics

591

592

```typescript

593

// Online algorithms for streaming data

594

/**
 * Running mean / variance accumulator that consumes one value at a time,
 * keeping only a count, the current mean and a sum of squared deviations.
 */
class StreamingStats {
  /** Number of values seen so far. */
  n = 0
  /** Mean of the values seen so far. */
  mean = 0
  /** Accumulated squared-deviation term used by getVariance(). */
  m2 = 0

  /** Fold one more observation into the running statistics. */
  update(value) {
    this.n += 1
    // Deviation from the mean before and after the mean absorbs the new value.
    const offsetBefore = subtract(value, this.mean)
    this.mean = add(this.mean, divide(offsetBefore, this.n))
    const offsetAfter = subtract(value, this.mean)
    this.m2 = add(this.m2, multiply(offsetBefore, offsetAfter))
  }

  /** Current mean (0 before any update). */
  getMean() {
    return this.mean
  }

  /** Variance with an n - 1 denominator; 0 while fewer than two values were seen. */
  getVariance() {
    if (this.n < 2) {
      return 0
    }
    return divide(this.m2, subtract(this.n, 1))
  }

  /** Square root of getVariance(). */
  getStd() {
    return sqrt(this.getVariance())
  }
}

621

622

// Usage for large streaming datasets

623

const stats = new StreamingStats()

624

largeDataStream.forEach(value => stats.update(value))

625

```

626

627

## Chain Operations

628

629

All statistical functions work with the chain interface:

630

631

```typescript

632

const result = chain([1, 2, 3, 4, 5])

633

.mean() // 3

634

.done()

635

636

const analysis = chain(dataset)

637

.filter(x => larger(x, 0)) // Remove non-positive values

638

.map(x => log(x)) // Log transform

639

.std() // Standard deviation of log values

640

.done()

641

```

642

643

## Common Statistical Patterns

644

645

### Normalization and Scaling

646

647

```typescript

648

// Min-max scaling to [0, 1]

649

/**
 * Min-max scaling: map the data linearly so the minimum becomes 0 and the maximum 1.
 *
 * @param data Collection exposing `.map`.
 * @returns A collection of the same shape with values in [0, 1].
 *          When all values are identical (zero span) every value maps to 0
 *          instead of dividing by zero.
 */
function minMaxScale(data) {
  const dataMin = min(data)
  const span = subtract(max(data), dataMin)

  // Constant data has no spread to scale by.
  if (isZero(span)) {
    return data.map(() => 0)
  }

  return data.map(x => divide(subtract(x, dataMin), span))
}

655

656

// Robust scaling (using median and MAD)

657

/**
 * Robust scaling: centre each value on the median and divide by the
 * median absolute deviation.
 *
 * @param data Collection exposing `.map`.
 * @returns A collection of the same shape holding (x - median) / MAD.
 */
function robustScale(data) {
  const center = median(data)
  const spread = mad(data)
  const rescale = x => divide(subtract(x, center), spread)
  return data.map(rescale)
}

662

```

663

664

### Outlier Detection

665

666

```typescript

667

// IQR-based outlier detection

668

/**
 * IQR-based outlier detection.
 *
 * A value is an outlier when it is smaller than Q1 - factor * IQR or larger
 * than Q3 + factor * IQR.
 *
 * @param data One-dimensional collection of comparable values.
 * @param factor Multiplier applied to the IQR (default 1.5).
 * @returns `outliers` (values outside the bounds), `bounds` ({ lower, upper })
 *          and `cleaned` (values within the bounds).
 */
function detectOutliers(data, factor = 1.5) {
  const ordered = sort(data)
  const lowerQuartile = quantileSeq(ordered, 0.25, true)
  const upperQuartile = quantileSeq(ordered, 0.75, true)

  // Distance the bounds extend beyond the quartiles.
  const reach = multiply(factor, subtract(upperQuartile, lowerQuartile))
  const lower = subtract(lowerQuartile, reach)
  const upper = add(upperQuartile, reach)

  const isOutside = x => smaller(x, lower) || larger(x, upper)

  return {
    outliers: filter(data, isOutside),
    bounds: { lower: lower, upper: upper },
    cleaned: filter(data, x => !isOutside(x))
  }
}

683

684

// Z-score based outlier detection

685

/**
 * Z-score based outlier detection.
 *
 * @param data Array of values; scored with `zscore`.
 * @param threshold Absolute z-score above which a value counts as an outlier (default 3).
 * @returns `outliers` (the offending values) and `indices` (their positions in `data`).
 */
function detectOutliersZScore(data, threshold = 3) {
  const scores = zscore(data)
  const isExtreme = z => larger(abs(z), threshold)

  const indices = []
  scores.forEach((z, position) => {
    if (isExtreme(z)) {
      indices.push(position)
    }
  })

  return {
    outliers: data.filter((_, position) => isExtreme(scores[position])),
    indices: indices
  }
}

692

```