0
# Statistics Functions
1
2
This document covers Math.js's statistical analysis capabilities, including descriptive statistics, data analysis functions, and statistical measures for arrays, matrices, and datasets.
3
4
## Import
5
6
```typescript
7
import {
8
// Central tendency
9
mean, median, mode,
10
// Variability
11
std, variance, mad,
12
// Distribution
13
min, max, range as mathRange, quantileSeq,
14
// Aggregation
15
sum, prod, count, cumsum,
16
// Correlation
17
corr,
18
// Utility
19
sort, partitionSelect
20
} from 'mathjs'
21
```
22
23
## Central Tendency Measures
24
25
### Mean (Average)
26
27
```typescript
28
mean(...values: MathType[]): MathType
29
mean(values: MathCollection, dim?: number): MathType | MathCollection
30
```
31
{ .api }
32
33
```typescript
34
// Array input
35
mean([1, 2, 3, 4, 5]) // 3
36
37
// Variadic arguments
38
mean(1, 2, 3, 4, 5) // 3
39
40
// Matrix operations
41
const data = matrix([[1, 2, 3], [4, 5, 6]])
42
mean(data) // 3.5 (overall mean)
43
mean(data, 0) // [2.5, 3.5, 4.5] (column means)
44
mean(data, 1) // [2, 5] (row means)
45
46
// With different number types
47
mean([bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]) // BigNumber(2.2)
48
49
// Complex numbers
50
mean([complex(1, 2), complex(3, 4)]) // Complex(2, 3)
51
```
52
53
### Median
54
55
```typescript
56
median(...values: MathType[]): MathType
57
```
58
{ .api }
59
60
```typescript
61
// Odd number of elements
62
median([1, 3, 5, 7, 9]) // 5 (middle value)
63
64
// Even number of elements
65
median([1, 2, 3, 4]) // 2.5 (average of two middle values)
66
67
// Variadic input
68
median(3, 1, 4, 1, 5, 9) // 3.5
69
70
// With duplicates
71
median([1, 1, 2, 3, 3, 3]) // 2.5
72
73
// Works with any comparable type
74
median([bignumber('1'), bignumber('2'), bignumber('3')]) // BigNumber(2)
75
```
76
77
### Mode
78
79
```typescript
80
mode(...values: MathType[]): MathType[]
81
```
82
{ .api }
83
84
```typescript
85
// Single mode
86
mode([1, 2, 2, 3, 4]) // [2]
87
88
// Multiple modes (bimodal)
89
mode([1, 1, 2, 2, 3]) // [1, 2]
90
91
// No mode (all unique)
92
mode([1, 2, 3, 4, 5]) // [1, 2, 3, 4, 5]
93
94
// String data
95
mode(['a', 'b', 'b', 'c']) // ['b']
96
97
// Variadic input
98
mode(1, 2, 2, 3, 2) // [2]
99
```
100
101
## Variability Measures
102
103
### Standard Deviation
104
105
```typescript
106
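std(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType
std(array: MathCollection, dimension: number, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathCollection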
107
```
108
{ .api }
109
110
```typescript
111
const data = [1, 2, 3, 4, 5]
112
113
// Default: unbiased (N-1 denominator)
114
std(data) // ~1.58 (sample standard deviation)
115
116
// Biased (N+1 denominator)
std(data, 'biased') // ~1.29

// Uncorrected (N denominator)
std(data, 'uncorrected') // ~1.41 (population standard deviation)
121
122
// Matrix operations
123
const matrix2d = matrix([[1, 2, 3], [4, 5, 6]])
124
std(matrix2d) // Standard deviation of all elements
125
126
// Dimension-wise std: pass the dimension as the second argument
const colStds = std(matrix2d, 0) // [~2.12, ~2.12, ~2.12] (one value per column)
const rowStds = std(matrix2d, 1) // [1, 1] (one value per row)
130
```
131
132
### Variance
133
134
```typescript
135
variance(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType
136
```
137
{ .api }
138
139
```typescript
140
const data = [1, 2, 3, 4, 5]
141
142
// Unbiased variance (sample variance)
143
variance(data) // 2.5 (N-1 denominator)
144
145
// Uncorrected variance (population variance)
variance(data, 'uncorrected') // 2 (N denominator)

// Biased variance
variance(data, 'biased') // ~1.67 (N+1 denominator)
147
148
// Relationship: std = sqrt(variance)
149
sqrt(variance(data)) === std(data) // true
150
151
// With BigNumbers for high precision
152
const bigData = [bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]
153
variance(bigData) // BigNumber result
154
```
155
156
### Median Absolute Deviation
157
158
```typescript
159
mad(array: MathCollection): MathType
160
```
161
{ .api }
162
163
```typescript
164
// Robust measure of variability
165
const data = [1, 2, 3, 4, 100] // Contains outlier
166
mad(data) // ~1 (robust to outlier)
167
std(data) // ~43.6 (sensitive to outlier)
168
169
// MAD = median(|x_i - median(x)|)
170
const medianValue = median(data)
171
const deviations = data.map(x => abs(subtract(x, medianValue)))
172
mad(data) === median(deviations) // true
173
```
174
175
## Distribution Properties
176
177
### Minimum and Maximum
178
179
```typescript
180
min(...args: MathType[], dim?: number): MathType | MathCollection
181
max(...args: MathType[], dim?: number): MathType | MathCollection
182
```
183
{ .api }
184
185
```typescript
186
// Single array
187
min([3, 1, 4, 1, 5]) // 1
188
max([3, 1, 4, 1, 5]) // 5
189
190
// Variadic arguments
191
min(3, 1, 4, 1, 5) // 1
192
max(3, 1, 4, 1, 5) // 5
193
194
// Matrix operations
195
const data = matrix([[1, 5, 3], [2, 4, 6]])
196
min(data) // 1 (global minimum)
197
max(data) // 6 (global maximum)
198
199
// Dimension-wise operations
200
min(data, 0) // [1, 4, 3] (column minimums)
201
max(data, 0) // [2, 5, 6] (column maximums)
202
min(data, 1) // [1, 2] (row minimums)
203
max(data, 1) // [5, 6] (row maximums)
204
205
// With units
206
min([unit('5 m'), unit('300 cm'), unit('0.002 km')]) // unit('2 m')
207
```
208
209
### Range
210
211
```typescript
212
// Note: range() creates sequences; use subtract(max(), min()) for statistical range
213
```
214
215
```typescript
216
const data = [1, 3, 7, 2, 9, 4]
217
const dataRange = subtract(max(data), min(data)) // 9 - 1 = 8
218
219
// Interquartile range (IQR)
220
/**
 * Interquartile range: the spread of the middle 50% of the data (Q3 - Q1).
 *
 * @param data - Collection of comparable values.
 * @returns The difference between the 75th and the 25th percentile.
 */
function iqr(data) {
  // Sort once, then tell quantileSeq the input is already ordered so it
  // does not have to sort again for each quantile.
  const sorted = sort(data)
  const q1 = quantileSeq(sorted, 0.25, true)
  const q3 = quantileSeq(sorted, 0.75, true)
  return subtract(q3, q1)
}
227
```
228
229
### Quantiles
230
231
```typescript
232
quantileSeq(array: MathCollection, prob: MathType | MathCollection, sorted?: boolean): MathType | MathCollection
233
```
234
{ .api }
235
236
```typescript
237
const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
238
239
// Single quantile
240
quantileSeq(data, 0.5) // 5.5 (median, 50th percentile)
241
quantileSeq(data, 0.25) // 3.25 (1st quartile)
242
quantileSeq(data, 0.75) // 7.75 (3rd quartile)
243
244
// Multiple quantiles
245
quantileSeq(data, [0.25, 0.5, 0.75]) // [3.25, 5.5, 7.75]
246
247
// Pre-sorted data (more efficient)
248
const sortedData = sort(data)
249
quantileSeq(sortedData, 0.9, true) // 9.1 (90th percentile)
250
251
// Common percentiles
252
const percentiles = [0.1, 0.25, 0.5, 0.75, 0.9]
253
quantileSeq(data, percentiles) // Deciles and quartiles
254
```
255
256
## Aggregation Functions
257
258
### Sum
259
260
```typescript
261
sum(...args: MathType[], dim?: number): MathType | MathCollection
262
```
263
{ .api }
264
265
```typescript
266
// Array sum
267
sum([1, 2, 3, 4, 5]) // 15
268
269
// Variadic arguments
270
sum(1, 2, 3, 4, 5) // 15
271
272
// Matrix operations
273
const data = matrix([[1, 2, 3], [4, 5, 6]])
274
sum(data) // 21 (total sum)
275
sum(data, 0) // [5, 7, 9] (column sums)
276
sum(data, 1) // [6, 15] (row sums)
277
278
// With different types
279
sum([fraction(1, 2), fraction(1, 3), fraction(1, 6)]) // Fraction(1, 1) = 1
280
sum([complex(1, 2), complex(3, 4)]) // Complex(4, 6)
281
```
282
283
### Product
284
285
```typescript
286
prod(...args: MathType[]): MathType
287
```
288
{ .api }
289
290
```typescript
291
// Array product
292
prod([1, 2, 3, 4, 5]) // 120
293
294
// Variadic arguments
295
prod(2, 3, 4) // 24
296
297
// Factorial using prod and range
298
prod(range(1, 6)) // 120 (5!)
299
300
// With fractions
301
prod([fraction(1, 2), fraction(2, 3), fraction(3, 4)]) // Fraction(1, 4)
302
```
303
304
### Count
305
306
```typescript
307
count(x: MathCollection): number
308
```
309
{ .api }
310
311
```typescript
312
// Count elements
313
count([1, 2, 3, 4, 5]) // 5
314
count([[1, 2], [3, 4], [5, 6]]) // 6 (total elements)
315
316
// Count non-zero elements (use filter)
317
const data = [1, 0, 3, 0, 5]
318
count(filter(data, x => !equal(x, 0))) // 3
319
320
// Count specific values
321
count(filter(data, x => equal(x, 0))) // 2 (zeros)
322
```
323
324
### Cumulative Sum
325
326
```typescript
327
cumsum(array: MathCollection, dim?: number): MathCollection
328
```
329
{ .api }
330
331
```typescript
332
// Running sum
333
cumsum([1, 2, 3, 4, 5]) // [1, 3, 6, 10, 15]
334
335
// Matrix operations
336
const data = matrix([[1, 2], [3, 4]])
337
cumsum(data, 0) // [[1, 2], [4, 6]] (running sum down each column)
cumsum(data, 1) // [[1, 3], [3, 7]] (running sum across each row)
339
340
// Financial applications: cumulative (additive, non-compounded) returns
341
const returns = [0.1, -0.05, 0.08, 0.03]
342
const cumulativeReturns = cumsum(returns) // [0.1, 0.05, 0.13, 0.16]
343
```
344
345
## Correlation Analysis
346
347
### Correlation Coefficient
348
349
```typescript
350
corr(x: MathCollection, y: MathCollection): MathType
351
```
352
{ .api }
353
354
```typescript
355
// Pearson correlation coefficient
356
const x = [1, 2, 3, 4, 5]
357
const y = [2, 4, 6, 8, 10] // Perfect positive correlation
358
corr(x, y) // 1
359
360
const z = [10, 8, 6, 4, 2] // Perfect negative correlation
361
corr(x, z) // -1
362
363
const w = [1, 3, 2, 5, 4] // Some correlation
364
corr(x, w) // ~0.8
365
366
// Weak correlation
const random1 = [1, 5, 3, 2, 4]
const random2 = [2, 1, 4, 5, 3]
corr(random1, random2) // -0.4 (weak negative correlation)
370
371
// Correlation matrix (manual implementation)
372
/**
 * Builds the Pearson correlation matrix of a dataset laid out with one
 * observation per row and one variable per column.
 *
 * @param data - 2D nested array or matrix object exposing `toArray()`
 *   (rows = observations, columns = variables).
 * @returns An n x n matrix (n = number of variables) whose entry [i, j] is
 *   corr(variable i, variable j).
 */
function corrMatrix(data) {
  // Work on plain nested arrays so each variable can be pulled out as a flat vector.
  const rows = typeof data.toArray === 'function' ? data.toArray() : data
  const n = rows.length > 0 ? rows[0].length : 0 // number of variables

  // corr() needs flat vectors: an n x 1 slice taken with subset() would be
  // interpreted as n separate one-element rows instead of one variable.
  const columns = Array.from({ length: n }, (_, k) => rows.map(row => row[k]))
  const C = zeros(n, n)

  for (let i = 0; i < n; i++) {
    // Correlation is symmetric, so compute each pair once and mirror it.
    for (let j = i; j < n; j++) {
      const r = corr(columns[i], columns[j])
      C.set([i, j], r)
      C.set([j, i], r)
    }
  }

  return C
}
386
```
387
388
## Advanced Statistical Functions
389
390
### Descriptive Statistics Summary
391
392
```typescript
393
// Create comprehensive summary statistics
394
/**
 * Computes a summary of descriptive statistics for a dataset.
 *
 * Skewness and kurtosis are intentionally left out: the original calls to
 * skewness() / kurtosis() referenced functions that are not defined in this
 * document (they need a custom implementation), so calling describe() threw.
 *
 * @param data - Collection of values to summarize.
 * @returns An object with count, mean, std, min, quartiles ('25%', '50%',
 *   '75%'), max, range, iqr, mad and variance.
 */
function describe(data) {
  // Sort once and reuse the ordered copy for both quartiles.
  const sorted = sort(data)
  const q1 = quantileSeq(sorted, 0.25, true)
  const q3 = quantileSeq(sorted, 0.75, true)
  const lowest = min(data)
  const highest = max(data)

  return {
    count: count(data),
    mean: mean(data),
    std: std(data),
    min: lowest,
    '25%': q1,
    '50%': median(data),
    '75%': q3,
    max: highest,
    range: subtract(highest, lowest),
    iqr: subtract(q3, q1),
    mad: mad(data),
    variance: variance(data)
    // skewness / kurtosis: add here once a custom implementation is available
  }
}
415
416
const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
417
const summary = describe(data)
418
```
419
420
### Z-Score Standardization
421
422
```typescript
423
/**
 * Standardizes every value: subtract the mean of the data, then divide by
 * its standard deviation.
 *
 * @param data - Array of values to standardize.
 * @returns A new array holding the z-score of each input value.
 */
function zscore(data) {
  const center = mean(data)
  const spread = std(data)
  const standardize = x => divide(subtract(x, center), spread)
  return data.map(standardize)
}
428
429
const data = [1, 2, 3, 4, 5]
430
const standardized = zscore(data) // Mean ≈ 0, std ≈ 1
431
```
432
433
### Moving Statistics
434
435
```typescript
436
// Moving average
437
/**
 * Simple moving average over a sliding window.
 *
 * @param data - Array of values.
 * @param window - Number of consecutive values averaged per output entry;
 *   must be a positive integer.
 * @returns One mean per full window (data.length - window + 1 entries), or
 *   an empty array when the window is longer than the data.
 * @throws RangeError when window is not a positive integer.
 */
function movingAverage(data, window) {
  // A zero, negative or fractional window would produce empty or misaligned slices.
  if (!Number.isInteger(window) || window < 1) {
    throw new RangeError(`window must be a positive integer, got ${window}`)
  }

  const result = []
  // `end` is the exclusive end index of the current window.
  for (let end = window; end <= data.length; end++) {
    result.push(mean(data.slice(end - window, end)))
  }
  return result
}
445
446
// Exponential moving average
447
/**
 * Exponential moving average, seeded with the first data point:
 * result[i] = alpha * data[i] + (1 - alpha) * result[i - 1].
 *
 * @param data - Array of values.
 * @param alpha - Smoothing factor applied to the newest value.
 * @returns An array of the same length as data; empty for empty input.
 */
function ema(data, alpha) {
  // Without this guard an empty input would yield [undefined].
  if (data.length === 0) return []

  const decay = subtract(1, alpha) // weight of the previous average, loop-invariant
  const result = [data[0]]
  for (let i = 1; i < data.length; i++) {
    result.push(add(multiply(alpha, data[i]), multiply(decay, result[i - 1])))
  }
  return result
}
458
459
const prices = [10, 12, 11, 13, 15, 14, 16]
460
const sma = movingAverage(prices, 3) // Simple moving average
461
const emaData = ema(prices, 0.3) // Exponential moving average
462
```
463
464
### Rank and Percentile Rank
465
466
```typescript
467
/**
 * Assigns a 1-based rank to every value (smallest value gets rank 1).
 *
 * @param data - Array of comparable values.
 * @param method - How tied values are ranked: 'average' (mean of the ranks
 *   the tied group occupies), 'min' (lowest of those ranks) or 'max'
 *   (highest of those ranks).
 * @returns An array of ranks aligned with data.
 * @throws RangeError when method is not one of the supported names.
 */
function rank(data, method = 'average') {
  if (method !== 'average' && method !== 'min' && method !== 'max') {
    throw new RangeError(`Unknown ranking method: ${method}`)
  }

  return data.map(value => {
    let below = 0 // values strictly smaller than `value`
    let tied = 0 // values equal to `value`, including itself
    for (const other of data) {
      if (smaller(other, value)) below++
      else if (!smaller(value, other)) tied++
    }

    // The tied group occupies ranks below + 1 .. below + tied.
    if (method === 'min') return below + 1
    if (method === 'max') return below + tied
    return below + (tied + 1) / 2
  })
}
474
475
/**
 * Percentage of entries in data that are strictly smaller than value.
 *
 * @param data - Array of comparable values.
 * @param value - The value to locate within the data.
 * @returns A percentage (fraction of smaller entries times 100).
 */
function percentileRank(data, value) {
  const below = data.reduce(
    (tally, x) => (smaller(x, value) ? tally + 1 : tally),
    0
  )
  const fraction = divide(below, data.length)
  return fraction * 100
}
479
480
const scores = [85, 90, 78, 92, 88]
481
const ranks = rank(scores) // [2, 4, 1, 5, 3]
482
const pRank = percentileRank(scores, 88) // 40 (40% of scores are strictly below 88)
483
```
484
485
## Frequency Analysis
486
487
### Histogram (Binning)
488
489
```typescript
490
/**
 * Bins values into equal-width intervals spanning [min(data), max(data)].
 *
 * @param data - Array of numeric values.
 * @param bins - Number of bins; must be a positive integer.
 * @returns `counts` (one entry per bin), `edges` (bins + 1 boundaries) and
 *   the `binWidth` used.
 * @throws RangeError when bins is not a positive integer.
 */
function histogram(data, bins = 10) {
  if (!Number.isInteger(bins) || bins < 1) {
    throw new RangeError(`bins must be a positive integer, got ${bins}`)
  }

  const dataMin = min(data)
  const dataMax = max(data)
  const binWidth = divide(subtract(dataMax, dataMin), bins)

  const edges = Array.from({ length: bins + 1 }, (_, i) =>
    add(dataMin, multiply(i, binWidth))
  )
  const counts = new Array(bins).fill(0)

  data.forEach(value => {
    const raw = Number(floor(divide(subtract(value, dataMin), binWidth)))
    // When every value is identical binWidth is 0 and the index is NaN (0 / 0):
    // put those values in the first bin. Otherwise clamp so that the maximum
    // (index === bins) and any rounding overshoot land in a valid bin.
    const binIndex = Number.isFinite(raw)
      ? Math.min(Math.max(raw, 0), bins - 1)
      : 0
    counts[binIndex]++
  })

  return { counts, edges, binWidth }
}
510
511
const data = [1.1, 1.5, 2.3, 2.8, 3.2, 3.9, 4.1, 4.7, 5.2, 5.8]
512
const hist = histogram(data, 5)
513
```
514
515
### Frequency Table
516
517
```typescript
518
/**
 * Counts how often each value occurs.
 *
 * @param data - Collection exposing forEach.
 * @returns A Map from the string form of each value to its occurrence count.
 */
function frequencyTable(data) {
  const tally = new Map()

  data.forEach(item => {
    // Keys are the string representation so equal values share one entry.
    const label = string(item)
    const seen = tally.has(label) ? tally.get(label) : 0
    tally.set(label, seen + 1)
  })

  return tally
}
528
529
const categories = ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'C']
530
const freqTable = frequencyTable(categories)
531
// Map: { 'A' => 3, 'B' => 2, 'C' => 3 }
532
```
533
534
## Working with Missing Data
535
536
```typescript
537
// Filter out NaN/null/undefined values
538
/**
 * Tells whether a value counts as missing (null, undefined or NaN).
 *
 * null / undefined are tested first on purpose: if `isNaN` is the mathjs
 * function it only accepts numeric types and throws on null or undefined,
 * so it must never see them.
 */
function isMissing(value) {
  return isNull(value) || isUndefined(value) || isNaN(value)
}

/**
 * Removes missing entries (null, undefined, NaN) from the data.
 *
 * @param data - Collection that may contain missing entries.
 * @returns The entries of data that are not missing.
 */
function cleanData(data) {
  return filter(data, value => !isMissing(value))
}

// Replace missing values with mean
/**
 * Replaces missing entries with a fill value.
 *
 * @param data - Array that may contain missing entries.
 * @param fillValue - Replacement for missing entries; when null (the
 *   default) the mean of the non-missing entries is used.
 * @returns A new array of the same length with missing entries replaced.
 */
function fillMissing(data, fillValue = null) {
  const replacement = fillValue !== null ? fillValue : mean(cleanData(data))

  return data.map(value => (isMissing(value) ? replacement : value))
}
557
558
const dataWithMissing = [1, 2, NaN, 4, 5, null, 7]
559
const filled = fillMissing(dataWithMissing) // [1, 2, 3.8, 4, 5, 3.8, 7]
560
```
561
562
## Performance Optimization
563
564
### Large Dataset Processing
565
566
```typescript
567
// Summarize a large dataset batch by batch
/**
 * Converts the data to plain numbers and computes summary statistics for
 * each consecutive batch.
 *
 * @param data - Array of values convertible with number().
 * @param batchSize - Number of values per batch; must be a positive integer.
 *   Defaults to 1000.
 * @returns One { mean, std, min, max } object per batch, in input order.
 * @throws RangeError when batchSize is not a positive integer.
 */
function processLargeDataset(data, batchSize = 1000) {
  // A zero or negative step would never advance the loop below.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`)
  }

  // Convert to plain numbers up front
  const numericData = data.map(x => number(x))

  // Batch operations
  const results = []
  for (let start = 0; start < numericData.length; start += batchSize) {
    const batch = numericData.slice(start, start + batchSize)
    results.push({
      mean: mean(batch),
      std: std(batch),
      min: min(batch),
      max: max(batch)
    })
  }

  return results
}
588
```
589
590
### Streaming Statistics
591
592
```typescript
593
// Online algorithms for streaming data
594
/**
 * Running mean / variance / standard deviation over a stream of values,
 * updated one value at a time with Welford's online algorithm, so the
 * values themselves never need to be stored.
 */
class StreamingStats {
  // Fields are declared with initializers so the class type-checks under
  // TypeScript (assigning undeclared properties in a constructor does not).

  /** Number of values seen so far. */
  n = 0
  /** Running mean of the values seen so far. */
  mean = 0
  /** Running sum of squared deviations from the mean (for the variance). */
  m2 = 0

  /**
   * Folds one more value into the running statistics.
   *
   * @param value - The next value of the stream.
   */
  update(value) {
    this.n++
    const delta = subtract(value, this.mean) // deviation from the old mean
    this.mean = add(this.mean, divide(delta, this.n))
    const delta2 = subtract(value, this.mean) // deviation from the new mean
    this.m2 = add(this.m2, multiply(delta, delta2))
  }

  /** @returns The mean of all values seen so far (0 before any update). */
  getMean() {
    return this.mean
  }

  /**
   * @returns The sample variance (N-1 denominator), or 0 when fewer than
   *   two values have been seen.
   */
  getVariance() {
    return this.n < 2 ? 0 : divide(this.m2, subtract(this.n, 1))
  }

  /** @returns The square root of getVariance(). */
  getStd() {
    return sqrt(this.getVariance())
  }
}
621
622
// Usage for large streaming datasets
623
const stats = new StreamingStats()
624
largeDataStream.forEach(value => stats.update(value))
625
```
626
627
## Chain Operations
628
629
All statistical functions work with the chain interface:
630
631
```typescript
632
const result = chain([1, 2, 3, 4, 5])
633
.mean() // 3
634
.done()
635
636
const analysis = chain(dataset)
637
.filter(x => larger(x, 0)) // Remove non-positive values
638
.map(x => log(x)) // Log transform
639
.std() // Standard deviation of log values
640
.done()
641
```
642
643
## Common Statistical Patterns
644
645
### Normalization and Scaling
646
647
```typescript
648
// Min-max scaling to [0, 1]
649
/**
 * Rescales values linearly so the minimum maps to 0 and the maximum to 1.
 *
 * @param data - Array of values.
 * @returns A new array with each value mapped to (x - min) / (max - min).
 */
function minMaxScale(data) {
  const lowest = min(data)
  const span = subtract(max(data), lowest)
  const rescale = x => divide(subtract(x, lowest), span)
  return data.map(rescale)
}
655
656
// Robust scaling (using median and MAD)
657
/**
 * Centers values on the median and scales them by the median absolute
 * deviation.
 *
 * @param data - Array of values.
 * @returns A new array with each value mapped to (x - median) / MAD.
 */
function robustScale(data) {
  const center = median(data)
  const scale = mad(data)
  const rescale = x => divide(subtract(x, center), scale)
  return data.map(rescale)
}
662
```
663
664
### Outlier Detection
665
666
```typescript
667
// IQR-based outlier detection
668
/**
 * Flags values lying more than factor * IQR below the first quartile or
 * above the third quartile.
 *
 * @param data - Collection of comparable values.
 * @param factor - Multiplier applied to the IQR to build the fences.
 * @returns `outliers` (values outside the fences), `bounds` (the lower and
 *   upper fence) and `cleaned` (values inside the fences).
 */
function detectOutliers(data, factor = 1.5) {
  const ordered = sort(data)
  const [q1, q3] = [0.25, 0.75].map(p => quantileSeq(ordered, p, true))
  const margin = multiply(factor, subtract(q3, q1))

  const lowerBound = subtract(q1, margin)
  const upperBound = add(q3, margin)
  const isOutlier = x => smaller(x, lowerBound) || larger(x, upperBound)

  const outliers = filter(data, x => isOutlier(x))
  const cleaned = filter(data, x => !isOutlier(x))

  return {
    outliers,
    bounds: { lower: lowerBound, upper: upperBound },
    cleaned
  }
}
683
684
// Z-score based outlier detection
685
/**
 * Flags values whose absolute z-score exceeds the threshold.
 *
 * @param data - Array of values.
 * @param threshold - Absolute z-score above which a value is an outlier.
 * @returns `outliers` (the flagged values) and `indices` (their positions
 *   in data).
 */
function detectOutliersZScore(data, threshold = 3) {
  // One flag per position: true when |z| is larger than the threshold.
  const flagged = zscore(data).map(z => larger(abs(z), threshold))

  const outliers = data.filter((_, position) => flagged[position])
  const indices = flagged
    .map((hit, position) => (hit ? position : -1))
    .filter(position => position !== -1)

  return { outliers, indices }
}
692
```