CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-mathjs

Math.js is an extensive math library for JavaScript and Node.js featuring a flexible expression parser, symbolic computation, and support for numbers, big numbers, complex numbers, fractions, units, and matrices.

84

1.47x
Overview
Eval results
Files

docs/statistics.md

Statistics Functions

This document covers Math.js's statistical analysis capabilities, including descriptive statistics, data analysis functions, and statistical measures for arrays, matrices, and datasets.

Import

import {
  // Central tendency
  mean, median, mode,
  // Variability  
  std, variance, mad,
  // Distribution
  min, max, range as mathRange, quantileSeq,
  // Aggregation
  sum, prod, count, cumsum,
  // Correlation
  corr,
  // Utility
  sort, partitionSelect
} from 'mathjs'

Central Tendency Measures

Mean (Average)

mean(...values: MathType[]): MathType
mean(values: MathCollection, dim?: number): MathType | MathCollection

{ .api }

// Array input
mean([1, 2, 3, 4, 5]) // 3

// Variadic arguments
mean(1, 2, 3, 4, 5) // 3

// Matrix operations
const data = matrix([[1, 2, 3], [4, 5, 6]])
mean(data) // 3.5 (overall mean)
mean(data, 0) // [2.5, 3.5, 4.5] (column means)
mean(data, 1) // [2, 5] (row means)

// With different number types
mean([bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]) // BigNumber(2.2)

// Complex numbers
mean([complex(1, 2), complex(3, 4)]) // Complex(2, 3)

Median

median(...values: MathType[]): MathType

{ .api }

// Odd number of elements
median([1, 3, 5, 7, 9]) // 5 (middle value)

// Even number of elements  
median([1, 2, 3, 4]) // 2.5 (average of two middle values)

// Variadic input
median(3, 1, 4, 1, 5, 9) // 3.5

// With duplicates
median([1, 1, 2, 3, 3, 3]) // 2.5

// Works with any comparable type
median([bignumber('1'), bignumber('2'), bignumber('3')]) // BigNumber(2)

Mode

mode(...values: MathType[]): MathType[]

{ .api }

// Single mode
mode([1, 2, 2, 3, 4]) // [2]

// Multiple modes (bimodal)
mode([1, 1, 2, 2, 3]) // [1, 2]

// No mode (all unique)
mode([1, 2, 3, 4, 5]) // [1, 2, 3, 4, 5]

// String data
mode(['a', 'b', 'b', 'c']) // ['b']

// Variadic input
mode(1, 2, 2, 3, 2) // [2]

Variability Measures

Standard Deviation

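std(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType
std(array: MathCollection, dimension?: number, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType[]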

{ .api }

const data = [1, 2, 3, 4, 5]

// Default: unbiased (N-1 denominator)
std(data) // ~1.58 (sample standard deviation)

// Biased (N+1 denominator) -- note: this is NOT the population standard deviation
std(data, 'biased') // ~1.29

// Uncorrected (N denominator)
std(data, 'uncorrected') // ~1.41 (population standard deviation)

// Matrix operations
const matrix2d = matrix([[1, 2, 3], [4, 5, 6]])
std(matrix2d) // ~1.87 (standard deviation of all elements)

// Column-wise std: pass the dimension to reduce over as the second argument
const colStds = std(matrix2d, 0) // [~2.12, ~2.12, ~2.12]

Variance

variance(array: MathCollection, normalization?: 'unbiased' | 'uncorrected' | 'biased'): MathType

{ .api }

const data = [1, 2, 3, 4, 5]

// Unbiased variance (sample variance)
variance(data) // 2.5 (N-1 denominator)

// Biased variance (N+1 denominator); use 'uncorrected' for the population variance (N denominator, 2 here)
variance(data, 'biased') // ~1.67 (N+1 denominator)

// Relationship: std = sqrt(variance)
sqrt(variance(data)) === std(data) // true

// With BigNumbers for high precision
const bigData = [bignumber('1.1'), bignumber('2.2'), bignumber('3.3')]
variance(bigData) // BigNumber result

Median Absolute Deviation

mad(array: MathCollection): MathType

{ .api }

// Robust measure of variability
const data = [1, 2, 3, 4, 100] // Contains outlier
mad(data) // 1 (robust to outlier)
std(data) // ~43.6 (sensitive to outlier)

// MAD = median(|x_i - median(x)|)
const medianValue = median(data)
const deviations = data.map(x => abs(subtract(x, medianValue)))
mad(data) === median(deviations) // true

Distribution Properties

Minimum and Maximum

min(...args: MathType[], dim?: number): MathType | MathCollection  
max(...args: MathType[], dim?: number): MathType | MathCollection

{ .api }

// Single array
min([3, 1, 4, 1, 5]) // 1
max([3, 1, 4, 1, 5]) // 5

// Variadic arguments
min(3, 1, 4, 1, 5) // 1
max(3, 1, 4, 1, 5) // 5

// Matrix operations
const data = matrix([[1, 5, 3], [2, 4, 6]])
min(data) // 1 (global minimum)
max(data) // 6 (global maximum)

// Dimension-wise operations
min(data, 0) // [1, 4, 3] (column minimums)
max(data, 0) // [2, 5, 6] (column maximums)
min(data, 1) // [1, 2] (row minimums)
max(data, 1) // [5, 6] (row maximums)

// With units
min([unit('5 m'), unit('300 cm'), unit('0.002 km')]) // unit('2 m')

Range

// Note: range() creates sequences; use subtract(max(), min()) for statistical range
const data = [1, 3, 7, 2, 9, 4]
const dataRange = subtract(max(data), min(data)) // 9 - 1 = 8

// Interquartile range (IQR)
function iqr(data) {
  const sorted = sort(data)
  const n = size(sorted)[0]
  const q1 = quantileSeq(sorted, 0.25)
  const q3 = quantileSeq(sorted, 0.75)  
  return subtract(q3, q1)
}

Quantiles

quantileSeq(array: MathCollection, prob: MathType | MathCollection, sorted?: boolean): MathType

{ .api }

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

// Single quantile
quantileSeq(data, 0.5) // 5.5 (median, 50th percentile)
quantileSeq(data, 0.25) // 3.25 (1st quartile)
quantileSeq(data, 0.75) // 7.75 (3rd quartile)

// Multiple quantiles
quantileSeq(data, [0.25, 0.5, 0.75]) // [3.25, 5.5, 7.75]

// Pre-sorted data (more efficient)
const sortedData = sort(data)
quantileSeq(sortedData, 0.9, true) // 9.1 (90th percentile)

// Common percentiles
const percentiles = [0.1, 0.25, 0.5, 0.75, 0.9]
quantileSeq(data, percentiles) // Deciles and quartiles

Aggregation Functions

Sum

sum(...args: MathType[], dim?: number): MathType | MathCollection

{ .api }

// Array sum
sum([1, 2, 3, 4, 5]) // 15

// Variadic arguments  
sum(1, 2, 3, 4, 5) // 15

// Matrix operations
const data = matrix([[1, 2, 3], [4, 5, 6]])
sum(data) // 21 (total sum)
sum(data, 0) // [5, 7, 9] (column sums)
sum(data, 1) // [6, 15] (row sums)

// With different types
sum([fraction(1, 2), fraction(1, 3), fraction(1, 6)]) // Fraction(1, 1) = 1
sum([complex(1, 2), complex(3, 4)]) // Complex(4, 6)

Product

prod(...args: MathType[]): MathType

{ .api }

// Array product
prod([1, 2, 3, 4, 5]) // 120

// Variadic arguments
prod(2, 3, 4) // 24

// Factorial using prod and range  
prod(range(1, 6)) // 120 (5!)

// With fractions
prod([fraction(1, 2), fraction(2, 3), fraction(3, 4)]) // Fraction(1, 4)

Count

count(x: MathCollection): number

{ .api }

// Count elements
count([1, 2, 3, 4, 5]) // 5
count([[1, 2], [3, 4], [5, 6]]) // 6 (total elements)

// Count non-zero elements (use filter)
const data = [1, 0, 3, 0, 5]
count(filter(data, x => !equal(x, 0))) // 3

// Count specific values
count(filter(data, x => equal(x, 0))) // 2 (zeros)

Cumulative Sum

cumsum(array: MathCollection, dim?: number): MathCollection

{ .api }

// Running sum
cumsum([1, 2, 3, 4, 5]) // [1, 3, 6, 10, 15]

// Matrix operations
const data = matrix([[1, 2], [3, 4]])
cumsum(data, 0) // [[1, 2], [4, 6]] (dim 0: running total down each column)
cumsum(data, 1) // [[1, 3], [3, 7]] (dim 1: running total across each row)

// Financial applications: cumulative (additive) returns -- a simple running sum, not compounded growth
const returns = [0.1, -0.05, 0.08, 0.03]
const cumulativeReturns = cumsum(returns) // ≈ [0.1, 0.05, 0.13, 0.16] (up to floating-point rounding)

Correlation Analysis

Correlation Coefficient

corr(x: MathCollection, y: MathCollection): MathType

{ .api }

// Pearson correlation coefficient
const x = [1, 2, 3, 4, 5]
const y = [2, 4, 6, 8, 10] // Perfect positive correlation
corr(x, y) // 1

const z = [10, 8, 6, 4, 2] // Perfect negative correlation  
corr(x, z) // -1

const w = [1, 3, 2, 5, 4] // Some correlation
corr(x, w) // ~0.8

// Weak (negative) correlation
const random1 = [1, 5, 3, 2, 4]
const random2 = [2, 1, 4, 5, 3]
corr(random1, random2) // -0.4 (values near 0 would indicate no linear relationship)

// Correlation matrix (manual implementation)
function corrMatrix(data) {
  const n = size(data)[1] // number of variables
  const C = zeros(n, n)
  
  for (let i = 0; i < n; i++) {
    for (let j = 0; j < n; j++) {
      const xi = subset(data, index(range(0, size(data)[0]), i))
      const xj = subset(data, index(range(0, size(data)[0]), j))
      C.set([i, j], corr(xi, xj))
    }
  }
  
  return C
}

Advanced Statistical Functions

Descriptive Statistics Summary

// Create comprehensive summary statistics
function describe(data) {
  const sorted = sort(data)
  const n = count(data)
  
  return {
    count: n,
    mean: mean(data),
    std: std(data),
    min: min(data),
    '25%': quantileSeq(sorted, 0.25, true),
    '50%': median(data),  
    '75%': quantileSeq(sorted, 0.75, true),
    max: max(data),
    range: subtract(max(data), min(data)),
    iqr: subtract(quantileSeq(sorted, 0.75, true), quantileSeq(sorted, 0.25, true)),
    mad: mad(data),
    variance: variance(data),
    skewness: skewness(data), // Would need custom implementation
    kurtosis: kurtosis(data)  // Would need custom implementation
  }
}

const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
const summary = describe(data)

Z-Score Standardization

function zscore(data) {
  const mu = mean(data)  
  const sigma = std(data)
  return data.map(x => divide(subtract(x, mu), sigma))
}

const data = [1, 2, 3, 4, 5]
const standardized = zscore(data) // Mean ≈ 0, std ≈ 1

Moving Statistics

// Moving average
function movingAverage(data, window) {
  const result = []
  for (let i = window - 1; i < data.length; i++) {
    const slice = data.slice(i - window + 1, i + 1)
    result.push(mean(slice))
  }
  return result
}

// Exponential moving average  
function ema(data, alpha) {
  const result = [data[0]]
  for (let i = 1; i < data.length; i++) {
    const newValue = add(
      multiply(alpha, data[i]),
      multiply(subtract(1, alpha), result[i - 1])  
    )
    result.push(newValue)
  }
  return result
}

const prices = [10, 12, 11, 13, 15, 14, 16]
const sma = movingAverage(prices, 3) // Simple moving average
const emaData = ema(prices, 0.3) // Exponential moving average

Rank and Percentile Rank

function rank(data, method = 'average') {
  const sorted = [...data].sort((a, b) => subtract(a, b))
  return data.map(value => {
    const count = sorted.filter(x => smaller(x, value)).length
    return add(count, 1) // 1-based ranking
  })
}

function percentileRank(data, value) {
  const count = data.filter(x => smaller(x, value)).length
  return divide(count, data.length) * 100
}

const scores = [85, 90, 78, 92, 88]
const ranks = rank(scores) // [2, 4, 1, 5, 3]
const pRank = percentileRank(scores, 88) // 40 (88 is higher than 40% of the scores: 85 and 78)

Frequency Analysis

Histogram (Binning)

function histogram(data, bins = 10) {
  const dataMin = min(data)
  const dataMax = max(data)  
  const binWidth = divide(subtract(dataMax, dataMin), bins)
  
  const counts = new Array(bins).fill(0)
  const edges = []
  
  for (let i = 0; i <= bins; i++) {
    edges.push(add(dataMin, multiply(i, binWidth)))
  }
  
  data.forEach(value => {
    let binIndex = floor(divide(subtract(value, dataMin), binWidth))
    if (binIndex === bins) binIndex = bins - 1 // Handle edge case
    counts[binIndex]++
  })
  
  return { counts, edges, binWidth }
}

const data = [1.1, 1.5, 2.3, 2.8, 3.2, 3.9, 4.1, 4.7, 5.2, 5.8]
const hist = histogram(data, 5)

Frequency Table

function frequencyTable(data) {
  const freq = new Map()
  
  data.forEach(value => {
    const key = string(value) // Convert to string for consistent keys
    freq.set(key, (freq.get(key) || 0) + 1)
  })
  
  return freq
}

const categories = ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'C']
const freqTable = frequencyTable(categories)
// Map: { 'A' => 3, 'B' => 2, 'C' => 3 }

Working with Missing Data

// Filter out NaN/null/undefined values
function cleanData(data) {
  return filter(data, value => 
    !isNaN(value) && 
    !isNull(value) && 
    !isUndefined(value)
  )
}

// Replace missing values with mean
function fillMissing(data, fillValue = null) {
  const cleaned = cleanData(data)
  const replacement = fillValue !== null ? fillValue : mean(cleaned)
  
  return data.map(value => 
    (isNaN(value) || isNull(value) || isUndefined(value)) 
      ? replacement 
      : value
  )
}

const dataWithMissing = [1, 2, NaN, 4, 5, null, 7]
const filled = fillMissing(dataWithMissing) // [1, 2, 3.8, 4, 5, 3.8, 7]

Performance Optimization

Large Dataset Processing

// Use typed arrays for numerical data
function processLargeDataset(data) {
  // Convert to efficient format if needed
  const numericData = data.map(x => number(x))
  
  // Batch operations
  const batchSize = 1000
  const results = []
  
  for (let i = 0; i < numericData.length; i += batchSize) {
    const batch = numericData.slice(i, i + batchSize)
    results.push({
      mean: mean(batch),
      std: std(batch),
      min: min(batch),
      max: max(batch)
    })
  }
  
  return results
}

Streaming Statistics

// Online algorithms for streaming data
class StreamingStats {
  constructor() {
    this.n = 0
    this.mean = 0
    this.m2 = 0 // For variance calculation
  }
  
  update(value) {
    this.n++
    const delta = subtract(value, this.mean)
    this.mean = add(this.mean, divide(delta, this.n))
    const delta2 = subtract(value, this.mean)
    this.m2 = add(this.m2, multiply(delta, delta2))
  }
  
  getMean() {
    return this.mean
  }
  
  getVariance() {
    return this.n < 2 ? 0 : divide(this.m2, subtract(this.n, 1))
  }
  
  getStd() {
    return sqrt(this.getVariance())
  }
}

// Usage for large streaming datasets
const stats = new StreamingStats()
largeDataStream.forEach(value => stats.update(value))

Chain Operations

All statistical functions work with the chain interface:

const result = chain([1, 2, 3, 4, 5])
  .mean()     // 3
  .done()

const analysis = chain(dataset)
  .filter(x => larger(x, 0))  // Remove non-positive values
  .map(x => log(x))           // Log transform
  .std()                      // Standard deviation of log values  
  .done()

Common Statistical Patterns

Normalization and Scaling

// Min-max scaling to [0, 1]
function minMaxScale(data) {
  const dataMin = min(data)
  const dataMax = max(data)  
  const range = subtract(dataMax, dataMin)
  return data.map(x => divide(subtract(x, dataMin), range))
}

// Robust scaling (using median and MAD)
function robustScale(data) {
  const med = median(data)
  const madValue = mad(data)
  return data.map(x => divide(subtract(x, med), madValue))
}

Outlier Detection

// IQR-based outlier detection
function detectOutliers(data, factor = 1.5) {
  const sorted = sort(data)
  const q1 = quantileSeq(sorted, 0.25, true)
  const q3 = quantileSeq(sorted, 0.75, true) 
  const iqr = subtract(q3, q1)
  
  const lowerBound = subtract(q1, multiply(factor, iqr))
  const upperBound = add(q3, multiply(factor, iqr))
  
  return {
    outliers: filter(data, x => smaller(x, lowerBound) || larger(x, upperBound)),
    bounds: { lower: lowerBound, upper: upperBound },
    cleaned: filter(data, x => !smaller(x, lowerBound) && !larger(x, upperBound))
  }
}

// Z-score based outlier detection
function detectOutliersZScore(data, threshold = 3) {
  const zScores = zscore(data)
  return {
    outliers: data.filter((_, i) => larger(abs(zScores[i]), threshold)),
    indices: zScores.map((z, i) => larger(abs(z), threshold) ? i : -1).filter(i => i >= 0)
  }
}

Install with Tessl CLI

npx tessl i tessl/npm-mathjs

docs

arithmetic.md

data-types.md

expressions.md

index.md

matrices.md

probability.md

statistics.md

trigonometry.md

units.md

tile.json