CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/npm-simple-statistics

A JavaScript implementation of descriptive, regression, and inference statistics

Pending
Overview
Eval results
Files

docs/data-manipulation.md

Data Manipulation

Functions for sampling, shuffling, and array manipulation utilities.

Core Imports

import { 
  sample,
  sampleWithReplacement,
  shuffle,
  shuffleInPlace,
  chunk,
  numericSort,
  quickselect,
  uniqueCountSorted,
  sum,
  sumSimple,
  product,
  sumNthPowerDeviations,
  equalIntervalBreaks
} from "simple-statistics";

Sampling Functions

sample { .api }

function sample<T>(population: T[], n: number, randomSource?: () => number): T[];

Random sampling without replacement from a population.

Parameters:

  • population: T[] - Source array to sample from
  • n: number - Number of items to sample
  • randomSource?: () => number - Optional random number generator (0-1 range)

Returns: T[] - Array of sampled items (no duplicates)

import { sample } from "simple-statistics";

// Survey sampling
const population = ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace'];
const surveySample = sample(population, 3);
console.log(`Survey participants: ${surveySample.join(', ')}`);
// Example: ['Charlie', 'Alice', 'Frank']

// A/B testing user selection
const userIds = [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010];
const testGroup = sample(userIds, 5);
console.log(`Test group: ${testGroup.join(', ')}`);

sampleWithReplacement { .api }

function sampleWithReplacement<T>(population: T[], n: number, randomSource?: () => number): T[];

Random sampling with replacement - items can be selected multiple times.

Parameters:

  • population: T[] - Source array to sample from
  • n: number - Number of items to sample
  • randomSource?: () => number - Optional random number generator

Returns: T[] - Array of sampled items (may contain duplicates)

import { sampleWithReplacement } from "simple-statistics";

// Bootstrap sampling for statistics
const originalData = [10, 15, 20, 25, 30];
const bootstrapSample = sampleWithReplacement(originalData, 10);
console.log(`Bootstrap sample: ${bootstrapSample.join(', ')}`);
// Example: [15, 25, 10, 25, 20, 30, 15, 20, 25, 10]

// Monte Carlo simulation sampling
const outcomes = ['win', 'lose', 'draw'];
const probabilities = [0.4, 0.5, 0.1]; // Illustrative only: sampleWithReplacement draws uniformly and does not use these weights
const simulations = sampleWithReplacement(outcomes, 100);

Array Shuffling

shuffle { .api }

function shuffle<T>(array: T[], randomSource?: () => number): T[];

Fisher-Yates shuffle that returns a new shuffled array (immutable).

Parameters:

  • array: T[] - Array to shuffle
  • randomSource?: () => number - Optional random number generator

Returns: T[] - New shuffled array

import { shuffle } from "simple-statistics";

// Card deck shuffling
const deck = ['A♠', 'K♠', 'Q♠', 'J♠', '10♠', '9♠', '8♠', '7♠'];
const shuffledDeck = shuffle(deck);
console.log(`Original: ${deck.join(' ')}`);
console.log(`Shuffled: ${shuffledDeck.join(' ')}`);
// Original array remains unchanged

shuffleInPlace { .api }

function shuffleInPlace<T>(array: T[], randomSource?: () => number): T[];

Fisher-Yates shuffle that modifies the original array (mutable).

Parameters:

  • array: T[] - Array to shuffle in place
  • randomSource?: () => number - Optional random number generator

Returns: T[] - Reference to the modified array

import { shuffleInPlace } from "simple-statistics";

// Randomize playlist order
const playlist = ['Song1', 'Song2', 'Song3', 'Song4', 'Song5'];
shuffleInPlace(playlist);
console.log(`Shuffled playlist: ${playlist.join(', ')}`);
// Original array is modified

Array Manipulation

chunk { .api }

function chunk<T>(array: T[], chunkSize: number): T[][];

Splits an array into chunks of specified size.

Parameters:

  • array: T[] - Array to chunk
  • chunkSize: number - Size of each chunk

Returns: T[][] - Array of chunks

import { chunk } from "simple-statistics";

// Batch processing
const dataPoints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
const batches = chunk(dataPoints, 4);
console.log(`Batches:`, batches);
// [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

// Pagination
const users = ['User1', 'User2', 'User3', 'User4', 'User5', 'User6', 'User7'];
const pages = chunk(users, 3);
pages.forEach((page, i) => {
  console.log(`Page ${i + 1}: ${page.join(', ')}`);
});

numericSort { .api }

function numericSort(array: number[]): number[];

Sorts an array of numbers in ascending order.

Parameters:

  • array: number[] - Array of numbers to sort

Returns: number[] - New sorted array

import { numericSort } from "simple-statistics";

const unsorted = [23, 1, 45, 12, 7, 89, 34];
const sorted = numericSort(unsorted);
console.log(`Sorted: ${sorted.join(', ')}`); // 1, 7, 12, 23, 34, 45, 89

quickselect { .api }

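function quickselect(array: number[], k: number, left?: number, right?: number): void;

Rearranges the array in place so that the element at index k is the kth smallest and every element before it is no larger, using the Floyd-Rivest selection algorithm (O(n) average time). The function does not return the element; read it from array[k] afterwards.

Parameters:

  • array: number[] - Array of numbers (mutated in place)
  • k: number - Index of element to find (0-based)
  • left?: number - Optional left boundary
  • right?: number - Optional right boundary

Returns: void - The input array is partially sorted in place; array[k] holds the kth smallest element

import { quickselect } from "simple-statistics";

const numbers = [7, 2, 9, 1, 5, 8, 3];

// Find median without full sort (work on a copy, because quickselect mutates its input)
const medianIndex = Math.floor(numbers.length / 2);
const medianWork = [...numbers];
quickselect(medianWork, medianIndex);
const median = medianWork[medianIndex]; // 5

// Find 2nd smallest
const secondWork = [...numbers];
quickselect(secondWork, 1);
const secondSmallest = secondWork[1]; // 2

console.log(`Median: ${median}`);
console.log(`2nd smallest: ${secondSmallest}`);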

uniqueCountSorted { .api }

function uniqueCountSorted(array: number[]): number;

Counts unique values in a pre-sorted array.

Parameters:

  • array: number[] - Pre-sorted array of numbers

Returns: number - Count of unique values

import { uniqueCountSorted } from "simple-statistics";

const sortedWithDuplicates = [1, 1, 2, 2, 2, 3, 4, 4, 5];
const uniqueCount = uniqueCountSorted(sortedWithDuplicates); // 5
console.log(`Unique values: ${uniqueCount}`);

Summation and Products

sum { .api }

function sum(values: number[]): number;

Accurate summation using the Kahan-Babuska compensated summation algorithm (an improvement over classical Kahan summation) to minimize floating-point errors.

Parameters:

  • values: number[] - Array of numbers to sum

Returns: number - Sum with improved numerical precision

import { sum } from "simple-statistics";

// High precision summation
const preciseValues = [0.1, 0.2, 0.3, 0.4, 0.5];
const accurateSum = sum(preciseValues); // 1.5 (exactly)
const naiveSum = preciseValues.reduce((a, b) => a + b, 0); // May have floating-point error

console.log(`Accurate sum: ${accurateSum}`);
console.log(`Naive sum: ${naiveSum}`);

sumSimple { .api }

function sumSimple(values: number[]): number;

Simple summation without compensation (faster but less precise).

Parameters:

  • values: number[] - Array of numbers to sum

Returns: number - Sum of all values

product { .api }

function product(values: number[]): number;

Calculates the product of all values in an array.

Parameters:

  • values: number[] - Array of numbers

Returns: number - Product of all values

import { product } from "simple-statistics";

const factors = [2, 3, 4, 5];
const result = product(factors); // 120
console.log(`Product: ${result}`);

// Compound interest calculation
const growthRates = [1.05, 1.03, 1.07, 1.02]; // 5%, 3%, 7%, 2% annual growth
const totalGrowth = product(growthRates); // 1.177...
console.log(`Total growth factor: ${totalGrowth.toFixed(3)}`);

sumNthPowerDeviations { .api }

function sumNthPowerDeviations(values: number[], n?: number): number;

Calculates sum of nth power deviations from the mean. The mean is computed internally from values; it is not passed as an argument.

Parameters:

  • values: number[] - Array of numbers
  • n?: number - Power to raise each deviation to (2 for sum of squared deviations); pass it explicitly

Returns: number - Sum of nth power deviations

import { sumNthPowerDeviations, mean } from "simple-statistics";

const data = [1, 2, 3, 4, 5];
const dataMean = mean(data); // 3 (shown for reference; sumNthPowerDeviations computes it internally)

// Sum of squared deviations (for variance calculation)
const sumSquaredDeviations = sumNthPowerDeviations(data, 2); // 10

// Sum of cubed deviations (for skewness calculation)
const sumCubedDeviations = sumNthPowerDeviations(data, 3); // 0

equalIntervalBreaks { .api }

function equalIntervalBreaks(values: number[], nClasses: number): number[];

Creates equal-width intervals for data binning and histogram creation.

Parameters:

  • values: number[] - Data values to create breaks for
  • nClasses: number - Number of intervals/classes to create

Returns: number[] - Array of break points defining intervals

import { equalIntervalBreaks } from "simple-statistics";

// Income distribution binning
const incomes = [25000, 35000, 42000, 58000, 67000, 78000, 95000, 120000];
const incomeBreaks = equalIntervalBreaks(incomes, 4);
console.log(`Income brackets: ${incomeBreaks.join(', ')}`);
// Example: [25000, 48750, 72500, 96250, 120000]

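// Create histogram bins (each bin is [lower, upper), except the last, which also includes its upper bound)
const data = [1, 3, 7, 8, 12, 15, 18, 22, 25, 28];
const breaks = equalIntervalBreaks(data, 5);
const bins = breaks.slice(0, -1).map((breakpoint, i) => {
  const upper = breaks[i + 1];
  const isLastBin = i === breaks.length - 2;
  return {
    range: `${breakpoint}-${upper}`,
    // Close the last bin on the right so the maximum value is counted
    count: data.filter(d => d >= breakpoint && (isLastBin ? d <= upper : d < upper)).length
  };
});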

console.log("Histogram bins:");
bins.forEach(bin => console.log(`${bin.range}: ${bin.count} items`));

Usage Examples

Data Science Pipeline

import { sample, sampleWithReplacement, shuffle, chunk, sum, numericSort } from "simple-statistics";

// Prepare dataset for machine learning
const fullDataset = Array.from({ length: 1000 }, (_, i) => ({
  id: i + 1,
  feature1: Math.random() * 100,
  feature2: Math.random() * 50,
  label: Math.random() > 0.5 ? 1 : 0
}));

// 1. Shuffle data to remove ordering bias
const shuffledData = shuffle(fullDataset);

// 2. Split into train/test sets
const trainSize = Math.floor(shuffledData.length * 0.8);
const trainData = shuffledData.slice(0, trainSize);
const testData = shuffledData.slice(trainSize);

// 3. Create mini-batches for training
const batchSize = 32;
const trainBatches = chunk(trainData, batchSize);

console.log(`Dataset split: ${trainData.length} train, ${testData.length} test`);
console.log(`Training batches: ${trainBatches.length} batches of ${batchSize}`);

// 4. Bootstrap sampling for model validation
const bootstrapSamples = Array.from({ length: 100 }, () => 
  sampleWithReplacement(trainData, trainData.length)
);

console.log(`Created ${bootstrapSamples.length} bootstrap samples`);

A/B Testing Framework

import { sample, shuffle, mean, sum } from "simple-statistics";

// User pool for A/B testing
const allUsers = Array.from({ length: 10000 }, (_, i) => ({
  userId: i + 1,
  segment: Math.random() > 0.7 ? 'premium' : 'free',
  activity: Math.random() * 100
}));

// Stratified sampling to ensure representative groups
const premiumUsers = allUsers.filter(u => u.segment === 'premium');
const freeUsers = allUsers.filter(u => u.segment === 'free');

const testSize = 1000;
const premiumRatio = premiumUsers.length / allUsers.length;
const premiumTestSize = Math.floor(testSize * premiumRatio);
const freeTestSize = testSize - premiumTestSize;

// Sample from each stratum
const testPremium = sample(premiumUsers, premiumTestSize);
const testFree = sample(freeUsers, freeTestSize);
const testGroup = shuffle([...testPremium, ...testFree]);

// Split test group between variants
const midpoint = Math.floor(testGroup.length / 2);
const variantA = testGroup.slice(0, midpoint);
const variantB = testGroup.slice(midpoint);

console.log("A/B Test Setup:");
console.log(`Variant A: ${variantA.length} users`);
console.log(`Variant B: ${variantB.length} users`);
console.log(`Premium users in test: ${sum([testPremium.length])} (${(premiumRatio * 100).toFixed(1)}%)`);

Monte Carlo Simulation

import { sampleWithReplacement, numericSort, mean, sum, chunk } from "simple-statistics";

// Portfolio risk simulation
const stockReturns = {
  'AAPL': [0.12, -0.05, 0.08, 0.15, -0.02, 0.11, 0.06],
  'GOOGL': [0.18, -0.08, 0.12, 0.22, -0.01, 0.14, 0.09],
  'MSFT': [0.15, -0.03, 0.09, 0.18, 0.01, 0.12, 0.07]
};

const portfolio = { 'AAPL': 0.4, 'GOOGL': 0.35, 'MSFT': 0.25 };
const numSimulations = 10000;
const timeHorizon = 252; // trading days in a year

// Monte Carlo simulation
const simulationResults = [];

for (let sim = 0; sim < numSimulations; sim++) {
  let portfolioValue = 100000; // Starting value
  
  for (let day = 0; day < timeHorizon; day++) {
    let dailyReturn = 0;
    
    for (const [stock, weight] of Object.entries(portfolio)) {
      const historicalReturns = stockReturns[stock as keyof typeof stockReturns];
      const randomReturn = sampleWithReplacement(historicalReturns, 1)[0];
      dailyReturn += weight * randomReturn;
    }
    
    portfolioValue *= (1 + dailyReturn / 252); // Daily compounding
  }
  
  simulationResults.push(portfolioValue);
}

// Analyze results
const sortedResults = numericSort(simulationResults);
const meanValue = mean(sortedResults);
const var95 = sortedResults[Math.floor(sortedResults.length * 0.05)]; // 5th percentile

console.log("Portfolio Simulation Results:");
console.log(`Expected value: $${meanValue.toLocaleString()}`);
console.log(`95% VaR: $${(100000 - var95).toLocaleString()} loss`);
console.log(`Probability of loss: ${(sortedResults.filter(v => v < 100000).length / numSimulations * 100).toFixed(1)}%`);

Install with Tessl CLI

npx tessl i tessl/npm-simple-statistics

docs

array-operations.md

combinatorics.md

data-manipulation.md

descriptive-statistics.md

distributions.md

index.md

machine-learning.md

math-utilities.md

quantiles.md

regression.md

testing.md

tile.json