A JavaScript implementation of descriptive, regression, and inference statistics
—
Classification algorithms and clustering methods for supervised and unsupervised learning tasks.
import {
BayesianClassifier,
PerceptronModel,
ckmeans,
kernelDensityEstimation
} from "simple-statistics";
class BayesianClassifier {
totalCount: number;
data: Record<string, any>;
constructor();
train(item: Record<string, any>, category: string): void;
score(item: Record<string, any>): Record<string, number>;
}
const bayesian: typeof BayesianClassifier;
Naive Bayesian classifier for categorical data. Assumes that features are conditionally independent of one another given the category.
Properties:
totalCount: number - Total number of training examples
data: Record<string, any> - Internal storage for training data
Methods:
train(item: Record<string, any>, category: string): void;
Trains the classifier with a labeled example.
Parameters:
item: Record<string, any> - Feature object with key-value pairs
category: string - Category label for this item
score(item: Record<string, any>): Record<string, number>;
Classifies an item and returns probability scores for each category.
Parameters:
item: Record<string, any> - Feature object to classify
Returns: Record<string, number> - Probability scores for each category
import { BayesianClassifier } from "simple-statistics";
// Email spam classification
const classifier = new BayesianClassifier();
// Train with examples
classifier.train({ word: "free", count: 1 }, "spam");
classifier.train({ word: "meeting", count: 1 }, "ham");
classifier.train({ word: "free", count: 2, urgent: true }, "spam");
classifier.train({ word: "project", count: 1 }, "ham");
// Classify new email
const scores = classifier.score({ word: "free", count: 1 });
console.log(scores); // { spam: 0.67, ham: 0.33 }
// Get most likely category
const category = Object.keys(scores).reduce((a, b) =>
scores[a] > scores[b] ? a : b
);
console.log(`Predicted category: ${category}`);
class PerceptronModel {
weights: number[];
bias: number;
constructor();
predict(features: number[]): number;
train(features: number[], label: number): PerceptronModel;
}
const perceptron: typeof PerceptronModel;
Linear perceptron for binary classification. Good for linearly separable data.
Properties:
weights: number[] - Feature weights learned during training
bias: number - Bias term
Methods:
predict(features: number[]): number;
Makes a prediction for given features.
Parameters:
features: number[] - Feature vector
Returns: number - Predicted class (0 or 1)
train(features: number[], label: number): PerceptronModel;
Updates the model with a training example.
Parameters:
features: number[] - Feature vector
label: number - True label (0 or 1)
Returns: PerceptronModel - Updated model (for chaining)
import { PerceptronModel } from "simple-statistics";
// Binary classification: predict loan approval
const perceptron = new PerceptronModel();
// Training data: [income, credit_score, debt_ratio]
const trainingData = [
{ features: [50000, 700, 0.3], label: 1 }, // approved
{ features: [30000, 600, 0.8], label: 0 }, // denied
{ features: [80000, 750, 0.2], label: 1 }, // approved
{ features: [25000, 550, 0.9], label: 0 }, // denied
];
// Train the model
trainingData.forEach(({ features, label }) => {
perceptron.train(features, label);
});
// Make predictions
const applicant1 = [60000, 720, 0.4];
const prediction1 = perceptron.predict(applicant1);
console.log(`Loan decision: ${prediction1 ? 'Approved' : 'Denied'}`);
const applicant2 = [20000, 500, 1.2];
const prediction2 = perceptron.predict(applicant2);
console.log(`Loan decision: ${prediction2 ? 'Approved' : 'Denied'}`);
function ckmeans<T>(data: T[], nClusters: number): T[][];
Optimal k-means clustering in 1D using dynamic programming. Closely related to Jenks natural breaks, but computes an exact optimum rather than relying on a heuristic.
Parameters:
data: T[] - Array of values to cluster
nClusters: number - Number of clusters to create
Returns: T[][] - Array of clusters, each containing grouped values
Use Cases:
import { ckmeans } from "simple-statistics";
// Income distribution clustering
const incomes = [25000, 28000, 30000, 45000, 48000, 50000, 75000, 80000, 150000, 200000];
const clusters = ckmeans(incomes, 3);
console.log("Income clusters:");
clusters.forEach((cluster, i) => {
const min = Math.min(...cluster);
const max = Math.max(...cluster);
console.log(`Cluster ${i + 1}: $${min.toLocaleString()} - $${max.toLocaleString()}`);
});
// Result might be:
// Cluster 1: $25,000 - $30,000 (Low income)
// Cluster 2: $45,000 - $50,000 (Middle income)
// Cluster 3: $75,000 - $200,000 (High income)
type Kernel = 'gaussian' | ((x: number) => number);
type BandwidthMethod = 'nrd' | number;
function kernelDensityEstimation(
X: number[],
kernel?: Kernel,
bandwidthMethod?: BandwidthMethod
): (x: number) => number;
Estimates the probability density function from sample data using kernel density estimation.
Parameters:
X: number[] - Sample data points
kernel?: Kernel - Kernel function ('gaussian' or custom function)
bandwidthMethod?: BandwidthMethod - Bandwidth selection ('nrd' or numeric value)
Returns: (x: number) => number - Density function that returns the estimated probability density at x (a density value, not the probability P(X = x), which is zero for continuous data)
import { kernelDensityEstimation } from "simple-statistics";
// Estimate probability density of test scores
const testScores = [65, 70, 75, 78, 80, 82, 85, 88, 90, 92];
const densityFunction = kernelDensityEstimation(testScores);
// Get density estimates
const density75 = densityFunction(75);
const density85 = densityFunction(85);
const density95 = densityFunction(95);
console.log(`Density at 75: ${density75.toFixed(4)}`);
console.log(`Density at 85: ${density85.toFixed(4)}`);
console.log(`Density at 95: ${density95.toFixed(4)}`);
// Plot density curve
const xRange = Array.from({ length: 50 }, (_, i) => 60 + i);
const densityCurve = xRange.map(x => ({ x, density: densityFunction(x) }));
console.log("Density curve:", densityCurve);
import { BayesianClassifier } from "simple-statistics";
// Sentiment analysis classifier
const sentimentClassifier = new BayesianClassifier();
// Training data
const trainingTexts = [
{ text: "love this product amazing quality", sentiment: "positive" },
{ text: "terrible service very disappointed", sentiment: "negative" },
{ text: "excellent fast shipping", sentiment: "positive" },
{ text: "defective item poor quality", sentiment: "negative" },
{ text: "great value highly recommend", sentiment: "positive" },
];
// Simple feature extraction (word presence)
/**
 * Builds a bag-of-words presence map for a piece of text.
 *
 * The text is lower-cased and tokenized on runs of whitespace; every
 * non-empty token `w` contributes the key `word_<w>` with the value `true`.
 * Duplicate words collapse into a single key.
 *
 * @param text - Raw text to tokenize.
 * @returns An object with one `word_<token>: true` entry per distinct token;
 *   empty when the text contains no tokens.
 */
function extractFeatures(text: string): Record<string, boolean> {
  const features: Record<string, boolean> = {};
  // Split on any whitespace run (spaces, tabs, newlines) rather than a single
  // space, so repeated or mixed separators do not produce empty tokens.
  for (const word of text.toLowerCase().split(/\s+/)) {
    // Leading/trailing whitespace and the empty string still yield "" from
    // split(); skip it so no bogus "word_" feature is emitted.
    if (word.length === 0) {
      continue;
    }
    features[`word_${word}`] = true;
  }
  return features;
}
// Train classifier
trainingTexts.forEach(({ text, sentiment }) => {
const features = extractFeatures(text);
sentimentClassifier.train(features, sentiment);
});
// Classify new text
const newReview = "fast delivery great product";
const features = extractFeatures(newReview);
const scores = sentimentClassifier.score(features);
console.log("Review:", newReview);
console.log("Sentiment scores:", scores);
console.log("Predicted sentiment:",
Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b)
);
import { ckmeans, mean, standardDeviation } from "simple-statistics";
// Customer purchase amounts over past year
const customerSpending = [
50, 75, 80, 120, 150, 180, 200, 250, 300, 350,
400, 500, 600, 800, 1000, 1200, 1500, 2000, 2500, 3000
];
// Find natural customer segments
const segments = ckmeans(customerSpending, 4);
console.log("Customer Segments:");
segments.forEach((segment, i) => {
const segmentMean = mean(segment);
const segmentStd = standardDeviation(segment);
const min = Math.min(...segment);
const max = Math.max(...segment);
console.log(`Segment ${i + 1}:`);
console.log(` Range: $${min} - $${max}`);
console.log(` Average: $${segmentMean.toFixed(0)}`);
console.log(` Std Dev: $${segmentStd.toFixed(0)}`);
console.log(` Count: ${segment.length} customers`);
});
// Use segments for targeted marketing strategies
const segmentNames = ["Low Spenders", "Regular Customers", "High Value", "VIP"];
segments.forEach((segment, i) => {
console.log(`${segmentNames[i]}: ${segment.length} customers spending $${Math.min(...segment)}-$${Math.max(...segment)}`);
});
import { kernelDensityEstimation, mean, standardDeviation } from "simple-statistics";
// Network traffic data (requests per minute)
const normalTraffic = [
45, 52, 48, 55, 50, 47, 53, 49, 51, 46,
54, 48, 52, 50, 49, 47, 53, 51, 48, 50
];
// Build density model of normal traffic
const densityModel = kernelDensityEstimation(normalTraffic);
const meanTraffic = mean(normalTraffic);
const stdTraffic = standardDeviation(normalTraffic);
// Monitor new traffic values
const newTrafficValues = [52, 48, 95, 51, 150, 49]; // Some potential anomalies
console.log(`Normal traffic: ${meanTraffic.toFixed(1)} ± ${stdTraffic.toFixed(1)} requests/min`);
console.log("\nAnomaly Detection:");
newTrafficValues.forEach(traffic => {
const density = densityModel(traffic);
const zScore = Math.abs((traffic - meanTraffic) / stdTraffic);
console.log(`Traffic: ${traffic} req/min`);
console.log(` Density: ${density.toFixed(6)}`);
console.log(` Z-score: ${zScore.toFixed(2)}`);
if (density < 0.001 || zScore > 3) {
console.log(` 🚨 ANOMALY DETECTED`);
} else {
console.log(` ✓ Normal`);
}
});
Install with Tessl CLI
npx tessl i tessl/npm-simple-statistics