
tessl/npm-simple-statistics

A JavaScript implementation of descriptive, regression, and inference statistics


docs/machine-learning.md

Machine Learning

Classification algorithms and clustering methods for supervised and unsupervised learning tasks.

Core Imports

import { 
  BayesianClassifier,
  PerceptronModel,
  ckmeans,
  kernelDensityEstimation
} from "simple-statistics";

Classification Algorithms

BayesianClassifier (alias: bayesian) { .api }

class BayesianClassifier {
  totalCount: number;
  data: Record<string, any>;
  
  constructor();
  train(item: Record<string, any>, category: string): void;
  score(item: Record<string, any>): Record<string, number>;
}

const bayesian: typeof BayesianClassifier;

Naive Bayesian classifier for categorical data. Assumes independence between features.
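The "naive" part is the independence assumption: each feature contributes to a category's score on its own, so per-feature statistics can simply be combined. A toy sketch of the idea (illustrative only, not the library's internal code; the counts and the smoothing constant are made up for this example):

```typescript
// Toy naive Bayes scoring from per-category feature counts.
// counts[category][feature] = times the feature appeared in that category's training items
const counts: Record<string, Record<string, number>> = {
  spam: { free: 2, urgent: 1 },
  ham: { meeting: 1, project: 1 },
};
const totals: Record<string, number> = { spam: 2, ham: 2 };

// Multiply per-feature likelihoods per category, with a small smoothing
// constant so an unseen feature does not zero out the whole product.
function score(features: string[]): Record<string, number> {
  const result: Record<string, number> = {};
  for (const category of Object.keys(counts)) {
    let s = 1;
    for (const feature of features) {
      s *= ((counts[category][feature] ?? 0) + 0.5) / (totals[category] + 1);
    }
    result[category] = s;
  }
  return result;
}

const scores = score(["free", "urgent"]);
console.log(scores.spam > scores.ham); // true: both features point to spam
```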

Properties:

  • totalCount: number - Total number of training examples
  • data: Record<string, any> - Internal storage for training data

Methods:

train { .api }

train(item: Record<string, any>, category: string): void;

Trains the classifier with a labeled example.

Parameters:

  • item: Record<string, any> - Feature object with key-value pairs
  • category: string - Category label for this item

score { .api }

score(item: Record<string, any>): Record<string, number>;

Classifies an item and returns probability scores for each category.

Parameters:

  • item: Record<string, any> - Feature object to classify

Returns: Record<string, number> - Probability scores for each category

import { BayesianClassifier } from "simple-statistics";

// Email spam classification
const classifier = new BayesianClassifier();

// Train with examples
classifier.train({ word: "free", count: 1 }, "spam");
classifier.train({ word: "meeting", count: 1 }, "ham");
classifier.train({ word: "free", count: 2, urgent: true }, "spam");
classifier.train({ word: "project", count: 1 }, "ham");

// Classify new email
const scores = classifier.score({ word: "free", count: 1 });
console.log(scores); // "spam" should receive the higher score

// Get most likely category
const category = Object.keys(scores).reduce((a, b) => 
  scores[a] > scores[b] ? a : b
);
console.log(`Predicted category: ${category}`);

PerceptronModel (alias: perceptron) { .api }

class PerceptronModel {
  weights: number[];
  bias: number;
  
  constructor();
  predict(features: number[]): number;
  train(features: number[], label: number): PerceptronModel;
}

const perceptron: typeof PerceptronModel;

Linear perceptron for binary classification. Works best on linearly separable data; because the model updates one example at a time, several passes over the training set are often needed for it to converge.

Properties:

  • weights: number[] - Feature weights learned during training
  • bias: number - Bias term

Methods:

predict { .api }

predict(features: number[]): number;

Makes a prediction for given features.

Parameters:

  • features: number[] - Feature vector

Returns: number - Predicted class (0 or 1)

train { .api }

train(features: number[], label: number): PerceptronModel;

Updates the model with a training example.

Parameters:

  • features: number[] - Feature vector
  • label: number - True label (0 or 1)

Returns: PerceptronModel - Updated model (for chaining)

import { PerceptronModel } from "simple-statistics";

// Binary classification: predict loan approval
const perceptron = new PerceptronModel();

// Training data: [income, credit_score, debt_ratio]
const trainingData = [
  { features: [50000, 700, 0.3], label: 1 }, // approved
  { features: [30000, 600, 0.8], label: 0 }, // denied
  { features: [80000, 750, 0.2], label: 1 }, // approved
  { features: [25000, 550, 0.9], label: 0 }, // denied
];

// Train the model
trainingData.forEach(({ features, label }) => {
  perceptron.train(features, label);
});

// Make predictions
const applicant1 = [60000, 720, 0.4];
const prediction1 = perceptron.predict(applicant1);
console.log(`Loan decision: ${prediction1 ? 'Approved' : 'Denied'}`);

const applicant2 = [20000, 500, 1.2];
const prediction2 = perceptron.predict(applicant2);
console.log(`Loan decision: ${prediction2 ? 'Approved' : 'Denied'}`);
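For intuition about what train does, the classic perceptron update rule can be sketched in a few lines. This illustrates the general algorithm on the AND function; it is not the library's internal code. Note the loop over several epochs: a single pass over the data is rarely enough.

```typescript
// Illustrative perceptron learning the AND function (1 only when both inputs are 1).
type Example = { features: number[]; label: 0 | 1 };

const data: Example[] = [
  { features: [0, 0], label: 0 },
  { features: [0, 1], label: 0 },
  { features: [1, 0], label: 0 },
  { features: [1, 1], label: 1 },
];

let weights = [0, 0];
let bias = 0;

// predict: 1 if the weighted sum plus bias is positive, else 0
const predict = (x: number[]): 0 | 1 =>
  x.reduce((sum, xi, i) => sum + xi * weights[i], bias) > 0 ? 1 : 0;

// On a mistake, nudge the weights and bias toward the true label
for (let epoch = 0; epoch < 25; epoch++) {
  for (const { features, label } of data) {
    const error = label - predict(features); // -1, 0, or +1
    weights = weights.map((w, i) => w + error * features[i]);
    bias += error;
  }
}

console.log(data.map(({ features }) => predict(features))); // [ 0, 0, 0, 1 ]
```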

Clustering

ckmeans { .api }

function ckmeans<T>(data: T[], nClusters: number): T[][];

Optimal one-dimensional k-means clustering using dynamic programming. It computes exactly the grouping that Jenks natural breaks approximates heuristically.

Parameters:

  • data: T[] - Array of values to cluster
  • nClusters: number - Number of clusters to create

Returns: T[][] - Array of clusters, each containing grouped values

Use Cases:

  • Data visualization (choropleth maps)
  • Natural breakpoints in continuous data
  • Optimal binning for histograms

import { ckmeans } from "simple-statistics";

// Income distribution clustering
const incomes = [25000, 28000, 30000, 45000, 48000, 50000, 75000, 80000, 150000, 200000];
const clusters = ckmeans(incomes, 3);

console.log("Income clusters:");
clusters.forEach((cluster, i) => {
  const min = Math.min(...cluster);
  const max = Math.max(...cluster);
  console.log(`Cluster ${i + 1}: $${min.toLocaleString()} - $${max.toLocaleString()}`);
});

// Result might be:
// Cluster 1: $25,000 - $30,000 (Low income)
// Cluster 2: $45,000 - $50,000 (Middle income)  
// Cluster 3: $75,000 - $200,000 (High income)
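A common follow-up is turning the clusters into class breaks, for example for a choropleth legend. A small sketch, using a hard-coded stand-in for the ckmeans output above:

```typescript
// Stand-in for ckmeans(incomes, 3) output: sorted, non-overlapping clusters.
const clusters: number[][] = [
  [25000, 28000, 30000],
  [45000, 48000, 50000],
  [75000, 80000, 150000, 200000],
];

// The upper bound of each cluster becomes a class break
const breaks = clusters.map(cluster => Math.max(...cluster));
console.log(breaks); // [ 30000, 50000, 200000 ]

// Assign any value to its class by finding the first break it fits under
const classify = (value: number): number => breaks.findIndex(b => value <= b);
console.log(classify(47000)); // 1 (the middle class)
```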

Density Estimation

kernelDensityEstimation (alias: kde) { .api }

type Kernel = 'gaussian' | ((x: number) => number);
type BandwidthMethod = 'nrd' | number;

function kernelDensityEstimation(
  X: number[], 
  kernel?: Kernel, 
  bandwidthMethod?: BandwidthMethod
): (x: number) => number;

Estimates probability density function from sample data using kernel density estimation.

Parameters:

  • X: number[] - Sample data points
  • kernel?: Kernel - Kernel function ('gaussian' or custom function)
  • bandwidthMethod?: BandwidthMethod - Bandwidth selection: 'nrd' (normal reference rule of thumb) or a fixed numeric bandwidth

Returns: (x: number) => number - Density function that estimates P(X=x)
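Conceptually, the returned function evaluates a sum of kernel "bumps" centered on the sample points. A minimal sketch of Gaussian KDE with a fixed bandwidth (illustrative only, not the library's implementation, which also handles bandwidth selection):

```typescript
// density(x) = (1 / (n * h)) * Σ K((x - xi) / h), with K the standard normal pdf
const gaussianKernel = (u: number): number =>
  Math.exp(-0.5 * u * u) / Math.sqrt(2 * Math.PI);

function kde(sample: number[], bandwidth: number): (x: number) => number {
  return (x: number) =>
    sample.reduce((sum, xi) => sum + gaussianKernel((x - xi) / bandwidth), 0) /
    (sample.length * bandwidth);
}

const density = kde([65, 70, 75, 80, 85], 5);
// Density is highest near the center of the sample and falls off in the tails
console.log(density(75) > density(95)); // true
```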

import { kernelDensityEstimation } from "simple-statistics";

// Estimate probability density of test scores
const testScores = [65, 70, 75, 78, 80, 82, 85, 88, 90, 92];
const densityFunction = kernelDensityEstimation(testScores);

// Get density estimates
const density75 = densityFunction(75);
const density85 = densityFunction(85);
const density95 = densityFunction(95);

console.log(`Density at 75: ${density75.toFixed(4)}`);
console.log(`Density at 85: ${density85.toFixed(4)}`);
console.log(`Density at 95: ${density95.toFixed(4)}`);

// Plot density curve
const xRange = Array.from({ length: 50 }, (_, i) => 60 + i);
const densityCurve = xRange.map(x => ({ x, density: densityFunction(x) }));
console.log("Density curve:", densityCurve);

Usage Examples

Text Classification with Naive Bayes

import { BayesianClassifier } from "simple-statistics";

// Sentiment analysis classifier
const sentimentClassifier = new BayesianClassifier();

// Training data
const trainingTexts = [
  { text: "love this product amazing quality", sentiment: "positive" },
  { text: "terrible service very disappointed", sentiment: "negative" },
  { text: "excellent fast shipping", sentiment: "positive" },
  { text: "defective item poor quality", sentiment: "negative" },
  { text: "great value highly recommend", sentiment: "positive" },
];

// Simple feature extraction (word presence)
function extractFeatures(text: string): Record<string, boolean> {
  const words = text.toLowerCase().split(' ');
  const features: Record<string, boolean> = {};
  words.forEach(word => {
    features[`word_${word}`] = true;
  });
  return features;
}

// Train classifier
trainingTexts.forEach(({ text, sentiment }) => {
  const features = extractFeatures(text);
  sentimentClassifier.train(features, sentiment);
});

// Classify new text
const newReview = "fast delivery great product";
const features = extractFeatures(newReview);
const scores = sentimentClassifier.score(features);

console.log("Review:", newReview);
console.log("Sentiment scores:", scores);
console.log("Predicted sentiment:", 
  Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b)
);

Customer Segmentation with Clustering

import { ckmeans, mean, standardDeviation } from "simple-statistics";

// Customer purchase amounts over past year
const customerSpending = [
  50, 75, 80, 120, 150, 180, 200, 250, 300, 350,
  400, 500, 600, 800, 1000, 1200, 1500, 2000, 2500, 3000
];

// Find natural customer segments
const segments = ckmeans(customerSpending, 4);

console.log("Customer Segments:");
segments.forEach((segment, i) => {
  const segmentMean = mean(segment);
  const segmentStd = standardDeviation(segment);
  const min = Math.min(...segment);
  const max = Math.max(...segment);
  
  console.log(`Segment ${i + 1}:`);
  console.log(`  Range: $${min} - $${max}`);
  console.log(`  Average: $${segmentMean.toFixed(0)}`);
  console.log(`  Std Dev: $${segmentStd.toFixed(0)}`);
  console.log(`  Count: ${segment.length} customers`);
});

// Use segments for targeted marketing strategies
const segmentNames = ["Low Spenders", "Regular Customers", "High Value", "VIP"];
segments.forEach((segment, i) => {
  console.log(`${segmentNames[i]}: ${segment.length} customers spending $${Math.min(...segment)}-$${Math.max(...segment)}`);
});

Anomaly Detection with Density Estimation

import { kernelDensityEstimation, mean, standardDeviation } from "simple-statistics";

// Network traffic data (requests per minute)
const normalTraffic = [
  45, 52, 48, 55, 50, 47, 53, 49, 51, 46,
  54, 48, 52, 50, 49, 47, 53, 51, 48, 50
];

// Build density model of normal traffic
const densityModel = kernelDensityEstimation(normalTraffic);
const meanTraffic = mean(normalTraffic);
const stdTraffic = standardDeviation(normalTraffic);

// Monitor new traffic values
const newTrafficValues = [52, 48, 95, 51, 150, 49]; // Some potential anomalies

console.log(`Normal traffic: ${meanTraffic.toFixed(1)} ± ${stdTraffic.toFixed(1)} requests/min`);
console.log("\nAnomaly Detection:");

newTrafficValues.forEach(traffic => {
  const density = densityModel(traffic);
  const zScore = Math.abs((traffic - meanTraffic) / stdTraffic);
  
  console.log(`Traffic: ${traffic} req/min`);
  console.log(`  Density: ${density.toFixed(6)}`);
  console.log(`  Z-score: ${zScore.toFixed(2)}`);
  
  if (density < 0.001 || zScore > 3) {
    console.log(`  🚨 ANOMALY DETECTED`);
  } else {
    console.log(`  ✓ Normal`);
  }
});

Install with Tessl CLI

npx tessl i tessl/npm-simple-statistics
