0
# Data Manipulation
1
2
Utilities for sampling, shuffling, and array manipulation, plus summation, products, and equal-interval binning.
3
4
## Core Imports
5
6
```typescript
7
import {
8
sample,
9
sampleWithReplacement,
10
shuffle,
11
shuffleInPlace,
12
chunk,
13
numericSort,
14
quickselect,
15
uniqueCountSorted,
16
sum,
17
sumSimple,
18
product,
19
sumNthPowerDeviations,
20
equalIntervalBreaks
21
} from "simple-statistics";
22
```
23
24
## Sampling Functions
25
26
### sample { .api }
27
28
```typescript { .api }
29
function sample<T>(population: T[], n: number, randomSource?: () => number): T[];
30
```
31
32
Random sampling without replacement from a population.
33
34
**Parameters:**
35
- `population: T[]` - Source array to sample from
36
- `n: number` - Number of items to sample
37
- `randomSource?: () => number` - Optional random number generator returning a value in the range [0, 1), like `Math.random`
38
39
**Returns:** `T[]` - Array of sampled items (no duplicates)
40
41
```typescript
42
import { sample } from "simple-statistics";
43
44
// Survey sampling
45
const population = ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace'];
46
const surveySample = sample(population, 3);
47
console.log(`Survey participants: ${surveySample.join(', ')}`);
48
// Example: ['Charlie', 'Alice', 'Frank']
49
50
// A/B testing user selection
51
const userIds = [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010];
52
const testGroup = sample(userIds, 5);
53
console.log(`Test group: ${testGroup.join(', ')}`);
54
```
55
56
### sampleWithReplacement { .api }
57
58
```typescript { .api }
59
function sampleWithReplacement<T>(population: T[], n: number, randomSource?: () => number): T[];
60
```
61
62
Random sampling with replacement - items can be selected multiple times.
63
64
**Parameters:**
65
- `population: T[]` - Source array to sample from
66
- `n: number` - Number of items to sample
67
- `randomSource?: () => number` - Optional random number generator
68
69
**Returns:** `T[]` - Array of sampled items (may contain duplicates)
70
71
```typescript
72
import { sampleWithReplacement } from "simple-statistics";
73
74
// Bootstrap sampling for statistics
75
const originalData = [10, 15, 20, 25, 30];
76
const bootstrapSample = sampleWithReplacement(originalData, 10);
77
console.log(`Bootstrap sample: ${bootstrapSample.join(', ')}`);
78
// Example: [15, 25, 10, 25, 20, 30, 15, 20, 25, 10]
79
80
// Monte Carlo simulation sampling
81
const outcomes = ['win', 'lose', 'draw'];
82
const probabilities = [0.4, 0.5, 0.1]; // Illustrative only: sampleWithReplacement draws uniformly and does not use these weights
83
const simulations = sampleWithReplacement(outcomes, 100);
84
```
85
86
## Array Shuffling
87
88
### shuffle { .api }
89
90
```typescript { .api }
91
function shuffle<T>(array: T[], randomSource?: () => number): T[];
92
```
93
94
Fisher-Yates shuffle that returns a new shuffled array (immutable).
95
96
**Parameters:**
97
- `array: T[]` - Array to shuffle
98
- `randomSource?: () => number` - Optional random number generator
99
100
**Returns:** `T[]` - New shuffled array
101
102
```typescript
103
import { shuffle } from "simple-statistics";
104
105
// Card deck shuffling
106
const deck = ['A♠', 'K♠', 'Q♠', 'J♠', '10♠', '9♠', '8♠', '7♠'];
107
const shuffledDeck = shuffle(deck);
108
console.log(`Original: ${deck.join(' ')}`);
109
console.log(`Shuffled: ${shuffledDeck.join(' ')}`);
110
// Original array remains unchanged
111
```
112
113
### shuffleInPlace { .api }
114
115
```typescript { .api }
116
function shuffleInPlace<T>(array: T[], randomSource?: () => number): T[];
117
```
118
119
Fisher-Yates shuffle that modifies the original array (mutable).
120
121
**Parameters:**
122
- `array: T[]` - Array to shuffle in place
123
- `randomSource?: () => number` - Optional random number generator
124
125
**Returns:** `T[]` - Reference to the modified array
126
127
```typescript
128
import { shuffleInPlace } from "simple-statistics";
129
130
// Randomize playlist order
131
const playlist = ['Song1', 'Song2', 'Song3', 'Song4', 'Song5'];
132
shuffleInPlace(playlist);
133
console.log(`Shuffled playlist: ${playlist.join(', ')}`);
134
// Original array is modified
135
```
136
137
## Array Manipulation
138
139
### chunk { .api }
140
141
```typescript { .api }
142
function chunk<T>(array: T[], chunkSize: number): T[][];
143
```
144
145
Splits an array into chunks of specified size.
146
147
**Parameters:**
148
- `array: T[]` - Array to chunk
149
- `chunkSize: number` - Size of each chunk
150
151
**Returns:** `T[][]` - Array of chunks
152
153
```typescript
154
import { chunk } from "simple-statistics";
155
156
// Batch processing
157
const dataPoints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
158
const batches = chunk(dataPoints, 4);
159
console.log(`Batches:`, batches);
160
// [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
161
162
// Pagination
163
const users = ['User1', 'User2', 'User3', 'User4', 'User5', 'User6', 'User7'];
164
const pages = chunk(users, 3);
165
pages.forEach((page, i) => {
166
console.log(`Page ${i + 1}: ${page.join(', ')}`);
167
});
168
```
169
170
### numericSort { .api }
171
172
```typescript { .api }
173
function numericSort(array: number[]): number[];
174
```
175
176
Sorts an array of numbers in ascending order.
177
178
**Parameters:**
179
- `array: number[]` - Array of numbers to sort
180
181
**Returns:** `number[]` - New sorted array
182
183
```typescript
184
import { numericSort } from "simple-statistics";
185
186
const unsorted = [23, 1, 45, 12, 7, 89, 34];
187
const sorted = numericSort(unsorted);
188
console.log(`Sorted: ${sorted.join(', ')}`); // 1, 7, 12, 23, 34, 45, 89
189
```
190
191
### quickselect { .api }
192
193
```typescript { .api }
194
function quickselect(array: number[], k: number, left?: number, right?: number): void;
195
```
196
197
Rearranges `array` in place so that `array[k]` holds the kth smallest element (0-based), without fully sorting the array (O(n) average time). Pass a copy if the original order must be preserved.
198
199
**Parameters:**
200
- `array: number[]` - Array of numbers (mutated in place)
201
- `k: number` - Index of element to find (0-based)
202
- `left?: number` - Optional left boundary
203
- `right?: number` - Optional right boundary
204
205
**Returns:** `void` - Read the kth smallest element from `array[k]` after the call
206
207
```typescript
208
import { quickselect } from "simple-statistics";
209
210
const numbers = [7, 2, 9, 1, 5, 8, 3];
211
212
// Find median without full sort
213
const medianIndex = Math.floor(numbers.length / 2);
214
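const medianWork = [...numbers]; // copy: quickselect reorders the array it is given
quickselect(medianWork, medianIndex);
const median = medianWork[medianIndex]; // 5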
215
216
// Find 2nd smallest
217
const secondWork = [...numbers]; // copy: quickselect reorders the array it is given
quickselect(secondWork, 1);
const secondSmallest = secondWork[1]; // 2
218
219
console.log(`Median: ${median}`);
220
console.log(`2nd smallest: ${secondSmallest}`);
221
```
222
223
### uniqueCountSorted { .api }
224
225
```typescript { .api }
226
function uniqueCountSorted(array: number[]): number;
227
```
228
229
Counts unique values in a pre-sorted array.
230
231
**Parameters:**
232
- `array: number[]` - Pre-sorted array of numbers
233
234
**Returns:** `number` - Count of unique values
235
236
```typescript
237
import { uniqueCountSorted } from "simple-statistics";
238
239
const sortedWithDuplicates = [1, 1, 2, 2, 2, 3, 4, 4, 5];
240
const uniqueCount = uniqueCountSorted(sortedWithDuplicates); // 5
241
console.log(`Unique values: ${uniqueCount}`);
242
```
243
244
## Summation and Products
245
246
### sum { .api }
247
248
```typescript { .api }
249
function sum(values: number[]): number;
250
```
251
252
Accurate summation using Kahan compensated summation algorithm to minimize floating-point errors.
253
254
**Parameters:**
255
- `values: number[]` - Array of numbers to sum
256
257
**Returns:** `number` - Sum with improved numerical precision
258
259
```typescript
260
import { sum } from "simple-statistics";
261
262
// High precision summation
263
const preciseValues = [0.1, 0.2, 0.3, 0.4, 0.5];
264
const accurateSum = sum(preciseValues); // 1.5 (compensated summation limits accumulated rounding error)
265
const naiveSum = preciseValues.reduce((a, b) => a + b, 0); // May have floating-point error
266
267
console.log(`Accurate sum: ${accurateSum}`);
268
console.log(`Naive sum: ${naiveSum}`);
269
```
270
271
### sumSimple { .api }
272
273
```typescript { .api }
274
function sumSimple(values: number[]): number;
275
```
276
277
Simple summation without compensation (faster but less precise).
278
279
### product { .api }
280
281
```typescript { .api }
282
function product(values: number[]): number;
283
```
284
285
Calculates the product of all values in an array.
286
287
**Parameters:**
288
- `values: number[]` - Array of numbers
289
290
**Returns:** `number` - Product of all values
291
292
```typescript
293
import { product } from "simple-statistics";
294
295
const factors = [2, 3, 4, 5];
296
const result = product(factors); // 120
297
console.log(`Product: ${result}`);
298
299
// Compound interest calculation
300
const growthRates = [1.05, 1.03, 1.07, 1.02]; // 5%, 3%, 7%, 2% annual growth
301
const totalGrowth = product(growthRates); // 1.180...
302
console.log(`Total growth factor: ${totalGrowth.toFixed(3)}`);
303
```
304
305
### sumNthPowerDeviations { .api }
306
307
```typescript { .api }
308
function sumNthPowerDeviations(values: number[], n?: number): number;
309
```
310
311
Calculates the sum of nth power deviations from the mean. The mean is computed internally from `values`; it is not a parameter.
312
313
**Parameters:**
314
- `values: number[]` - Array of numbers
315
316
- `n?: number` - Power applied to each deviation (2 for squared deviations, 3 for cubed deviations); pass it explicitly
317
318
**Returns:** `number` - Sum of nth power deviations
319
320
```typescript
321
import { sumNthPowerDeviations, mean } from "simple-statistics";
322
323
const data = [1, 2, 3, 4, 5];
324
const dataMean = mean(data); // 3 (shown for reference; sumNthPowerDeviations computes the mean itself)
325
326
// Sum of squared deviations (for variance calculation)
327
const sumSquaredDeviations = sumNthPowerDeviations(data, 2); // 10
328
329
// Sum of cubed deviations (for skewness calculation)
330
const sumCubedDeviations = sumNthPowerDeviations(data, 3); // 0
331
```
332
333
### equalIntervalBreaks { .api }
334
335
```typescript { .api }
336
function equalIntervalBreaks(values: number[], nClasses: number): number[];
337
```
338
339
Creates equal-width intervals for data binning and histogram creation.
340
341
**Parameters:**
342
- `values: number[]` - Data values to create breaks for
343
- `nClasses: number` - Number of intervals/classes to create
344
345
**Returns:** `number[]` - Array of break points defining intervals
346
347
```typescript
348
import { equalIntervalBreaks } from "simple-statistics";
349
350
// Income distribution binning
351
const incomes = [25000, 35000, 42000, 58000, 67000, 78000, 95000, 120000];
352
const incomeBreaks = equalIntervalBreaks(incomes, 4);
353
console.log(`Income brackets: ${incomeBreaks.join(', ')}`);
354
// Example: [25000, 48750, 72500, 96250, 120000]
355
356
// Create histogram bins
357
const data = [1, 3, 7, 8, 12, 15, 18, 22, 25, 28];
358
const breaks = equalIntervalBreaks(data, 5);
359
const bins = breaks.slice(0, -1).map((breakpoint, i) => ({
360
range: `${breakpoint}-${breaks[i + 1]}`,
361
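count: data.filter(d => d >= breakpoint && (i === breaks.length - 2 ? d <= breaks[i + 1] : d < breaks[i + 1])).length // last bin is closed so the maximum value is counted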
362
}));
363
364
console.log("Histogram bins:");
365
bins.forEach(bin => console.log(`${bin.range}: ${bin.count} items`));
366
```
367
368
## Usage Examples
369
370
### Data Science Pipeline
371
372
```typescript
373
import { sample, sampleWithReplacement, shuffle, chunk, sum, numericSort } from "simple-statistics";
374
375
// Prepare dataset for machine learning
376
const fullDataset = Array.from({ length: 1000 }, (_, i) => ({
377
id: i + 1,
378
feature1: Math.random() * 100,
379
feature2: Math.random() * 50,
380
label: Math.random() > 0.5 ? 1 : 0
381
}));
382
383
// 1. Shuffle data to remove ordering bias
384
const shuffledData = shuffle(fullDataset);
385
386
// 2. Split into train/test sets
387
const trainSize = Math.floor(shuffledData.length * 0.8);
388
const trainData = shuffledData.slice(0, trainSize);
389
const testData = shuffledData.slice(trainSize);
390
391
// 3. Create mini-batches for training
392
const batchSize = 32;
393
const trainBatches = chunk(trainData, batchSize);
394
395
console.log(`Dataset split: ${trainData.length} train, ${testData.length} test`);
396
console.log(`Training batches: ${trainBatches.length} batches of ${batchSize}`);
397
398
// 4. Bootstrap sampling for model validation
399
const bootstrapSamples = Array.from({ length: 100 }, () =>
400
sampleWithReplacement(trainData, trainData.length)
401
);
402
403
console.log(`Created ${bootstrapSamples.length} bootstrap samples`);
404
```
405
406
### A/B Testing Framework
407
408
```typescript
409
import { sample, shuffle, mean, sum } from "simple-statistics";
410
411
// User pool for A/B testing
412
const allUsers = Array.from({ length: 10000 }, (_, i) => ({
413
userId: i + 1,
414
segment: Math.random() > 0.7 ? 'premium' : 'free',
415
activity: Math.random() * 100
416
}));
417
418
// Stratified sampling to ensure representative groups
419
const premiumUsers = allUsers.filter(u => u.segment === 'premium');
420
const freeUsers = allUsers.filter(u => u.segment === 'free');
421
422
const testSize = 1000;
423
const premiumRatio = premiumUsers.length / allUsers.length;
424
const premiumTestSize = Math.floor(testSize * premiumRatio);
425
const freeTestSize = testSize - premiumTestSize;
426
427
// Sample from each stratum
428
const testPremium = sample(premiumUsers, premiumTestSize);
429
const testFree = sample(freeUsers, freeTestSize);
430
const testGroup = shuffle([...testPremium, ...testFree]);
431
432
// Split test group between variants
433
const midpoint = Math.floor(testGroup.length / 2);
434
const variantA = testGroup.slice(0, midpoint);
435
const variantB = testGroup.slice(midpoint);
436
437
console.log("A/B Test Setup:");
438
console.log(`Variant A: ${variantA.length} users`);
439
console.log(`Variant B: ${variantB.length} users`);
440
console.log(`Premium users in test: ${sum([testPremium.length])} (${(premiumRatio * 100).toFixed(1)}%)`);
441
```
442
443
### Monte Carlo Simulation
444
445
```typescript
446
import { sampleWithReplacement, numericSort, mean, sum, chunk } from "simple-statistics";
447
448
// Portfolio risk simulation
449
const stockReturns = {
450
'AAPL': [0.12, -0.05, 0.08, 0.15, -0.02, 0.11, 0.06],
451
'GOOGL': [0.18, -0.08, 0.12, 0.22, -0.01, 0.14, 0.09],
452
'MSFT': [0.15, -0.03, 0.09, 0.18, 0.01, 0.12, 0.07]
453
};
454
455
const portfolio = { 'AAPL': 0.4, 'GOOGL': 0.35, 'MSFT': 0.25 };
456
const numSimulations = 10000;
457
const timeHorizon = 252; // trading days in a year
458
459
// Monte Carlo simulation
460
const simulationResults = [];
461
462
for (let sim = 0; sim < numSimulations; sim++) {
463
let portfolioValue = 100000; // Starting value
464
465
for (let day = 0; day < timeHorizon; day++) {
466
let dailyReturn = 0;
467
468
for (const [stock, weight] of Object.entries(portfolio)) {
469
const historicalReturns = stockReturns[stock as keyof typeof stockReturns];
470
const randomReturn = sampleWithReplacement(historicalReturns, 1)[0];
471
dailyReturn += weight * randomReturn;
472
}
473
474
portfolioValue *= (1 + dailyReturn / 252); // Daily compounding
475
}
476
477
simulationResults.push(portfolioValue);
478
}
479
480
// Analyze results
481
const sortedResults = numericSort(simulationResults);
482
const meanValue = mean(sortedResults);
483
const var95 = sortedResults[Math.floor(sortedResults.length * 0.05)]; // 5th percentile
484
485
console.log("Portfolio Simulation Results:");
486
console.log(`Expected value: $${meanValue.toLocaleString()}`);
487
console.log(`95% VaR: $${(100000 - var95).toLocaleString()} loss`);
488
console.log(`Probability of loss: ${(sortedResults.filter(v => v < 100000).length / numSimulations * 100).toFixed(1)}%`);
489
```