or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-operations.md combinatorics.md data-manipulation.md descriptive-statistics.md distributions.md index.md machine-learning.md math-utilities.md quantiles.md regression.md testing.md

docs/data-manipulation.md

0

# Data Manipulation

1

2

Functions for sampling, shuffling, and array manipulation utilities.

3

4

## Core Imports

5

6

```typescript

7

import {

8

sample,

9

sampleWithReplacement,

10

shuffle,

11

shuffleInPlace,

12

chunk,

13

numericSort,

14

quickselect,

15

uniqueCountSorted,

16

sum,

17

sumSimple,

18

product,

19

sumNthPowerDeviations,

20

equalIntervalBreaks

21

} from "simple-statistics";

22

```

23

24

## Sampling Functions

25

26

### sample { .api }

27

28

```typescript { .api }

29

function sample<T>(population: T[], n: number, randomSource?: () => number): T[];

30

```

31

32

Random sampling without replacement from a population.

33

34

**Parameters:**

35

- `population: T[]` - Source array to sample from

36

- `n: number` - Number of items to sample

37

- `randomSource?: () => number` - Optional random number generator (0-1 range)

38

39

**Returns:** `T[]` - Array of sampled items (no duplicates)

40

41

```typescript

42

import { sample } from "simple-statistics";

43

44

// Survey sampling

45

const population = ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank', 'Grace'];

46

const surveySample = sample(population, 3);

47

console.log(`Survey participants: ${surveySample.join(', ')}`);

48

// Example: ['Charlie', 'Alice', 'Frank']

49

50

// A/B testing user selection

51

const userIds = [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010];

52

const testGroup = sample(userIds, 5);

53

console.log(`Test group: ${testGroup.join(', ')}`);

54

```

55

56

### sampleWithReplacement { .api }

57

58

```typescript { .api }

59

function sampleWithReplacement<T>(population: T[], n: number, randomSource?: () => number): T[];

60

```

61

62

Random sampling with replacement - items can be selected multiple times.

63

64

**Parameters:**

65

- `population: T[]` - Source array to sample from

66

- `n: number` - Number of items to sample

67

- `randomSource?: () => number` - Optional random number generator

68

69

**Returns:** `T[]` - Array of sampled items (may contain duplicates)

70

71

```typescript

72

import { sampleWithReplacement } from "simple-statistics";

73

74

// Bootstrap sampling for statistics

75

const originalData = [10, 15, 20, 25, 30];

76

const bootstrapSample = sampleWithReplacement(originalData, 10);

77

console.log(`Bootstrap sample: ${bootstrapSample.join(', ')}`);

78

// Example: [15, 25, 10, 25, 20, 30, 15, 20, 25, 10]

79

80

// Monte Carlo simulation sampling

81

const outcomes = ['win', 'lose', 'draw'];

82

const probabilities = [0.4, 0.5, 0.1]; // Conceptual only: sampleWithReplacement draws uniformly and does not use these weights

83

const simulations = sampleWithReplacement(outcomes, 100);

84

```

85

86

## Array Shuffling

87

88

### shuffle { .api }

89

90

```typescript { .api }

91

function shuffle<T>(array: T[], randomSource?: () => number): T[];

92

```

93

94

Fisher-Yates shuffle that returns a new shuffled array (immutable).

95

96

**Parameters:**

97

- `array: T[]` - Array to shuffle

98

- `randomSource?: () => number` - Optional random number generator

99

100

**Returns:** `T[]` - New shuffled array

101

102

```typescript

103

import { shuffle } from "simple-statistics";

104

105

// Card deck shuffling

106

const deck = ['A♠', 'K♠', 'Q♠', 'J♠', '10♠', '9♠', '8♠', '7♠'];

107

const shuffledDeck = shuffle(deck);

108

console.log(`Original: ${deck.join(' ')}`);

109

console.log(`Shuffled: ${shuffledDeck.join(' ')}`);

110

// Original array remains unchanged

111

```

112

113

### shuffleInPlace { .api }

114

115

```typescript { .api }

116

function shuffleInPlace<T>(array: T[], randomSource?: () => number): T[];

117

```

118

119

Fisher-Yates shuffle that modifies the original array (mutable).

120

121

**Parameters:**

122

- `array: T[]` - Array to shuffle in place

123

- `randomSource?: () => number` - Optional random number generator

124

125

**Returns:** `T[]` - Reference to the modified array

126

127

```typescript

128

import { shuffleInPlace } from "simple-statistics";

129

130

// Randomize playlist order

131

const playlist = ['Song1', 'Song2', 'Song3', 'Song4', 'Song5'];

132

shuffleInPlace(playlist);

133

console.log(`Shuffled playlist: ${playlist.join(', ')}`);

134

// Original array is modified

135

```

136

137

## Array Manipulation

138

139

### chunk { .api }

140

141

```typescript { .api }

142

function chunk<T>(array: T[], chunkSize: number): T[][];

143

```

144

145

Splits an array into chunks of specified size.

146

147

**Parameters:**

148

- `array: T[]` - Array to chunk

149

- `chunkSize: number` - Size of each chunk

150

151

**Returns:** `T[][]` - Array of chunks

152

153

```typescript

154

import { chunk } from "simple-statistics";

155

156

// Batch processing

157

const dataPoints = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

158

const batches = chunk(dataPoints, 4);

159

console.log(`Batches:`, batches);

160

// [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

161

162

// Pagination

163

const users = ['User1', 'User2', 'User3', 'User4', 'User5', 'User6', 'User7'];

164

const pages = chunk(users, 3);

165

pages.forEach((page, i) => {

166

console.log(`Page ${i + 1}: ${page.join(', ')}`);

167

});

168

```

169

170

### numericSort { .api }

171

172

```typescript { .api }

173

function numericSort(array: number[]): number[];

174

```

175

176

Sorts an array of numbers in ascending order.

177

178

**Parameters:**

179

- `array: number[]` - Array of numbers to sort

180

181

**Returns:** `number[]` - New sorted array

182

183

```typescript

184

import { numericSort } from "simple-statistics";

185

186

const unsorted = [23, 1, 45, 12, 7, 89, 34];

187

const sorted = numericSort(unsorted);

188

console.log(`Sorted: ${sorted.join(', ')}`); // 1, 7, 12, 23, 34, 45, 89

189

```

190

191

### quickselect { .api }

192

193

```typescript { .api }

194

function quickselect(array: number[], k: number, left?: number, right?: number): void;

195

```

196

197

Partially reorders the array in place using the quickselect algorithm (O(n) average time) so that the element at index `k` is the kth smallest, with no larger elements before it and no smaller elements after it.

198

199

**Parameters:**

200

- `array: number[]` - Array of numbers (reordered in place)

201

- `k: number` - Index of element to find (0-based)

202

- `left?: number` - Optional left boundary

203

- `right?: number` - Optional right boundary

204

205

**Returns:** `void` - Nothing is returned; after the call, `array[k]` holds the kth smallest element

206

207

```typescript

208

import { quickselect } from "simple-statistics";

209

210

const numbers = [7, 2, 9, 1, 5, 8, 3];

211

212

// Find median without full sort (work on a copy; quickselect reorders its input)

213

const medianIndex = Math.floor(numbers.length / 2);

214

const medianWork = [...numbers]; quickselect(medianWork, medianIndex); const median = medianWork[medianIndex]; // 5

215

216

// Find 2nd smallest

217

const secondWork = [...numbers]; quickselect(secondWork, 1); const secondSmallest = secondWork[1]; // 2

218

219

console.log(`Median: ${median}`);

220

console.log(`2nd smallest: ${secondSmallest}`);

221

```

222

223

### uniqueCountSorted { .api }

224

225

```typescript { .api }

226

function uniqueCountSorted(array: number[]): number;

227

```

228

229

Counts unique values in a pre-sorted array.

230

231

**Parameters:**

232

- `array: number[]` - Pre-sorted array of numbers

233

234

**Returns:** `number` - Count of unique values

235

236

```typescript

237

import { uniqueCountSorted } from "simple-statistics";

238

239

const sortedWithDuplicates = [1, 1, 2, 2, 2, 3, 4, 4, 5];

240

const uniqueCount = uniqueCountSorted(sortedWithDuplicates); // 5

241

console.log(`Unique values: ${uniqueCount}`);

242

```

243

244

## Summation and Products

245

246

### sum { .api }

247

248

```typescript { .api }

249

function sum(values: number[]): number;

250

```

251

252

Accurate summation using Kahan compensated summation algorithm to minimize floating-point errors.

253

254

**Parameters:**

255

- `values: number[]` - Array of numbers to sum

256

257

**Returns:** `number` - Sum with improved numerical precision

258

259

```typescript

260

import { sum } from "simple-statistics";

261

262

// High precision summation

263

const preciseValues = [0.1, 0.2, 0.3, 0.4, 0.5];

264

const accurateSum = sum(preciseValues); // 1.5 (exactly)

265

const naiveSum = preciseValues.reduce((a, b) => a + b, 0); // May have floating-point error

266

267

console.log(`Accurate sum: ${accurateSum}`);

268

console.log(`Naive sum: ${naiveSum}`);

269

```

270

271

### sumSimple { .api }

272

273

```typescript { .api }

274

function sumSimple(values: number[]): number;

275

```

276

277

Simple summation without compensation (faster but less precise).

278

279

### product { .api }

280

281

```typescript { .api }

282

function product(values: number[]): number;

283

```

284

285

Calculates the product of all values in an array.

286

287

**Parameters:**

288

- `values: number[]` - Array of numbers

289

290

**Returns:** `number` - Product of all values

291

292

```typescript

293

import { product } from "simple-statistics";

294

295

const factors = [2, 3, 4, 5];

296

const result = product(factors); // 120

297

console.log(`Product: ${result}`);

298

299

// Compound interest calculation

300

const growthRates = [1.05, 1.03, 1.07, 1.02]; // 5%, 3%, 7%, 2% annual growth

301

const totalGrowth = product(growthRates); // 1.180...

302

console.log(`Total growth factor: ${totalGrowth.toFixed(3)}`);

303

```

304

305

### sumNthPowerDeviations { .api }

306

307

```typescript { .api }

308

function sumNthPowerDeviations(values: number[], mean?: number, n?: number): number;

309

```

310

311

Calculates sum of nth power deviations from the mean.

312

313

**Parameters:**

314

- `values: number[]` - Array of numbers

315

- `mean?: number` - Optional mean (calculated if not provided)

316

- `n?: number` - Power (default: 2 for sum of squared deviations)

317

318

**Returns:** `number` - Sum of nth power deviations

319

320

```typescript

321

import { sumNthPowerDeviations, mean } from "simple-statistics";

322

323

const data = [1, 2, 3, 4, 5];

324

const dataMean = mean(data); // 3

325

326

// Sum of squared deviations (for variance calculation)

327

const sumSquaredDeviations = sumNthPowerDeviations(data, dataMean, 2); // 10

328

329

// Sum of cubed deviations (for skewness calculation)

330

const sumCubedDeviations = sumNthPowerDeviations(data, dataMean, 3); // 0

331

```

332

333

### equalIntervalBreaks { .api }

334

335

```typescript { .api }

336

function equalIntervalBreaks(values: number[], nClasses: number): number[];

337

```

338

339

Creates equal-width intervals for data binning and histogram creation.

340

341

**Parameters:**

342

- `values: number[]` - Data values to create breaks for

343

- `nClasses: number` - Number of intervals/classes to create

344

345

**Returns:** `number[]` - Array of break points defining intervals

346

347

```typescript

348

import { equalIntervalBreaks } from "simple-statistics";

349

350

// Income distribution binning

351

const incomes = [25000, 35000, 42000, 58000, 67000, 78000, 95000, 120000];

352

const incomeBreaks = equalIntervalBreaks(incomes, 4);

353

console.log(`Income brackets: ${incomeBreaks.join(', ')}`);

354

// Example: [25000, 48750, 72500, 96250, 120000]

355

356

// Create histogram bins

357

const data = [1, 3, 7, 8, 12, 15, 18, 22, 25, 28];

358

const breaks = equalIntervalBreaks(data, 5);

359

const bins = breaks.slice(0, -1).map((breakpoint, i) => ({

360

range: `${breakpoint}-${breaks[i + 1]}`,

361

count: data.filter(d => d >= breakpoint && (d < breaks[i + 1] || i === breaks.length - 2)).length // last bin is closed so the maximum value is counted

362

}));

363

364

console.log("Histogram bins:");

365

bins.forEach(bin => console.log(`${bin.range}: ${bin.count} items`));

366

```

367

368

## Usage Examples

369

370

### Data Science Pipeline

371

372

```typescript

373

import { sample, sampleWithReplacement, shuffle, chunk, sum, numericSort } from "simple-statistics";

374

375

// Prepare dataset for machine learning

376

const fullDataset = Array.from({ length: 1000 }, (_, i) => ({

377

id: i + 1,

378

feature1: Math.random() * 100,

379

feature2: Math.random() * 50,

380

label: Math.random() > 0.5 ? 1 : 0

381

}));

382

383

// 1. Shuffle data to remove ordering bias

384

const shuffledData = shuffle(fullDataset);

385

386

// 2. Split into train/test sets

387

const trainSize = Math.floor(shuffledData.length * 0.8);

388

const trainData = shuffledData.slice(0, trainSize);

389

const testData = shuffledData.slice(trainSize);

390

391

// 3. Create mini-batches for training

392

const batchSize = 32;

393

const trainBatches = chunk(trainData, batchSize);

394

395

console.log(`Dataset split: ${trainData.length} train, ${testData.length} test`);

396

console.log(`Training batches: ${trainBatches.length} batches of ${batchSize}`);

397

398

// 4. Bootstrap sampling for model validation

399

const bootstrapSamples = Array.from({ length: 100 }, () =>

400

sampleWithReplacement(trainData, trainData.length)

401

);

402

403

console.log(`Created ${bootstrapSamples.length} bootstrap samples`);

404

```

405

406

### A/B Testing Framework

407

408

```typescript

409

import { sample, shuffle, mean, sum } from "simple-statistics";

410

411

// User pool for A/B testing

412

const allUsers = Array.from({ length: 10000 }, (_, i) => ({

413

userId: i + 1,

414

segment: Math.random() > 0.7 ? 'premium' : 'free',

415

activity: Math.random() * 100

416

}));

417

418

// Stratified sampling to ensure representative groups

419

const premiumUsers = allUsers.filter(u => u.segment === 'premium');

420

const freeUsers = allUsers.filter(u => u.segment === 'free');

421

422

const testSize = 1000;

423

const premiumRatio = premiumUsers.length / allUsers.length;

424

const premiumTestSize = Math.floor(testSize * premiumRatio);

425

const freeTestSize = testSize - premiumTestSize;

426

427

// Sample from each stratum

428

const testPremium = sample(premiumUsers, premiumTestSize);

429

const testFree = sample(freeUsers, freeTestSize);

430

const testGroup = shuffle([...testPremium, ...testFree]);

431

432

// Split test group between variants

433

const midpoint = Math.floor(testGroup.length / 2);

434

const variantA = testGroup.slice(0, midpoint);

435

const variantB = testGroup.slice(midpoint);

436

437

console.log("A/B Test Setup:");

438

console.log(`Variant A: ${variantA.length} users`);

439

console.log(`Variant B: ${variantB.length} users`);

440

console.log(`Premium users in test: ${sum([testPremium.length])} (${(premiumRatio * 100).toFixed(1)}%)`);

441

```

442

443

### Monte Carlo Simulation

444

445

```typescript

446

import { sampleWithReplacement, mean, sum, chunk, numericSort } from "simple-statistics";

447

448

// Portfolio risk simulation

449

const stockReturns = {

450

'AAPL': [0.12, -0.05, 0.08, 0.15, -0.02, 0.11, 0.06],

451

'GOOGL': [0.18, -0.08, 0.12, 0.22, -0.01, 0.14, 0.09],

452

'MSFT': [0.15, -0.03, 0.09, 0.18, 0.01, 0.12, 0.07]

453

};

454

455

const portfolio = { 'AAPL': 0.4, 'GOOGL': 0.35, 'MSFT': 0.25 };

456

const numSimulations = 10000;

457

const timeHorizon = 252; // trading days in a year

458

459

// Monte Carlo simulation

460

const simulationResults = [];

461

462

for (let sim = 0; sim < numSimulations; sim++) {

463

let portfolioValue = 100000; // Starting value

464

465

for (let day = 0; day < timeHorizon; day++) {

466

let dailyReturn = 0;

467

468

for (const [stock, weight] of Object.entries(portfolio)) {

469

const historicalReturns = stockReturns[stock as keyof typeof stockReturns];

470

const randomReturn = sampleWithReplacement(historicalReturns, 1)[0];

471

dailyReturn += weight * randomReturn;

472

}

473

474

portfolioValue *= (1 + dailyReturn / 252); // Daily compounding

475

}

476

477

simulationResults.push(portfolioValue);

478

}

479

480

// Analyze results

481

const sortedResults = numericSort(simulationResults);

482

const meanValue = mean(sortedResults);

483

const var95 = sortedResults[Math.floor(sortedResults.length * 0.05)]; // 5th percentile

484

485

console.log("Portfolio Simulation Results:");

486

console.log(`Expected value: $${meanValue.toLocaleString()}`);

487

console.log(`95% VaR: $${(100000 - var95).toLocaleString()} loss`);

488

console.log(`Probability of loss: ${(sortedResults.filter(v => v < 100000).length / numSimulations * 100).toFixed(1)}%`);

489

```