tessl/npm-mnemonist

Curated collection of efficient data structures for JavaScript/TypeScript applications.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Securityby

Pending

The risk profile of this skill

Overview

Eval results

Files

Specialized Algorithms & Structures

Name: tessl/npm-mnemonist
Author: tessl

Advanced data structures for specific algorithmic applications including bloom filters, suffix arrays, and search indexes.

Capabilities

BloomFilter

Probabilistic data structure for membership testing with configurable false positive rate.

/**
 * Bloom filter for probabilistic membership testing
 * @param capacity Expected number of items
 */
function BloomFilter(capacity: number): BloomFilter;

/**
 * Bloom filter with options object
 * @param options Configuration object with capacity and optional errorRate
 */
function BloomFilter(options: BloomFilterOptions): BloomFilter;

type BloomFilterOptions = {
  capacity: number;
  errorRate?: number;
}

interface BloomFilter {
  /** Expected capacity */
  readonly capacity: number;
  
  /** Configured error rate */
  readonly errorRate: number;
  
  /** Number of hash functions used */
  readonly hashFunctions: number;
  
  /** Add string to filter */
  add(item: string): this;
  
  /** Test if string might be in filter */
  test(item: string): boolean;
  
  /** Remove all items */
  clear(): void;
  
  /** Export filter state as Uint8Array */
  toJSON(): Uint8Array;
  
  inspect(): any;
  
  /** Create bloom filter from iterable */
  static from(iterable: Iterable<string>, options?: number | BloomFilterOptions): BloomFilter;
}

SuffixArray

Suffix array for efficient string matching and substring queries.

/**
 * Suffix array for string pattern matching
 * @param text Input string or array of strings
 */
function SuffixArray(text: string | Array<string>): SuffixArray;

interface SuffixArray {
  /** The suffix array */
  readonly array: Uint32Array;
  
  /** Length of the suffix array */
  readonly length: number;
  
  /** Original string(s) */
  readonly string: string | Array<string>;
  
  /** Search for pattern occurrences */
  search(pattern: string): Array<number>;
  
  /** Find longest common substring */
  longestCommonSubstring(): {length: number, index: number} | null;
  
  inspect(): any;
}

GeneralizedSuffixArray

Suffix array for multiple strings with cross-string pattern matching.

/**
 * Generalized suffix array for multiple strings
 * @param strings Array of input strings
 */
function GeneralizedSuffixArray(strings: Array<string>): GeneralizedSuffixArray;

interface GeneralizedSuffixArray {
  readonly array: Uint32Array;
  readonly length: number;
  readonly strings: Array<string>;
  
  /** Search for pattern across all strings */
  search(pattern: string): Array<{string: number, index: number}>;
  
  /** Find longest common substring across strings */
  longestCommonSubstring(): {length: number, strings: Array<number>, index: number} | null;
  
  inspect(): any;
}

InvertedIndex

Full-text search index with TF-IDF scoring and configurable tokenization.

/**
 * Inverted index for full-text search
 * @param tokenizer Optional custom tokenizer function
 */
function InvertedIndex(tokenizer?: TokenizerFunction): InvertedIndex;

interface InvertedIndex {
  readonly size: number;
  
  /** Add document with its tokens */
  add(document: any, tokens: Array<string>): this;
  
  /** Search for documents matching query */
  search(query: Array<string>): Array<{document: any, score: number}>;
  
  /** Get all documents containing term */
  get(term: string): Array<any>;
  
  /** Remove all documents and terms */
  clear(): void;
  
  inspect(): any;
}

type TokenizerFunction = (text: string) => Array<string>;

SymSpell

Spelling correction data structure using symmetric delete spelling correction algorithm.

/**
 * SymSpell algorithm for spelling correction
 * @param options Configuration options
 */
function SymSpell(options?: SymSpellOptions): SymSpell;

interface SymSpellOptions {
  /** Maximum edit distance for corrections (default: 2) */
  maxEditDistance?: number;
  /** Minimum frequency threshold (default: 1) */
  countThreshold?: number;
  /** Maximum number of suggestions (default: 5) */
  maxSuggestions?: number;
}

interface SymSpell {
  readonly size: number;
  
  /** Add word with optional frequency */
  add(word: string, frequency?: number): this;
  
  /** Get spelling corrections for query */
  search(query: string, maxEditDistance?: number, suggestion?: SuggestionVerbosity): Array<Suggestion>;
  
  /** Remove all words */
  clear(): void;
  
  inspect(): any;
}

interface Suggestion {
  term: string;
  distance: number;
  frequency: number;
}

enum SuggestionVerbosity {
  Top,      // Only top suggestion
  Closest,  // All suggestions with minimum edit distance
  All       // All suggestions within edit distance
}

PassjoinIndex

Index for efficient similarity joins using the Passjoin algorithm.

/**
 * Passjoin index for similarity joins
 * @param options Configuration for similarity matching
 */
function PassjoinIndex(options: PassjoinOptions): PassjoinIndex;

interface PassjoinOptions {
  /** Similarity threshold (0-1) */
  threshold: number;
  /** Tokenizer function */
  tokenizer: TokenizerFunction;
  /** Optional distance function */
  distance?: DistanceFunction;
}

interface PassjoinIndex {
  readonly size: number;
  
  /** Add record to index */
  add(record: any): this;
  
  /** Find similar records above threshold */
  search(query: any, threshold?: number): Array<{record: any, similarity: number}>;
  
  /** Remove all records */
  clear(): void;
  
  inspect(): any;
}

HashedArrayTree

Hash array mapped trie for efficient dynamic array operations.

/**
 * Hashed Array Tree for dynamic arrays
 * @param options Optional configuration
 */
function HashedArrayTree<T>(options?: HATOptions): HashedArrayTree<T>;

interface HATOptions {
  initialCapacity?: number;
  factor?: number;
}

interface HashedArrayTree<T> {
  readonly length: number;
  readonly size: number;
  
  /** Add item to end */
  push(item: T): number;
  
  /** Get item at index */
  get(index: number): T | undefined;
  
  /** Set item at index */
  set(index: number, value: T): void;
  
  /** Remove and return last item */
  pop(): T | undefined;
  
  /** Remove all items */
  clear(): void;
  
  values(): IterableIterator<T>;
  [Symbol.iterator](): IterableIterator<T>;
  
  inspect(): any;
}

Usage Examples

BloomFilter Example

import {BloomFilter} from "mnemonist";

// URL deduplication with 1% false positive rate
const seenUrls = new BloomFilter(1000000, 0.01);

function crawlUrl(url: string) {
  if (seenUrls.test(url)) {
    console.log("Probably already crawled:", url);
    return; // Might be false positive, but likely already seen
  }
  
  // Crawl the URL
  console.log("Crawling new URL:", url);
  seenUrls.add(url);
}

crawlUrl("https://example.com");
crawlUrl("https://example.org");
crawlUrl("https://example.com"); // "Probably already crawled"

SuffixArray Example

import {SuffixArray} from "mnemonist";

// Text search and analysis
const text = "banana";
const suffixArray = new SuffixArray(text);

// Find all occurrences of pattern
const matches = suffixArray.search("ana");
console.log(matches); // [1, 3] - positions where "ana" occurs

// Find longest repeated substring
const lcs = suffixArray.longestCommonSubstring();
console.log(lcs); // {length: 3, index: 1} - "ana" at position 1

// DNA sequence analysis
const dna = "ATCGATCGATCG";
const dnaArray = new SuffixArray(dna);
const repeats = dnaArray.search("ATCG");
console.log(repeats); // All positions of ATCG pattern

InvertedIndex Example

import {InvertedIndex} from "mnemonist";

// Simple search engine
const searchIndex = new InvertedIndex();

// Custom tokenizer
function tokenize(text: string): Array<string> {
  return text.toLowerCase()
    .replace(/[^\w\s]/g, "")
    .split(/\s+/)
    .filter(token => token.length > 2);
}

const index = new InvertedIndex(tokenize);

// Add documents
index.add({id: 1, title: "Machine Learning Basics"}, 
          tokenize("Introduction to machine learning algorithms"));
index.add({id: 2, title: "Deep Learning Guide"},
          tokenize("Deep neural networks and machine learning"));
index.add({id: 3, title: "Data Science Tools"},
          tokenize("Tools for data analysis and statistics"));

// Search for documents
const results = index.search(tokenize("machine learning"));
results.forEach(result => {
  console.log(`Document ${result.document.id}: ${result.score}`);
});

SymSpell Example

import {SymSpell} from "mnemonist";

// Spell checker
const spellChecker = new SymSpell({
  maxEditDistance: 2,
  maxSuggestions: 5
});

// Add dictionary words with frequencies
const dictionary = [
  {word: "javascript", freq: 1000},
  {word: "typescript", freq: 800},
  {word: "python", freq: 1200},
  {word: "programming", freq: 900}
];

dictionary.forEach(({word, freq}) => {
  spellChecker.add(word, freq);
});

// Get spelling suggestions
const suggestions = spellChecker.search("javascrip");
suggestions.forEach(suggestion => {
  console.log(`${suggestion.term} (distance: ${suggestion.distance}, freq: ${suggestion.frequency})`);
});
// Output: javascript (distance: 1, freq: 1000)

PassjoinIndex Example

import {PassjoinIndex} from "mnemonist";

// Duplicate detection in records
function jaccardSimilarity(tokensA: Set<string>, tokensB: Set<string>): number {
  const intersection = new Set([...tokensA].filter(x => tokensB.has(x)));
  const union = new Set([...tokensA, ...tokensB]);
  return intersection.size / union.size;
}

const duplicateDetector = new PassjoinIndex({
  threshold: 0.8,
  tokenizer: (text: string) => text.toLowerCase().split(/\s+/),
  distance: jaccardSimilarity
});

// Add product records
duplicateDetector.add({id: 1, name: "Apple iPhone 13 Pro Max"});
duplicateDetector.add({id: 2, name: "Samsung Galaxy S21 Ultra"});
duplicateDetector.add({id: 3, name: "iPhone 13 Pro Max Apple"});

// Find duplicates
const query = {name: "Apple iPhone 13 Pro Max 256GB"};
const similar = duplicateDetector.search(query, 0.7);
similar.forEach(match => {
  console.log(`Similar: ${match.record.name} (${match.similarity})`);
});

Performance Characteristics

Structure	Add/Insert	Search	Space	Notes
BloomFilter	O(k)	O(k)	O(m)	k=hash functions, m=bits
SuffixArray	O(n log n)	O(m log n)	O(n)	n=text length, m=pattern
InvertedIndex	O(d)	O(q + r)	O(V + D)	d=doc terms, q=query, r=results
SymSpell	O(1)	O(n)	O(n·k)	n=dictionary size, k=deletions
PassjoinIndex	O(s)	O(s·c)	O(n·s)	s=signature size, c=candidates
HashedArrayTree	O(1) amortized	O(1)	O(n)	Dynamic array operations