Curated collection of efficient data structures for JavaScript/TypeScript applications.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Advanced data structures for specific algorithmic applications including bloom filters, suffix arrays, and search indexes.
Probabilistic data structure for membership testing with configurable false positive rate.
/**
* Bloom filter for probabilistic membership testing
* @param capacity Expected number of items
*/
function BloomFilter(capacity: number): BloomFilter;
/**
* Bloom filter with options object
* @param options Configuration object with capacity and optional errorRate
*/
function BloomFilter(options: BloomFilterOptions): BloomFilter;
type BloomFilterOptions = {
capacity: number;
errorRate?: number;
}
interface BloomFilter {
/** Expected capacity */
readonly capacity: number;
/** Configured error rate */
readonly errorRate: number;
/** Number of hash functions used */
readonly hashFunctions: number;
/** Add string to filter */
add(item: string): this;
/** Test if string might be in filter */
test(item: string): boolean;
/** Remove all items */
clear(): void;
/** Export filter state as Uint8Array */
toJSON(): Uint8Array;
inspect(): any;
/** Create bloom filter from iterable */
static from(iterable: Iterable<string>, options?: number | BloomFilterOptions): BloomFilter;
}Suffix array for efficient string matching and substring queries.
/**
* Suffix array for string pattern matching
* @param text Input string or array of strings
*/
function SuffixArray(text: string | Array<string>): SuffixArray;
interface SuffixArray {
/** The suffix array */
readonly array: Uint32Array;
/** Length of the suffix array */
readonly length: number;
/** Original string(s) */
readonly string: string | Array<string>;
/** Search for pattern occurrences */
search(pattern: string): Array<number>;
/** Find longest common substring */
longestCommonSubstring(): {length: number, index: number} | null;
inspect(): any;
}Suffix array for multiple strings with cross-string pattern matching.
/**
* Generalized suffix array for multiple strings
* @param strings Array of input strings
*/
function GeneralizedSuffixArray(strings: Array<string>): GeneralizedSuffixArray;
interface GeneralizedSuffixArray {
readonly array: Uint32Array;
readonly length: number;
readonly strings: Array<string>;
/** Search for pattern across all strings */
search(pattern: string): Array<{string: number, index: number}>;
/** Find longest common substring across strings */
longestCommonSubstring(): {length: number, strings: Array<number>, index: number} | null;
inspect(): any;
}Full-text search index with TF-IDF scoring and configurable tokenization.
/**
* Inverted index for full-text search
* @param tokenizer Optional custom tokenizer function
*/
function InvertedIndex(tokenizer?: TokenizerFunction): InvertedIndex;
interface InvertedIndex {
readonly size: number;
/** Add document with its tokens */
add(document: any, tokens: Array<string>): this;
/** Search for documents matching query */
search(query: Array<string>): Array<{document: any, score: number}>;
/** Get all documents containing term */
get(term: string): Array<any>;
/** Remove all documents and terms */
clear(): void;
inspect(): any;
}
type TokenizerFunction = (text: string) => Array<string>;Spelling correction data structure using symmetric delete spelling correction algorithm.
/**
* SymSpell algorithm for spelling correction
* @param options Configuration options
*/
function SymSpell(options?: SymSpellOptions): SymSpell;
interface SymSpellOptions {
/** Maximum edit distance for corrections (default: 2) */
maxEditDistance?: number;
/** Minimum frequency threshold (default: 1) */
countThreshold?: number;
/** Maximum number of suggestions (default: 5) */
maxSuggestions?: number;
}
interface SymSpell {
readonly size: number;
/** Add word with optional frequency */
add(word: string, frequency?: number): this;
/** Get spelling corrections for query */
search(query: string, maxEditDistance?: number, suggestion?: SuggestionVerbosity): Array<Suggestion>;
/** Remove all words */
clear(): void;
inspect(): any;
}
interface Suggestion {
term: string;
distance: number;
frequency: number;
}
enum SuggestionVerbosity {
Top, // Only top suggestion
Closest, // All suggestions with minimum edit distance
All // All suggestions within edit distance
}Index for efficient similarity joins using the Passjoin algorithm.
/**
* Passjoin index for similarity joins
* @param options Configuration for similarity matching
*/
function PassjoinIndex(options: PassjoinOptions): PassjoinIndex;
interface PassjoinOptions {
/** Similarity threshold (0-1) */
threshold: number;
/** Tokenizer function */
tokenizer: TokenizerFunction;
/** Optional distance function */
distance?: DistanceFunction;
}
interface PassjoinIndex {
readonly size: number;
/** Add record to index */
add(record: any): this;
/** Find similar records above threshold */
search(query: any, threshold?: number): Array<{record: any, similarity: number}>;
/** Remove all records */
clear(): void;
inspect(): any;
}Hash array mapped trie for efficient dynamic array operations.
/**
* Hashed Array Tree for dynamic arrays
* @param options Optional configuration
*/
function HashedArrayTree<T>(options?: HATOptions): HashedArrayTree<T>;
interface HATOptions {
initialCapacity?: number;
factor?: number;
}
interface HashedArrayTree<T> {
readonly length: number;
readonly size: number;
/** Add item to end */
push(item: T): number;
/** Get item at index */
get(index: number): T | undefined;
/** Set item at index */
set(index: number, value: T): void;
/** Remove and return last item */
pop(): T | undefined;
/** Remove all items */
clear(): void;
values(): IterableIterator<T>;
[Symbol.iterator](): IterableIterator<T>;
inspect(): any;
}import {BloomFilter} from "mnemonist";
// URL deduplication with 1% false positive rate
const seenUrls = new BloomFilter(1000000, 0.01);
function crawlUrl(url: string) {
if (seenUrls.test(url)) {
console.log("Probably already crawled:", url);
return; // Might be false positive, but likely already seen
}
// Crawl the URL
console.log("Crawling new URL:", url);
seenUrls.add(url);
}
crawlUrl("https://example.com");
crawlUrl("https://example.org");
crawlUrl("https://example.com"); // "Probably already crawled"import {SuffixArray} from "mnemonist";
// Text search and analysis
const text = "banana";
const suffixArray = new SuffixArray(text);
// Find all occurrences of pattern
const matches = suffixArray.search("ana");
console.log(matches); // [1, 3] - positions where "ana" occurs
// Find longest repeated substring
const lcs = suffixArray.longestCommonSubstring();
console.log(lcs); // {length: 3, index: 1} - "ana" at position 1
// DNA sequence analysis
const dna = "ATCGATCGATCG";
const dnaArray = new SuffixArray(dna);
const repeats = dnaArray.search("ATCG");
console.log(repeats); // All positions of ATCG patternimport {InvertedIndex} from "mnemonist";
// Simple search engine
const searchIndex = new InvertedIndex();
// Custom tokenizer
function tokenize(text: string): Array<string> {
return text.toLowerCase()
.replace(/[^\w\s]/g, "")
.split(/\s+/)
.filter(token => token.length > 2);
}
const index = new InvertedIndex(tokenize);
// Add documents
index.add({id: 1, title: "Machine Learning Basics"},
tokenize("Introduction to machine learning algorithms"));
index.add({id: 2, title: "Deep Learning Guide"},
tokenize("Deep neural networks and machine learning"));
index.add({id: 3, title: "Data Science Tools"},
tokenize("Tools for data analysis and statistics"));
// Search for documents
const results = index.search(tokenize("machine learning"));
results.forEach(result => {
console.log(`Document ${result.document.id}: ${result.score}`);
});import {SymSpell} from "mnemonist";
// Spell checker
const spellChecker = new SymSpell({
maxEditDistance: 2,
maxSuggestions: 5
});
// Add dictionary words with frequencies
const dictionary = [
{word: "javascript", freq: 1000},
{word: "typescript", freq: 800},
{word: "python", freq: 1200},
{word: "programming", freq: 900}
];
dictionary.forEach(({word, freq}) => {
spellChecker.add(word, freq);
});
// Get spelling suggestions
const suggestions = spellChecker.search("javascrip");
suggestions.forEach(suggestion => {
console.log(`${suggestion.term} (distance: ${suggestion.distance}, freq: ${suggestion.frequency})`);
});
// Output: javascript (distance: 1, freq: 1000)import {PassjoinIndex} from "mnemonist";
// Duplicate detection in records
function jaccardSimilarity(tokensA: Set<string>, tokensB: Set<string>): number {
const intersection = new Set([...tokensA].filter(x => tokensB.has(x)));
const union = new Set([...tokensA, ...tokensB]);
return intersection.size / union.size;
}
const duplicateDetector = new PassjoinIndex({
threshold: 0.8,
tokenizer: (text: string) => text.toLowerCase().split(/\s+/),
distance: jaccardSimilarity
});
// Add product records
duplicateDetector.add({id: 1, name: "Apple iPhone 13 Pro Max"});
duplicateDetector.add({id: 2, name: "Samsung Galaxy S21 Ultra"});
duplicateDetector.add({id: 3, name: "iPhone 13 Pro Max Apple"});
// Find duplicates
const query = {name: "Apple iPhone 13 Pro Max 256GB"};
const similar = duplicateDetector.search(query, 0.7);
similar.forEach(match => {
console.log(`Similar: ${match.record.name} (${match.similarity})`);
});| Structure | Add/Insert | Search | Space | Notes |
|---|---|---|---|---|
| BloomFilter | O(k) | O(k) | O(m) | k=hash functions, m=bits |
| SuffixArray | O(n log n) | O(m log n) | O(n) | n=text length, m=pattern |
| InvertedIndex | O(d) | O(q + r) | O(V + D) | d=doc terms, q=query, r=results |
| SymSpell | O(1) | O(n) | O(n·k) | n=dictionary size, k=deletions |
| PassjoinIndex | O(s) | O(s·c) | O(n·s) | s=signature size, c=candidates |
| HashedArrayTree | O(1) amortized | O(1) | O(n) | Dynamic array operations |