Comprehensive natural language processing library with tokenization, stemming, classification, sentiment analysis, phonetics, distance algorithms, and WordNet integration.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
String distance algorithms for calculating similarity between strings. These are useful for fuzzy matching, spell checking, duplicate detection, and text comparison tasks.
Jaro-Winkler distance algorithm optimized for short strings and personal names.
/**
* Calculate Jaro-Winkler distance between two strings
* Returns a value between 0 (no similarity) and 1 (identical)
* @param s1 - First string
* @param s2 - Second string
* @returns Similarity score (0-1)
*/
function JaroWinklerDistance(s1: string, s2: string): number;

Usage Examples:
const natural = require('natural');
// Calculate similarity
console.log(natural.JaroWinklerDistance('sitting', 'kitten')); // 0.746
console.log(natural.JaroWinklerDistance('hello', 'hello')); // 1.0
console.log(natural.JaroWinklerDistance('hello', 'world')); // ~0.47 (one shared 'l' falls inside the match window, so the score is not 0)
// Find closest match
const target = 'javascript';
const candidates = ['java', 'script', 'typescript', 'coffeescript'];
let bestMatch = '';
let bestScore = 0;
candidates.forEach(candidate => {
const score = natural.JaroWinklerDistance(target, candidate);
if (score > bestScore) {
bestScore = score;
bestMatch = candidate;
}
});
console.log(`Best match: ${bestMatch} (${bestScore})`);

Classic edit distance measuring the minimum number of edits (insertions, deletions, substitutions) needed to transform one string into another.
/**
* Calculate Levenshtein distance between two strings
* Returns the minimum number of edits required
* @param s1 - First string
* @param s2 - Second string
* @returns Number of edits required
*/
function LevenshteinDistance(s1: string, s2: string): number;
/**
* Search for strings within a certain Levenshtein distance
* @param source - Source string to search from
* @param targets - Array of target strings to search
* @param maxDistance - Maximum distance to consider
* @returns Array of matches with distances
*/
function LevenshteinDistanceSearch(source: string, targets: string[], maxDistance: number): SearchResult[];
interface SearchResult {
target: string;
distance: number;
}

Usage Examples:
const natural = require('natural');
// Basic distance calculation
console.log(natural.LevenshteinDistance('kitten', 'sitting')); // 3
console.log(natural.LevenshteinDistance('hello', 'hallo')); // 1
// Search with distance threshold
const source = 'javascript';
const targets = ['java', 'script', 'typescript', 'python', 'rust'];
const results = natural.LevenshteinDistanceSearch(source, targets, 5);
console.log(results);
// Results with distance <= 5

Extended Levenshtein distance that also allows transposition of adjacent characters.
/**
* Calculate Damerau-Levenshtein distance between two strings
* Includes transposition operations in addition to insertions, deletions, substitutions
* @param s1 - First string
* @param s2 - Second string
* @returns Number of edits required
*/
function DamerauLevenshteinDistance(s1: string, s2: string): number;
/**
* Search using Damerau-Levenshtein distance
* @param source - Source string
* @param targets - Target strings to search
* @param maxDistance - Maximum distance threshold
* @returns Array of matches with distances
*/
function DamerauLevenshteinDistanceSearch(source: string, targets: string[], maxDistance: number): SearchResult[];

Usage Examples:
const natural = require('natural');
// Handles transpositions better than standard Levenshtein
console.log(natural.DamerauLevenshteinDistance('ca', 'ac')); // 1 (transposition)
console.log(natural.LevenshteinDistance('ca', 'ac')); // 2 (deletion + insertion)
// Useful for typos where characters are swapped
console.log(natural.DamerauLevenshteinDistance('javascript', 'javasrcpit')); // 2 (two adjacent swaps: 'cr' -> 'rc' and 'ip' -> 'pi')

Dice coefficient for measuring similarity based on bigram overlap.
/**
* Calculate Dice coefficient between two strings
* Based on bigram similarity, returns value between 0 and 1
* @param s1 - First string
* @param s2 - Second string
* @returns Similarity coefficient (0-1)
*/
function DiceCoefficient(s1: string, s2: string): number;

Usage Examples:
const natural = require('natural');
// Good for longer strings and fuzzy matching
console.log(natural.DiceCoefficient('night', 'nacht')); // ~0.25
console.log(natural.DiceCoefficient('hello world', 'hello word')); // High similarity
// Useful for document similarity
const doc1 = 'the quick brown fox jumps';
const doc2 = 'quick brown fox jumping';
console.log(natural.DiceCoefficient(doc1, doc2)); // Measures overlap of character bigrams, not of whole words

Hamming distance for strings of equal length, counting differing positions.
/**
* Calculate Hamming distance between two equal-length strings
* Counts the number of positions where characters differ
* @param s1 - First string
* @param s2 - Second string (must be same length as s1)
* @returns Number of differing positions
* @throws Error if strings have different lengths
*/
function HammingDistance(s1: string, s2: string): number;

Usage Examples:
const natural = require('natural');
// Strings must be same length
console.log(natural.HammingDistance('hello', 'hallo')); // 1
console.log(natural.HammingDistance('12345', '12395')); // 1
// Useful for binary strings, DNA sequences, etc.
console.log(natural.HammingDistance('1011101', '1001001')); // 2
// Error for different lengths
try {
natural.HammingDistance('hello', 'hi'); // Throws error
} catch (err) {
console.log('Strings must be same length');
}

const natural = require('natural');
function findClosestSpelling(word, dictionary, threshold = 0.8) {
let bestMatch = null;
let bestScore = 0;
dictionary.forEach(dictWord => {
const score = natural.JaroWinklerDistance(word, dictWord);
if (score > bestScore && score >= threshold) {
bestScore = score;
bestMatch = dictWord;
}
});
return bestMatch;
}
const dictionary = ['javascript', 'python', 'java', 'typescript'];
const misspelled = 'javasript';
const suggestion = findClosestSpelling(misspelled, dictionary);
console.log(`Did you mean: ${suggestion}?`);

const natural = require('natural');
function fuzzySearch(query, items, maxDistance = 2) {
const results = [];
items.forEach(item => {
const distance = natural.LevenshteinDistance(query.toLowerCase(), item.toLowerCase());
if (distance <= maxDistance) {
results.push({ item, distance });
}
});
return results.sort((a, b) => a.distance - b.distance);
}
const items = ['apple', 'application', 'apply', 'approach', 'appropriate'];
const query = 'aple';
const matches = fuzzySearch(query, items);
console.log(matches); // [{item: 'apple', distance: 1}, ...]

const natural = require('natural');
function findDuplicates(strings, threshold = 0.9) {
const duplicates = [];
for (let i = 0; i < strings.length; i++) {
for (let j = i + 1; j < strings.length; j++) {
const similarity = natural.JaroWinklerDistance(strings[i], strings[j]);
if (similarity >= threshold) {
duplicates.push({
first: strings[i],
second: strings[j],
similarity
});
}
}
}
return duplicates;
}
const names = ['John Smith', 'Jon Smith', 'Jane Doe', 'Jane Do'];
const duplicates = findDuplicates(names);
console.log(duplicates); // Potential duplicates with high similarity