Comprehensive natural language processing library with tokenization, stemming, classification, sentiment analysis, phonetics, distance algorithms, and WordNet integration.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Phonetic encoding algorithms for matching words by sound rather than spelling. These algorithms are useful for fuzzy matching, spell checking, and search applications where pronunciation similarity matters more than exact spelling.
Classic Soundex algorithm for phonetic encoding, particularly effective for English surnames.
/**
* SoundEx phonetic algorithm
*/
class SoundEx {
/**
* Generate SoundEx code for a word
* @param word - Word to encode
* @returns SoundEx code (4 characters: letter + 3 digits)
*/
static process(word: string): string;
}

Usage Examples:
const natural = require('natural');
// Basic SoundEx encoding
console.log(natural.SoundEx.process('Smith')); // 'S530'
console.log(natural.SoundEx.process('Smyth')); // 'S530' (same code)
console.log(natural.SoundEx.process('Johnson')); // 'J525'
console.log(natural.SoundEx.process('Jonson')); // 'J525' (same code)
// Matching surnames with similar pronunciation
const surnames = ['Smith', 'Smyth', 'Schmidt', 'Johnson', 'Jonson', 'Johnsen'];
const groupedBySoundEx = {};
surnames.forEach(name => {
const code = natural.SoundEx.process(name);
if (!groupedBySoundEx[code]) {
groupedBySoundEx[code] = [];
}
groupedBySoundEx[code].push(name);
});
console.log('Names grouped by SoundEx:');
Object.entries(groupedBySoundEx).forEach(([code, names]) => {
console.log(`${code}: ${names.join(', ')}`);
});

Metaphone algorithm providing more accurate phonetic encoding than SoundEx, especially for English words.
/**
* Metaphone phonetic algorithm
*/
class Metaphone {
/**
* Generate Metaphone code for a word
* @param word - Word to encode
* @returns Metaphone code (variable length)
*/
static process(word: string): string;
}

Usage Examples:
const natural = require('natural');
// Basic Metaphone encoding
console.log(natural.Metaphone.process('Smith')); // 'SM0'
console.log(natural.Metaphone.process('Smyth')); // 'SM0' (same code)
console.log(natural.Metaphone.process('knight')); // 'NT'
console.log(natural.Metaphone.process('night')); // 'NT' (same code)
// More complex examples
const words = ['phone', 'fone', 'through', 'threw', 'cat', 'caught'];
console.log('Metaphone encodings:');
words.forEach(word => {
console.log(`${word} -> ${natural.Metaphone.process(word)}`);
});
// Finding phonetically similar words
function findPhoneticMatches(target, wordList) {
const targetCode = natural.Metaphone.process(target);
return wordList.filter(word =>
natural.Metaphone.process(word) === targetCode
);
}
const dictionary = ['night', 'knight', 'write', 'right', 'rite', 'sight', 'site'];
const matches = findPhoneticMatches('night', dictionary);
console.log('Words that sound like "night":', matches);

Advanced Double Metaphone algorithm providing primary and alternate encodings for better phonetic matching.
/**
* Double Metaphone phonetic algorithm
*/
class DoubleMetaphone {
/**
* Generate Double Metaphone codes for a word
* @param word - Word to encode
* @returns Array with primary and alternate codes [primary, alternate]
*/
static process(word: string): string[];
}

Usage Examples:
const natural = require('natural');
// Basic Double Metaphone encoding
console.log(natural.DoubleMetaphone.process('Smith')); // ['SM0', 'XMT']
console.log(natural.DoubleMetaphone.process('Schmidt')); // ['XMT', 'SMT']
// Handle foreign names and complex pronunciation
const names = ['Mueller', 'Miller', 'José', 'Jose', 'Catherine', 'Katherine'];
console.log('Double Metaphone encodings:');
names.forEach(name => {
const [primary, alternate] = natural.DoubleMetaphone.process(name);
console.log(`${name} -> Primary: ${primary}, Alternate: ${alternate || 'none'}`);
});
// Advanced phonetic matching using both codes
function findDoubleMetaphoneMatches(target, wordList) {
const [targetPrimary, targetAlternate] = natural.DoubleMetaphone.process(target);
return wordList.filter(word => {
const [primary, alternate] = natural.DoubleMetaphone.process(word);
return primary === targetPrimary ||
primary === targetAlternate ||
(alternate && alternate === targetPrimary) ||
(alternate && targetAlternate && alternate === targetAlternate);
});
}
const nameList = ['Catherine', 'Katherine', 'Kathryn', 'Maria', 'Marie', 'Mary'];
const phoneticMatches = findDoubleMetaphoneMatches('Catherine', nameList);
console.log('Names similar to "Catherine":', phoneticMatches);

Specialized SoundEx variant optimized for Eastern European names.
/**
* Daitch-Mokotoff SoundEx algorithm
*/
class SoundExDM {
/**
* Generate Daitch-Mokotoff SoundEx code
* @param word - Word to encode
* @returns DM SoundEx code
*/
static process(word: string): string;
}

Usage Examples:
const natural = require('natural');
// Daitch-Mokotoff SoundEx for Eastern European names
const easternEuropeanNames = [
'Kowalski', 'Kowalsky', 'Novak', 'Nowak', 'Dvorak', 'Dvorák'
];
console.log('Daitch-Mokotoff SoundEx encodings:');
easternEuropeanNames.forEach(name => {
console.log(`${name} -> ${natural.SoundExDM.process(name)}`);
});

const natural = require('natural');
/**
* Phonetic search system using multiple algorithms
*/
class PhoneticSearcher {
constructor(algorithm = 'metaphone') {
this.algorithm = algorithm;
this.index = new Map();
}
/**
* Add words to the phonetic index
* @param words - Array of words to index
*/
indexWords(words) {
words.forEach(word => this.addWord(word));
}
/**
* Add single word to index
* @param word - Word to add
*/
addWord(word) {
const codes = this.encode(word);
codes.forEach(code => {
if (!this.index.has(code)) {
this.index.set(code, new Set());
}
this.index.get(code).add(word.toLowerCase());
});
}
/**
* Search for phonetically similar words
* @param query - Query word
* @returns Array of matching words
*/
search(query) {
const codes = this.encode(query);
const matches = new Set();
codes.forEach(code => {
if (this.index.has(code)) {
this.index.get(code).forEach(word => matches.add(word));
}
});
return [...matches];
}
/**
* Encode word using selected algorithm
* @param word - Word to encode
* @returns Array of phonetic codes
*/
encode(word) {
switch (this.algorithm) {
case 'soundex':
return [natural.SoundEx.process(word)];
case 'metaphone':
return [natural.Metaphone.process(word)];
case 'doublemetaphone':
return natural.DoubleMetaphone.process(word).filter(code => code);
case 'dmSoundex':
return [natural.SoundExDM.process(word)];
default:
return [natural.Metaphone.process(word)];
}
}
}
// Usage example
const searcher = new PhoneticSearcher('doublemetaphone');
// Index a dictionary of names
const names = [
'Smith', 'Smyth', 'Schmidt', 'Johnson', 'Jonson', 'Johnsen',
'Catherine', 'Katherine', 'Kathryn', 'Maria', 'Marie', 'Mary',
'Mueller', 'Miller', 'Muller', 'Stephen', 'Steven', 'Stefan'
];
searcher.indexWords(names);
// Search for phonetically similar names
console.log('Names similar to "Catherine":', searcher.search('Catherine'));
console.log('Names similar to "Smith":', searcher.search('Smith'));
console.log('Names similar to "Steven":', searcher.search('Steven'));

const natural = require('natural');
/**
* Spell checker that uses phonetic matching as fallback
*/
class PhoneticSpellChecker {
constructor(dictionary) {
this.dictionary = new Set(dictionary.map(word => word.toLowerCase()));
this.phoneticIndex = new Map();
// Build phonetic index
dictionary.forEach(word => {
const metaphone = natural.Metaphone.process(word);
if (!this.phoneticIndex.has(metaphone)) {
this.phoneticIndex.set(metaphone, []);
}
this.phoneticIndex.get(metaphone).push(word.toLowerCase());
});
}
/**
* Check spelling and suggest corrections
* @param word - Word to check
* @returns Object with spelling status and suggestions
*/
check(word) {
const lowerWord = word.toLowerCase();
// Check if word is spelled correctly
if (this.dictionary.has(lowerWord)) {
return { correct: true, suggestions: [] };
}
// Find phonetic matches
const metaphone = natural.Metaphone.process(word);
const phoneticMatches = this.phoneticIndex.get(metaphone) || [];
// Find edit distance matches
const editDistanceMatches = [...this.dictionary].filter(dictWord => {
const distance = natural.LevenshteinDistance(lowerWord, dictWord);
return distance <= 2; // Allow up to 2 edits
});
// Combine and rank suggestions
const allSuggestions = new Set([...phoneticMatches, ...editDistanceMatches]);
const rankedSuggestions = [...allSuggestions].map(suggestion => ({
word: suggestion,
editDistance: natural.LevenshteinDistance(lowerWord, suggestion),
phoneticMatch: phoneticMatches.includes(suggestion)
})).sort((a, b) => {
// Prefer phonetic matches, then by edit distance
if (a.phoneticMatch && !b.phoneticMatch) return -1;
if (!a.phoneticMatch && b.phoneticMatch) return 1;
return a.editDistance - b.editDistance;
});
return {
correct: false,
suggestions: rankedSuggestions.slice(0, 5).map(s => s.word)
};
}
}
// Usage
const dictionary = [
'apple', 'application', 'apply', 'approach', 'appropriate',
'cat', 'catch', 'caught', 'car', 'card', 'care',
'phone', 'photograph', 'phonetic', 'elephant'
];
const spellChecker = new PhoneticSpellChecker(dictionary);
// Test with various misspellings
const testWords = ['aple', 'fone', 'elefant', 'aproach', 'apropriate'];
testWords.forEach(word => {
const result = spellChecker.check(word);
console.log(`"${word}": ${result.correct ? 'CORRECT' : 'INCORRECT'}`);
if (!result.correct && result.suggestions.length > 0) {
console.log(` Suggestions: ${result.suggestions.join(', ')}`);
}
});

const natural = require('natural');
/**
* Name matching system for finding similar names
*/
class NameMatcher {
constructor() {
this.algorithms = {
soundex: (name) => [natural.SoundEx.process(name)],
metaphone: (name) => [natural.Metaphone.process(name)],
doublemetaphone: (name) => natural.DoubleMetaphone.process(name)
};
}
/**
* Find similar names using multiple phonetic algorithms
* @param targetName - Name to find matches for
* @param nameList - List of names to search
* @param threshold - Minimum number of algorithms that must match
* @returns Array of matching names with scores
*/
findSimilarNames(targetName, nameList, threshold = 1) {
const targetCodes = {};
Object.entries(this.algorithms).forEach(([algorithm, encoder]) => {
targetCodes[algorithm] = encoder(targetName);
});
const matches = nameList.map(name => {
let matchScore = 0;
const nameCodes = {};
Object.entries(this.algorithms).forEach(([algorithm, encoder]) => {
nameCodes[algorithm] = encoder(name);
// Check if any codes match
const targetAlgoCodes = targetCodes[algorithm];
const nameAlgoCodes = nameCodes[algorithm];
const hasMatch = targetAlgoCodes.some(targetCode =>
nameAlgoCodes.includes(targetCode) && targetCode !== ''
);
if (hasMatch) matchScore++;
});
return {
name,
score: matchScore,
codes: nameCodes
};
}).filter(match => match.score >= threshold);
return matches.sort((a, b) => b.score - a.score);
}
/**
* Advanced name matching with fuzzy string matching
* @param targetName - Target name
* @param nameList - List of candidate names
* @returns Ranked list of matches
*/
advancedNameMatch(targetName, nameList) {
return nameList.map(name => {
// Phonetic similarity
const phoneticScore = this.calculatePhoneticSimilarity(targetName, name);
// String similarity
const stringScore = natural.JaroWinklerDistance(targetName.toLowerCase(), name.toLowerCase());
// Combined score (weighted)
const combinedScore = (phoneticScore * 0.6) + (stringScore * 0.4);
return {
name,
phoneticScore,
stringScore,
combinedScore
};
}).sort((a, b) => b.combinedScore - a.combinedScore);
}
/**
* Calculate phonetic similarity between two names
* @param name1 - First name
* @param name2 - Second name
* @returns Similarity score (0-1)
*/
calculatePhoneticSimilarity(name1, name2) {
let matches = 0;
let total = 0;
Object.values(this.algorithms).forEach(encoder => {
const codes1 = encoder(name1);
const codes2 = encoder(name2);
total++;
if (codes1.some(code1 => codes2.includes(code1) && code1 !== '')) {
matches++;
}
});
return matches / total;
}
}
// Usage
const matcher = new NameMatcher();
const customerNames = [
'John Smith', 'Jon Smyth', 'Jonathan Smith', 'Jane Smith',
'Catherine Johnson', 'Katherine Jonson', 'Maria Garcia',
'Jose Rodriguez', 'José Rodriguez', 'Michael Brown'
];
// Find names similar to a query
const query = 'Jon Smith';
const similarNames = matcher.findSimilarNames(query, customerNames, 2);
console.log(`Names similar to "${query}" (requiring 2+ algorithm matches):`);
similarNames.forEach(match => {
console.log(`${match.name} (score: ${match.score})`);
});
// Advanced matching
const advancedMatches = matcher.advancedNameMatch(query, customerNames);
console.log(`\nAdvanced matching results for "${query}":`);
advancedMatches.slice(0, 5).forEach(match => {
console.log(`${match.name} (combined: ${match.combinedScore.toFixed(3)})`);
});