# Phonetics

Phonetic encoding algorithms for matching words by sound rather than spelling. These algorithms are useful for fuzzy matching, spell checking, and search applications where pronunciation similarity matters more than exact spelling.

## Capabilities

### SoundEx

Classic Soundex algorithm for phonetic encoding, particularly effective for English surnames.

```javascript { .api }
11
/**
 * SoundEx phonetic algorithm.
 */
class SoundEx {
  /**
   * Generate the SoundEx code for a word.
   * @param word - Word to encode
   * @returns SoundEx code (4 characters: letter + 3 digits)
   */
  static process(word: string): string;
}
22
```

**Usage Examples:**

```javascript
27
const natural = require('natural');
28
29
// Basic SoundEx encoding: spelling variants of a surname share one code
console.log(natural.SoundEx.process('Smith')); // 'S530'
console.log(natural.SoundEx.process('Smyth')); // 'S530' (same code)
console.log(natural.SoundEx.process('Johnson')); // 'J525'
console.log(natural.SoundEx.process('Jonson')); // 'J525' (same code)

// Bucket surnames by SoundEx code so similar-sounding names end up together
const surnames = ['Smith', 'Smyth', 'Schmidt', 'Johnson', 'Jonson', 'Johnsen'];
const groupedBySoundEx = {};

for (const name of surnames) {
  const code = natural.SoundEx.process(name);
  // Create the bucket the first time a code is seen
  const bucket = groupedBySoundEx[code] || (groupedBySoundEx[code] = []);
  bucket.push(name);
}

console.log('Names grouped by SoundEx:');
for (const [code, names] of Object.entries(groupedBySoundEx)) {
  console.log(`${code}: ${names.join(', ')}`);
}
51
```

### Metaphone

Metaphone algorithm providing more accurate phonetic encoding than SoundEx, especially for English words.

```javascript { .api }
58
/**
 * Metaphone phonetic algorithm.
 */
class Metaphone {
  /**
   * Generate the Metaphone code for a word.
   * @param word - Word to encode
   * @returns Metaphone code (variable length)
   */
  static process(word: string): string;
}
69
```

**Usage Examples:**

```javascript
74
const natural = require('natural');
75
76
// Basic Metaphone encoding: words that sound alike share a code
console.log(natural.Metaphone.process('Smith')); // 'SM0'
console.log(natural.Metaphone.process('Smyth')); // 'SM0' (same code)
console.log(natural.Metaphone.process('knight')); // 'NT'
console.log(natural.Metaphone.process('night')); // 'NT' (same code)

// More complex examples: print the code for each word in turn
const words = ['phone', 'fone', 'through', 'threw', 'cat', 'caught'];
console.log('Metaphone encodings:');
for (const word of words) {
  const encoded = natural.Metaphone.process(word);
  console.log(`${word} -> ${encoded}`);
}
88
89
// Finding phonetically similar words
90
function findPhoneticMatches(target, wordList) {
91
const targetCode = natural.Metaphone.process(target);
92
return wordList.filter(word =>
93
natural.Metaphone.process(word) === targetCode
94
);
95
}
96
97
// Look up every dictionary entry that shares a Metaphone code with "night"
const dictionary = [
  'night', 'knight', 'write', 'right', 'rite', 'sight', 'site'
];
const matches = findPhoneticMatches('night', dictionary);
console.log('Words that sound like "night":', matches);
100
```

### Double Metaphone

Advanced Double Metaphone algorithm providing primary and alternate encodings for better phonetic matching.

```javascript { .api }
107
/**
 * Double Metaphone phonetic algorithm.
 */
class DoubleMetaphone {
  /**
   * Generate the Double Metaphone codes for a word.
   * @param word - Word to encode
   * @returns Array with primary and alternate codes [primary, alternate]
   */
  static process(word: string): string[];
}
118
```

**Usage Examples:**

```javascript
123
const natural = require('natural');
124
125
// Basic Double Metaphone encoding: each word yields [primary, alternate]
console.log(natural.DoubleMetaphone.process('Smith')); // ['SM0', 'XMT']
console.log(natural.DoubleMetaphone.process('Schmidt')); // ['XMT', 'SMT']

// Handle foreign names and complex pronunciation
const names = ['Mueller', 'Miller', 'José', 'Jose', 'Catherine', 'Katherine'];
console.log('Double Metaphone encodings:');
for (const name of names) {
  const codes = natural.DoubleMetaphone.process(name);
  const primary = codes[0];
  // Show a placeholder when no alternate encoding is available
  const alternate = codes[1] || 'none';
  console.log(`${name} -> Primary: ${primary}, Alternate: ${alternate}`);
}
136
137
// Advanced phonetic matching using both codes
138
function findDoubleMetaphoneMatches(target, wordList) {
139
const [targetPrimary, targetAlternate] = natural.DoubleMetaphone.process(target);
140
141
return wordList.filter(word => {
142
const [primary, alternate] = natural.DoubleMetaphone.process(word);
143
return primary === targetPrimary ||
144
primary === targetAlternate ||
145
(alternate && alternate === targetPrimary) ||
146
(alternate && targetAlternate && alternate === targetAlternate);
147
});
148
}
149
150
// Find the names that share a primary or alternate code with "Catherine"
const nameList = [
  'Catherine', 'Katherine', 'Kathryn', 'Maria', 'Marie', 'Mary'
];
const phoneticMatches = findDoubleMetaphoneMatches('Catherine', nameList);
console.log('Names similar to "Catherine":', phoneticMatches);
153
```

### Daitch-Mokotoff SoundEx

Specialized SoundEx variant optimized for Eastern European names.

```javascript { .api }
160
/**
 * Daitch-Mokotoff SoundEx algorithm.
 */
class SoundExDM {
  /**
   * Generate the Daitch-Mokotoff SoundEx code for a word.
   * @param word - Word to encode
   * @returns DM SoundEx code
   */
  static process(word: string): string;
}
171
```

**Usage Examples:**

```javascript
176
const natural = require('natural');
177
178
// Daitch-Mokotoff SoundEx for Eastern European names
const easternEuropeanNames = [
  'Kowalski', 'Kowalsky', 'Novak', 'Nowak', 'Dvorak', 'Dvorák'
];

// Print the code for each name so spelling variants can be compared side by side
console.log('Daitch-Mokotoff SoundEx encodings:');
easternEuropeanNames.forEach(name => {
  console.log(`${name} -> ${natural.SoundExDM.process(name)}`);
});
187
```

## Practical Applications

### Phonetic Search System

```javascript
194
const natural = require('natural');
195
196
/**
197
* Phonetic search system using multiple algorithms
198
*/
199
class PhoneticSearcher {
200
constructor(algorithm = 'metaphone') {
201
this.algorithm = algorithm;
202
this.index = new Map();
203
}
204
205
/**
206
* Add words to the phonetic index
207
* @param words - Array of words to index
208
*/
209
indexWords(words) {
210
words.forEach(word => this.addWord(word));
211
}
212
213
/**
214
* Add single word to index
215
* @param word - Word to add
216
*/
217
addWord(word) {
218
const codes = this.encode(word);
219
codes.forEach(code => {
220
if (!this.index.has(code)) {
221
this.index.set(code, new Set());
222
}
223
this.index.get(code).add(word.toLowerCase());
224
});
225
}
226
227
/**
228
* Search for phonetically similar words
229
* @param query - Query word
230
* @returns Array of matching words
231
*/
232
search(query) {
233
const codes = this.encode(query);
234
const matches = new Set();
235
236
codes.forEach(code => {
237
if (this.index.has(code)) {
238
this.index.get(code).forEach(word => matches.add(word));
239
}
240
});
241
242
return [...matches];
243
}
244
245
/**
246
* Encode word using selected algorithm
247
* @param word - Word to encode
248
* @returns Array of phonetic codes
249
*/
250
encode(word) {
251
switch (this.algorithm) {
252
case 'soundex':
253
return [natural.SoundEx.process(word)];
254
case 'metaphone':
255
return [natural.Metaphone.process(word)];
256
case 'doublemetaphone':
257
return natural.DoubleMetaphone.process(word).filter(code => code);
258
case 'dmSoundex':
259
return [natural.SoundExDM.process(word)];
260
default:
261
return [natural.Metaphone.process(word)];
262
}
263
}
264
}
265
266
// Usage example
const searcher = new PhoneticSearcher('doublemetaphone');

// Index a dictionary of names
const names = [
  'Smith', 'Smyth', 'Schmidt', 'Johnson', 'Jonson', 'Johnsen',
  'Catherine', 'Katherine', 'Kathryn', 'Maria', 'Marie', 'Mary',
  'Mueller', 'Miller', 'Muller', 'Stephen', 'Steven', 'Stefan'
];

searcher.indexWords(names);

// Search for phonetically similar names (results come back lower-cased)
console.log('Names similar to "Catherine":', searcher.search('Catherine'));
console.log('Names similar to "Smith":', searcher.search('Smith'));
console.log('Names similar to "Steven":', searcher.search('Steven'));
282
```

### Spell Checker with Phonetic Fallback

```javascript
287
const natural = require('natural');
288
289
/**
290
* Spell checker that uses phonetic matching as fallback
291
*/
292
class PhoneticSpellChecker {
293
constructor(dictionary) {
294
this.dictionary = new Set(dictionary.map(word => word.toLowerCase()));
295
this.phoneticIndex = new Map();
296
297
// Build phonetic index
298
dictionary.forEach(word => {
299
const metaphone = natural.Metaphone.process(word);
300
if (!this.phoneticIndex.has(metaphone)) {
301
this.phoneticIndex.set(metaphone, []);
302
}
303
this.phoneticIndex.get(metaphone).push(word.toLowerCase());
304
});
305
}
306
307
/**
308
* Check spelling and suggest corrections
309
* @param word - Word to check
310
* @returns Object with spelling status and suggestions
311
*/
312
check(word) {
313
const lowerWord = word.toLowerCase();
314
315
// Check if word is spelled correctly
316
if (this.dictionary.has(lowerWord)) {
317
return { correct: true, suggestions: [] };
318
}
319
320
// Find phonetic matches
321
const metaphone = natural.Metaphone.process(word);
322
const phoneticMatches = this.phoneticIndex.get(metaphone) || [];
323
324
// Find edit distance matches
325
const editDistanceMatches = [...this.dictionary].filter(dictWord => {
326
const distance = natural.LevenshteinDistance(lowerWord, dictWord);
327
return distance <= 2; // Allow up to 2 edits
328
});
329
330
// Combine and rank suggestions
331
const allSuggestions = new Set([...phoneticMatches, ...editDistanceMatches]);
332
const rankedSuggestions = [...allSuggestions].map(suggestion => ({
333
word: suggestion,
334
editDistance: natural.LevenshteinDistance(lowerWord, suggestion),
335
phoneticMatch: phoneticMatches.includes(suggestion)
336
})).sort((a, b) => {
337
// Prefer phonetic matches, then by edit distance
338
if (a.phoneticMatch && !b.phoneticMatch) return -1;
339
if (!a.phoneticMatch && b.phoneticMatch) return 1;
340
return a.editDistance - b.editDistance;
341
});
342
343
return {
344
correct: false,
345
suggestions: rankedSuggestions.slice(0, 5).map(s => s.word)
346
};
347
}
348
}
349
350
// Usage
const dictionary = [
  'apple', 'application', 'apply', 'approach', 'appropriate',
  'cat', 'catch', 'caught', 'car', 'card', 'care',
  'phone', 'photograph', 'phonetic', 'elephant'
];

const spellChecker = new PhoneticSpellChecker(dictionary);

// Test with various misspellings
const testWords = ['aple', 'fone', 'elefant', 'aproach', 'apropriate'];

testWords.forEach(word => {
  const result = spellChecker.check(word);
  console.log(`"${word}": ${result.correct ? 'CORRECT' : 'INCORRECT'}`);
  // Only list suggestions when the word is wrong and something was found
  if (!result.correct && result.suggestions.length > 0) {
    console.log(` Suggestions: ${result.suggestions.join(', ')}`);
  }
});
369
```

### Name Matching System

```javascript
374
const natural = require('natural');
375
376
/**
377
* Name matching system for finding similar names
378
*/
379
class NameMatcher {
380
constructor() {
381
this.algorithms = {
382
soundex: (name) => [natural.SoundEx.process(name)],
383
metaphone: (name) => [natural.Metaphone.process(name)],
384
doublemetaphone: (name) => natural.DoubleMetaphone.process(name)
385
};
386
}
387
388
/**
389
* Find similar names using multiple phonetic algorithms
390
* @param targetName - Name to find matches for
391
* @param nameList - List of names to search
392
* @param threshold - Minimum number of algorithms that must match
393
* @returns Array of matching names with scores
394
*/
395
findSimilarNames(targetName, nameList, threshold = 1) {
396
const targetCodes = {};
397
Object.entries(this.algorithms).forEach(([algorithm, encoder]) => {
398
targetCodes[algorithm] = encoder(targetName);
399
});
400
401
const matches = nameList.map(name => {
402
let matchScore = 0;
403
const nameCodes = {};
404
405
Object.entries(this.algorithms).forEach(([algorithm, encoder]) => {
406
nameCodes[algorithm] = encoder(name);
407
408
// Check if any codes match
409
const targetAlgoCodes = targetCodes[algorithm];
410
const nameAlgoCodes = nameCodes[algorithm];
411
412
const hasMatch = targetAlgoCodes.some(targetCode =>
413
nameAlgoCodes.includes(targetCode) && targetCode !== ''
414
);
415
416
if (hasMatch) matchScore++;
417
});
418
419
return {
420
name,
421
score: matchScore,
422
codes: nameCodes
423
};
424
}).filter(match => match.score >= threshold);
425
426
return matches.sort((a, b) => b.score - a.score);
427
}
428
429
/**
430
* Advanced name matching with fuzzy string matching
431
* @param targetName - Target name
432
* @param nameList - List of candidate names
433
* @returns Ranked list of matches
434
*/
435
advancedNameMatch(targetName, nameList) {
436
return nameList.map(name => {
437
// Phonetic similarity
438
const phoneticScore = this.calculatePhoneticSimilarity(targetName, name);
439
440
// String similarity
441
const stringScore = natural.JaroWinklerDistance(targetName.toLowerCase(), name.toLowerCase());
442
443
// Combined score (weighted)
444
const combinedScore = (phoneticScore * 0.6) + (stringScore * 0.4);
445
446
return {
447
name,
448
phoneticScore,
449
stringScore,
450
combinedScore
451
};
452
}).sort((a, b) => b.combinedScore - a.combinedScore);
453
}
454
455
/**
456
* Calculate phonetic similarity between two names
457
* @param name1 - First name
458
* @param name2 - Second name
459
* @returns Similarity score (0-1)
460
*/
461
calculatePhoneticSimilarity(name1, name2) {
462
let matches = 0;
463
let total = 0;
464
465
Object.values(this.algorithms).forEach(encoder => {
466
const codes1 = encoder(name1);
467
const codes2 = encoder(name2);
468
469
total++;
470
if (codes1.some(code1 => codes2.includes(code1) && code1 !== '')) {
471
matches++;
472
}
473
});
474
475
return matches / total;
476
}
477
}
478
479
// Usage
const matcher = new NameMatcher();

const customerNames = [
  'John Smith', 'Jon Smyth', 'Jonathan Smith', 'Jane Smith',
  'Catherine Johnson', 'Katherine Jonson', 'Maria Garcia',
  'Jose Rodriguez', 'José Rodriguez', 'Michael Brown'
];

// Find names similar to a query
const query = 'Jon Smith';
const similarNames = matcher.findSimilarNames(query, customerNames, 2);

console.log(`Names similar to "${query}" (requiring 2+ algorithm matches):`);
similarNames.forEach(match => {
  console.log(`${match.name} (score: ${match.score})`);
});

// Advanced matching: show the five best combined phonetic + string scores
const advancedMatches = matcher.advancedNameMatch(query, customerNames);
console.log(`\nAdvanced matching results for "${query}":`);
advancedMatches.slice(0, 5).forEach(match => {
  console.log(`${match.name} (combined: ${match.combinedScore.toFixed(3)})`);
});
503
```