0
# Distance Algorithms
1
2
String distance algorithms for calculating similarity between strings. These are useful for fuzzy matching, spell checking, duplicate detection, and text comparison tasks.
3
4
## Capabilities
5
6
### Jaro-Winkler Distance
7
8
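Jaro-Winkler string metric, best suited to short strings such as personal names. Despite the word "distance" in its name, the function returns a similarity score: higher values mean the strings are more alike.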
9
10
```javascript { .api }
11
/**
12
* Calculate Jaro-Winkler distance between two strings
13
* Returns a value between 0 (no similarity) and 1 (identical)
14
* @param s1 - First string
15
* @param s2 - Second string
16
* @returns Similarity score (0-1)
17
*/
18
function JaroWinklerDistance(s1: string, s2: string): number;
19
```
20
21
**Usage Examples:**
22
23
```javascript
24
const natural = require('natural');
25
26
// Calculate similarity
27
console.log(natural.JaroWinklerDistance('sitting', 'kitten')); // 0.746
28
console.log(natural.JaroWinklerDistance('hello', 'hello')); // 1.0
29
console.log(natural.JaroWinklerDistance('hello', 'world')); // ~0.467 (low, but not 0: one 'l' falls inside the match window)
30
31
// Find closest match
32
const target = 'javascript';
33
const candidates = ['java', 'script', 'typescript', 'coffeescript'];
34
35
let bestMatch = '';
36
let bestScore = 0;
37
candidates.forEach(candidate => {
38
const score = natural.JaroWinklerDistance(target, candidate);
39
if (score > bestScore) {
40
bestScore = score;
41
bestMatch = candidate;
42
}
43
});
44
console.log(`Best match: ${bestMatch} (${bestScore})`);
45
```
46
47
### Levenshtein Distance
48
49
Classic edit distance measuring minimum edits (insertions, deletions, substitutions) needed to transform one string into another.
50
51
```javascript { .api }
52
/**
53
* Calculate Levenshtein distance between two strings
54
* Returns the minimum number of edits required
55
* @param s1 - First string
56
* @param s2 - Second string
57
* @returns Number of edits required
58
*/
59
function LevenshteinDistance(s1: string, s2: string): number;
60
61
/**
 * Approximate substring search: find the substring of `target` that is
 * closest to `source` by Levenshtein distance.
 * The argument order matters; the search is not symmetric.
 * @param source - Pattern to look for
 * @param target - Text to search within (a single string, not an array)
 * @param options - Optional per-operation edit costs
 * @returns The best-matching substring, its edit distance from `source`,
 *          and the offset at which it starts in `target`
 */
function LevenshteinDistanceSearch(source: string, target: string, options?: LevenshteinOptions): SearchResult;

interface LevenshteinOptions {
  insertion_cost?: number;
  deletion_cost?: number;
  substitution_cost?: number;
}

interface SearchResult {
  substring: string;
  distance: number;
  offset: number;
}
74
```
75
76
**Usage Examples:**
77
78
```javascript
79
const natural = require('natural');
80
81
// Basic distance calculation
82
console.log(natural.LevenshteinDistance('kitten', 'sitting')); // 3
83
console.log(natural.LevenshteinDistance('hello', 'hallo')); // 1
84
85
// Approximate substring search: find the part of `target` closest to `source`.
// LevenshteinDistanceSearch takes one target string and returns one result object.
const source = 'The RainCoat BookStore';
const target = 'All the best books are here at the Rain Coats Book Store';
const result = natural.LevenshteinDistanceSearch(source, target);
console.log(result);
// { substring: 'the Rain Coats Book Store', distance: 4, offset: 31 }
91
```
92
93
### Damerau-Levenshtein Distance
94
95
Extended Levenshtein distance that also allows transposition of adjacent characters.
96
97
```javascript { .api }
98
/**
99
* Calculate Damerau-Levenshtein distance between two strings
100
* Includes transposition operations in addition to insertions, deletions, substitutions
101
* @param s1 - First string
102
* @param s2 - Second string
103
* @returns Number of edits required
104
*/
105
function DamerauLevenshteinDistance(s1: string, s2: string): number;
106
107
/**
 * Approximate substring search using Damerau-Levenshtein distance: find the
 * substring of `target` that is closest to `source`, counting a swap of two
 * adjacent characters as a single edit.
 * @param source - Pattern to look for
 * @param target - Text to search within (a single string, not an array)
 * @param options - Optional edit costs and the restricted/unrestricted switch
 * @returns The best-matching substring, its edit distance from `source`,
 *          and the offset at which it starts in `target`
 */
function DamerauLevenshteinDistanceSearch(
  source: string,
  target: string,
  options?: {
    insertion_cost?: number;
    deletion_cost?: number;
    substitution_cost?: number;
    transposition_cost?: number;
    restricted?: boolean;
  }
): { substring: string; distance: number; offset: number };
115
```
116
117
**Usage Examples:**
118
119
```javascript
120
const natural = require('natural');
121
122
// Handles transpositions better than standard Levenshtein
123
console.log(natural.DamerauLevenshteinDistance('ca', 'ac')); // 1 (transposition)
124
console.log(natural.LevenshteinDistance('ca', 'ac')); // 2 (deletion + insertion)
125
126
// Useful for typos where characters are swapped
127
console.log(natural.DamerauLevenshteinDistance('javascript', 'javasrcpit')); // 2 (two adjacent swaps: "cr"→"rc" and "ip"→"pi")
128
```
129
130
### Dice Coefficient
131
132
Dice coefficient for measuring similarity based on bigram overlap.
133
134
```javascript { .api }
135
/**
136
* Calculate Dice coefficient between two strings
137
* Based on bigram similarity, returns value between 0 and 1
138
* @param s1 - First string
139
* @param s2 - Second string
140
* @returns Similarity coefficient (0-1)
141
*/
142
function DiceCoefficient(s1: string, s2: string): number;
143
```
144
145
**Usage Examples:**
146
147
```javascript
148
const natural = require('natural');
149
150
// Good for longer strings and fuzzy matching
151
console.log(natural.DiceCoefficient('night', 'nacht')); // ~0.25
152
console.log(natural.DiceCoefficient('hello world', 'hello word')); // High similarity
153
154
// Useful for document similarity
155
const doc1 = 'the quick brown fox jumps';
156
const doc2 = 'quick brown fox jumping';
157
console.log(natural.DiceCoefficient(doc1, doc2)); // Measures character-bigram overlap (not whole-word overlap)
158
```
159
160
### Hamming Distance
161
162
Hamming distance for strings of equal length, counting differing positions.
163
164
```javascript { .api }
165
/**
166
* Calculate Hamming distance between two equal-length strings
167
* Counts the number of positions where characters differ
168
* @param s1 - First string
169
* @param s2 - Second string (must be same length as s1)
170
* @returns Number of differing positions
171
* @remarks Returns -1 instead of throwing when the strings have different lengths
172
*/
173
function HammingDistance(s1: string, s2: string): number;
174
```
175
176
**Usage Examples:**
177
178
```javascript
179
const natural = require('natural');
180
181
// Strings must be same length
182
console.log(natural.HammingDistance('hello', 'hallo')); // 1
183
console.log(natural.HammingDistance('12345', '12395')); // 1
184
185
// Useful for binary strings, DNA sequences, etc.
186
console.log(natural.HammingDistance('1011101', '1001001')); // 2
187
188
// Different lengths: HammingDistance returns -1; it does not throw,
// so check the return value instead of wrapping the call in try/catch.
const mismatch = natural.HammingDistance('hello', 'hi'); // -1
if (mismatch === -1) {
  console.log('Strings must be same length');
}
194
```
195
196
## Usage Patterns
197
198
### Spell Checking
199
200
```javascript
201
const natural = require('natural');
202
203
/**
 * Suggest the dictionary entry most similar to a (possibly misspelled) word.
 *
 * Each entry is scored with natural.JaroWinklerDistance. An entry replaces the
 * current leader only when its score is strictly greater than the leader's and
 * at least `threshold`, so on equal scores the earlier entry is kept.
 *
 * @param word - Word to find a suggestion for
 * @param dictionary - Array of candidate words
 * @param threshold - Minimum score an entry needs to be accepted (default 0.8)
 * @returns The accepted entry with the highest score, or null if none qualifies
 */
function findClosestSpelling(word, dictionary, threshold = 0.8) {
  const leader = dictionary.reduce(
    (current, entry) => {
      const similarity = natural.JaroWinklerDistance(word, entry);
      const accepted = similarity > current.score && similarity >= threshold;
      return accepted ? { match: entry, score: similarity } : current;
    },
    { match: null, score: 0 }
  );

  return leader.match;
}
217
218
const dictionary = ['javascript', 'python', 'java', 'typescript'];
219
const misspelled = 'javasript';
220
const suggestion = findClosestSpelling(misspelled, dictionary);
221
console.log(`Did you mean: ${suggestion}?`);
222
```
223
224
### Fuzzy Search
225
226
```javascript
227
const natural = require('natural');
228
229
/**
 * Case-insensitive fuzzy lookup based on Levenshtein distance.
 *
 * Both the query and each item are lower-cased before being compared with
 * natural.LevenshteinDistance. Items whose distance exceeds `maxDistance` are
 * dropped, and the survivors are ordered by ascending distance.
 *
 * @param query - Text to search for
 * @param items - Array of candidate strings
 * @param maxDistance - Largest distance still counted as a match (default 2)
 * @returns Array of `{ item, distance }` objects, closest match first
 */
function fuzzySearch(query, items, maxDistance = 2) {
  return items
    .map(item => ({
      item,
      distance: natural.LevenshteinDistance(query.toLowerCase(), item.toLowerCase())
    }))
    .filter(hit => hit.distance <= maxDistance)
    .sort((left, right) => left.distance - right.distance);
}
241
242
const items = ['apple', 'application', 'apply', 'approach', 'appropriate'];
243
const query = 'aple';
244
const matches = fuzzySearch(query, items);
245
console.log(matches); // [{item: 'apple', distance: 1}, ...]
246
```
247
248
### Duplicate Detection
249
250
```javascript
251
const natural = require('natural');
252
253
/**
 * Report pairs of strings that are similar enough to be likely duplicates.
 *
 * Every unordered pair (earlier index, later index) is scored once with
 * natural.JaroWinklerDistance; pairs scoring at least `threshold` are reported.
 *
 * @param strings - Array of strings to compare against each other
 * @param threshold - Minimum similarity for a pair to be reported (default 0.9)
 * @returns Array of `{ first, second, similarity }` objects in scan order
 */
function findDuplicates(strings, threshold = 0.9) {
  const pairs = [];
  const total = strings.length;

  for (let left = 0; left < total; left += 1) {
    const first = strings[left];

    for (let right = left + 1; right < total; right += 1) {
      const second = strings[right];
      const similarity = natural.JaroWinklerDistance(first, second);

      // Negated form of the acceptance test so NaN scores are skipped too.
      if (!(similarity >= threshold)) {
        continue;
      }

      pairs.push({ first, second, similarity });
    }
  }

  return pairs;
}
271
272
const names = ['John Smith', 'Jon Smith', 'Jane Doe', 'Jane Do'];
273
const duplicates = findDuplicates(names);
274
console.log(duplicates); // Potential duplicates with high similarity
275
```