or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md distance.md index.md ngrams-tfidf.md phonetics.md pos-tagging.md sentiment.md text-processing.md transliterators.md utilities.md wordnet.md

docs/distance.md

0

# Distance Algorithms

1

2

String distance algorithms for calculating similarity between strings. These are useful for fuzzy matching, spell checking, duplicate detection, and text comparison tasks.

3

4

## Capabilities

5

6

### Jaro-Winkler Distance

7

8

Jaro-Winkler distance algorithm optimized for short strings and personal names.

9

10

```javascript { .api }

11

/**

12

* Calculate Jaro-Winkler distance between two strings

13

* Returns a value between 0 (no similarity) and 1 (identical)

14

* @param s1 - First string

15

* @param s2 - Second string

16

* @returns Similarity score (0-1)

17

*/

18

function JaroWinklerDistance(s1: string, s2: string): number;

19

```

20

21

**Usage Examples:**

22

23

```javascript

24

const natural = require('natural');

25

26

// Calculate similarity

27

console.log(natural.JaroWinklerDistance('sitting', 'kitten')); // 0.746

28

console.log(natural.JaroWinklerDistance('hello', 'hello')); // 1.0

29

console.log(natural.JaroWinklerDistance('hello', 'world')); // ~0.47 (low similarity: only one 'l' matches)

30

31

// Find closest match

32

const target = 'javascript';

33

const candidates = ['java', 'script', 'typescript', 'coffeescript'];

34

35

let bestMatch = '';

36

let bestScore = 0;

37

candidates.forEach(candidate => {

38

const score = natural.JaroWinklerDistance(target, candidate);

39

if (score > bestScore) {

40

bestScore = score;

41

bestMatch = candidate;

42

}

43

});

44

console.log(`Best match: ${bestMatch} (${bestScore})`);

45

```

46

47

### Levenshtein Distance

48

49

Classic edit distance measuring minimum edits (insertions, deletions, substitutions) needed to transform one string into another.

50

51

```javascript { .api }

52

/**

53

* Calculate Levenshtein distance between two strings

54

* Returns the minimum number of edits required

55

* @param s1 - First string

56

* @param s2 - Second string

57

* @returns Number of edits required

58

*/

59

function LevenshteinDistance(s1: string, s2: string): number;

60

61

/**

62

* Search for strings within a certain Levenshtein distance

63

* @param source - Source string to search from

64

* @param targets - Array of target strings to search

65

* @param maxDistance - Maximum distance to consider

66

* @returns Array of matches with distances

67

*/

68

function LevenshteinDistanceSearch(source: string, targets: string[], maxDistance: number): SearchResult[];

69

70

interface SearchResult {

71

target: string;

72

distance: number;

73

}

74

```

75

76

**Usage Examples:**

77

78

```javascript

79

const natural = require('natural');

80

81

// Basic distance calculation

82

console.log(natural.LevenshteinDistance('kitten', 'sitting')); // 3

83

console.log(natural.LevenshteinDistance('hello', 'hallo')); // 1

84

85

// Search with distance threshold

86

const source = 'javascript';

87

const targets = ['java', 'script', 'typescript', 'python', 'rust'];

88

const results = natural.LevenshteinDistanceSearch(source, targets, 5);

89

console.log(results);

90

// Results with distance <= 5

91

```

92

93

### Damerau-Levenshtein Distance

94

95

Extended Levenshtein distance that also allows transposition of adjacent characters.

96

97

```javascript { .api }

98

/**

99

* Calculate Damerau-Levenshtein distance between two strings

100

* Includes transposition operations in addition to insertions, deletions, substitutions

101

* @param s1 - First string

102

* @param s2 - Second string

103

* @returns Number of edits required

104

*/

105

function DamerauLevenshteinDistance(s1: string, s2: string): number;

106

107

/**

108

* Search using Damerau-Levenshtein distance

109

* @param source - Source string

110

* @param targets - Target strings to search

111

* @param maxDistance - Maximum distance threshold

112

* @returns Array of matches with distances

113

*/

114

function DamerauLevenshteinDistanceSearch(source: string, targets: string[], maxDistance: number): SearchResult[];

115

```

116

117

**Usage Examples:**

118

119

```javascript

120

const natural = require('natural');

121

122

// Handles transpositions better than standard Levenshtein

123

console.log(natural.DamerauLevenshteinDistance('ca', 'ac')); // 1 (transposition)

124

console.log(natural.LevenshteinDistance('ca', 'ac')); // 2 (deletion + insertion)

125

126

// Useful for typos where characters are swapped

127

console.log(natural.DamerauLevenshteinDistance('javascript', 'javasrcpit')); // 2 (two adjacent transpositions: cr→rc, ip→pi)

128

```

129

130

### Dice Coefficient

131

132

Dice coefficient for measuring similarity based on bigram overlap.

133

134

```javascript { .api }

135

/**

136

* Calculate Dice coefficient between two strings

137

* Based on bigram similarity, returns value between 0 and 1

138

* @param s1 - First string

139

* @param s2 - Second string

140

* @returns Similarity coefficient (0-1)

141

*/

142

function DiceCoefficient(s1: string, s2: string): number;

143

```

144

145

**Usage Examples:**

146

147

```javascript

148

const natural = require('natural');

149

150

// Good for longer strings and fuzzy matching

151

console.log(natural.DiceCoefficient('night', 'nacht')); // ~0.25

152

console.log(natural.DiceCoefficient('hello world', 'hello word')); // High similarity

153

154

// Useful for document similarity

155

const doc1 = 'the quick brown fox jumps';

156

const doc2 = 'quick brown fox jumping';

157

console.log(natural.DiceCoefficient(doc1, doc2)); // Measures character-bigram overlap

158

```

159

160

### Hamming Distance

161

162

Hamming distance for strings of equal length, counting differing positions.

163

164

```javascript { .api }

165

/**

166

* Calculate Hamming distance between two equal-length strings

167

* Counts the number of positions where characters differ

168

* @param s1 - First string

169

* @param s2 - Second string (must be same length as s1)

170

* @returns Number of differing positions

171

* @throws Error if strings have different lengths

172

*/

173

function HammingDistance(s1: string, s2: string): number;

174

```

175

176

**Usage Examples:**

177

178

```javascript

179

const natural = require('natural');

180

181

// Strings must be same length

182

console.log(natural.HammingDistance('hello', 'hallo')); // 1

183

console.log(natural.HammingDistance('12345', '12395')); // 1

184

185

// Useful for binary strings, DNA sequences, etc.

186

console.log(natural.HammingDistance('1011101', '1001001')); // 2

187

188

// Error for different lengths

189

try {

190

natural.HammingDistance('hello', 'hi'); // Throws error

191

} catch (err) {

192

console.log('Strings must be same length');

193

}

194

```

195

196

## Usage Patterns

197

198

### Spell Checking

199

200

```javascript

201

const natural = require('natural');

202

203

function findClosestSpelling(word, dictionary, threshold = 0.8) {

204

let bestMatch = null;

205

let bestScore = 0;

206

207

dictionary.forEach(dictWord => {

208

const score = natural.JaroWinklerDistance(word, dictWord);

209

if (score > bestScore && score >= threshold) {

210

bestScore = score;

211

bestMatch = dictWord;

212

}

213

});

214

215

return bestMatch;

216

}

217

218

const dictionary = ['javascript', 'python', 'java', 'typescript'];

219

const misspelled = 'javasript';

220

const suggestion = findClosestSpelling(misspelled, dictionary);

221

console.log(`Did you mean: ${suggestion}?`);

222

```

223

224

### Fuzzy Search

225

226

```javascript

227

const natural = require('natural');

228

229

function fuzzySearch(query, items, maxDistance = 2) {

230

const results = [];

231

232

items.forEach(item => {

233

const distance = natural.LevenshteinDistance(query.toLowerCase(), item.toLowerCase());

234

if (distance <= maxDistance) {

235

results.push({ item, distance });

236

}

237

});

238

239

return results.sort((a, b) => a.distance - b.distance);

240

}

241

242

const items = ['apple', 'application', 'apply', 'approach', 'appropriate'];

243

const query = 'aple';

244

const matches = fuzzySearch(query, items);

245

console.log(matches); // [{item: 'apple', distance: 1}, ...]

246

```

247

248

### Duplicate Detection

249

250

```javascript

251

const natural = require('natural');

252

253

function findDuplicates(strings, threshold = 0.9) {

254

const duplicates = [];

255

256

for (let i = 0; i < strings.length; i++) {

257

for (let j = i + 1; j < strings.length; j++) {

258

const similarity = natural.JaroWinklerDistance(strings[i], strings[j]);

259

if (similarity >= threshold) {

260

duplicates.push({

261

first: strings[i],

262

second: strings[j],

263

similarity

264

});

265

}

266

}

267

}

268

269

return duplicates;

270

}

271

272

const names = ['John Smith', 'Jon Smith', 'Jane Doe', 'Jane Do'];

273

const duplicates = findDuplicates(names);

274

console.log(duplicates); // Potential duplicates with high similarity

275

```