0
# Utilities
1
2
Utility functions and data structures used throughout the Lunr library. This includes helper functions for string manipulation, data structures for efficient search operations, field references, match data handling, and scoring functions.
3
4
## Capabilities
5
6
### Utility Functions
7
8
Core utility functions in the `lunr.utils` namespace.
9
10
```javascript { .api }
11
/**
12
* Utility namespace containing helper functions
13
* @namespace lunr.utils
14
*/
15
lunr.utils = {
16
/**
17
* Print warning message to console
18
* @param {string} message - Warning message to display
19
*/
20
warn(message),
21
22
/**
23
* Convert object to string, handling null and undefined
24
* @param {*} obj - Object to convert to string
25
* @returns {string} - String representation, empty string for null/undefined
26
*/
27
asString(obj),
28
29
/**
30
* Shallow clone a flat object whose values are strings, numbers, booleans, or arrays; a nested object value throws a TypeError
31
* @param {*} obj - Object or array to clone
32
* @returns {*} - A new prototype-less object with array values copied; null/undefined are returned unchanged
33
*/
34
clone(obj)
35
};
36
```
37
38
**Usage Examples:**
39
40
```javascript
41
const lunr = require('lunr');
42
43
// Warning function
44
lunr.utils.warn('This is a warning message');
45
// Outputs to console if available
46
47
// String conversion with null safety
48
console.log(lunr.utils.asString(null)); // ""
49
console.log(lunr.utils.asString(undefined)); // ""
50
console.log(lunr.utils.asString("hello")); // "hello"
51
console.log(lunr.utils.asString(123)); // "123"
52
console.log(lunr.utils.asString({a: 1})); // "[object Object]"
53
54
// Object cloning
55
const original = { name: 'John', tags: ['dev', 'js'] };
56
const cloned = lunr.utils.clone(original);
57
cloned.name = 'Jane';
58
console.log(original.name); // "John" (unchanged)
59
console.log(cloned.name); // "Jane"
60
61
// Array-valued properties are copied, so the clone's arrays are independent.
// clone() always returns a plain object, so it cannot be used to copy a bare
// array (the result would have no push()); use slice() for that instead.
cloned.tags.push('search');
console.log(original.tags.length); // 2 (unchanged)
console.log(cloned.tags.length); // 3
67
```
68
69
### Scoring Functions
70
71
Functions for calculating document relevance scores.
72
73
```javascript { .api }
74
/**
75
* Calculate inverse document frequency for term scoring
76
* @param {Object} posting - Posting for the term: maps each field name to an object keyed by the refs of documents containing the term (the `_index` key is ignored)
77
* @param {number} documentCount - Total number of documents in the index
78
* @returns {number} - IDF score for the term
79
*/
80
lunr.idf(posting, documentCount);
81
```
82
83
**Usage Examples:**
84
85
```javascript
86
// lunr.idf counts the document refs listed under each field of the posting,
// so a posting must look like { fieldName: { docRef: {...}, ... } }.
// This helper builds one that lists `docCount` documents under a single field.
function makePosting(docCount) {
  const docs = {};
  for (let i = 0; i < docCount; i++) {
    docs[`doc${i}`] = {};
  }
  return { body: docs };
}

// Calculate IDF for a term that appears in 5 out of 100 documents
const posting = makePosting(5);
const totalDocs = 100;
const idfScore = lunr.idf(posting, totalDocs);
console.log(idfScore); // ~2.910 (higher for rare terms)

// Common term (appears in 80 out of 100 documents)
const commonPosting = makePosting(80);
const commonIdf = lunr.idf(commonPosting, totalDocs);
console.log(commonIdf); // ~0.227 (lower for common terms)
96
```
97
98
### Vector Class
99
100
Vector space representation for documents and term weights.
101
102
```javascript { .api }
103
/**
104
* Vector class for document representation in vector space
105
*/
106
class Vector {
107
/**
108
* Create a vector from array of elements
109
* @param {Array<number>} [elements] - Flat list of alternating index/value entries: [index0, value0, index1, value1, ...]
110
*/
111
constructor(elements);
112
113
/**
114
* Calculate insertion position for an index
115
* @param {number} index - Index to find position for
116
* @returns {number} - Position where index should be inserted
117
*/
118
positionForIndex(index);
119
120
/**
121
* Insert a value at the specified index; throws if that index already holds a value
122
* @param {number} index - Index position
123
* @param {number} value - Value to insert
124
* @returns {void} - Nothing; the vector is modified in place
125
*/
126
insert(index, value);
127
128
/**
129
* Insert or update a value at the specified index
130
* @param {number} index - Index position
131
* @param {number} value - Value to insert/update
132
* @param {Function} fn - Called as fn(existingValue, value) to compute the stored value when the index already exists
133
* @returns {void} - Nothing; the vector is modified in place
134
*/
135
upsert(index, value, fn);
136
137
/**
138
* Calculate the magnitude (length) of the vector
139
* @returns {number} - Vector magnitude
140
*/
141
magnitude();
142
143
/**
144
* Calculate dot product with another vector
145
* @param {lunr.Vector} otherVector - Vector to calculate dot product with
146
* @returns {number} - Dot product result
147
*/
148
dot(otherVector);
149
150
/**
151
* Calculate cosine similarity with another vector
152
* @param {lunr.Vector} otherVector - Vector to compare with
153
* @returns {number} - Similarity score (higher = more similar); 0 when this vector has zero magnitude
154
*/
155
similarity(otherVector);
156
157
/**
158
* Convert vector to regular array
159
* @returns {Array<number>} - Array of the vector's values only (indexes are omitted)
160
*/
161
toArray();
162
163
/**
164
* Serialize vector to JSON
165
* @returns {Array<number>} - Serialized vector data as a flat [index, value, ...] list
166
*/
167
toJSON();
168
}
169
```
170
171
**Usage Examples:**
172
173
```javascript
174
// Create vectors from flat [index, value, index, value, ...] lists
const vec1 = new lunr.Vector([0, 1, 1, 2, 2, 3]); // values 1, 2, 3 at indexes 0, 1, 2
const vec2 = new lunr.Vector([0, 2, 1, 1, 2, 3]); // values 2, 1, 3 at indexes 0, 1, 2

// Vector operations
console.log(vec1.magnitude()); // ~3.742, i.e. sqrt(1*1 + 2*2 + 3*3)
console.log(vec1.dot(vec2)); // 13, i.e. 1*2 + 2*1 + 3*3
console.log(vec1.similarity(vec2)); // Similarity score (higher = more similar)

// Sparse vector operations (index-value pairs)
const sparseVec = new lunr.Vector();
sparseVec.insert(10, 0.5); // Insert value 0.5 at index 10
sparseVec.insert(25, 1.2); // Insert value 1.2 at index 25
// insert() throws if the index is already populated; use upsert() to update it

// Upsert (insert or update)
sparseVec.upsert(10, 0.3, (existing, incoming) => existing + incoming);
// Index 10 now has value 0.8 (0.5 + 0.3)
191
```
192
193
### Set Class
194
195
Set data structure for document collections and filtering.
196
197
```javascript { .api }
198
/**
199
* Set class for working with collections of document references
200
*/
201
class Set {
202
/**
203
* Create a set from array of elements
204
* @param {Array} elements - Array of elements to include in set
205
*/
206
constructor(elements);
207
208
/**
209
* Check if the set contains an object
210
* @param {*} object - Object to check for membership
211
* @returns {boolean} - True if object is in the set
212
*/
213
contains(object);
214
215
/**
216
* Calculate intersection with another set
217
* @param {lunr.Set} other - Set to intersect with
218
* @returns {lunr.Set} - New set containing common elements
219
*/
220
intersect(other);
221
222
/**
223
* Calculate union with another set
224
* @param {lunr.Set} other - Set to union with
225
* @returns {lunr.Set} - New set containing all elements from both sets
226
*/
227
union(other);
228
229
/**
230
* Universal set containing all possible elements
231
* @type {lunr.Set}
232
*/
233
static complete;
234
235
/**
236
* Empty set containing no elements
237
* @type {lunr.Set}
238
*/
239
static empty;
240
}
241
```
242
243
**Usage Examples:**
244
245
```javascript
246
// Create sets
247
const set1 = new lunr.Set(['doc1', 'doc2', 'doc3']);
248
const set2 = new lunr.Set(['doc2', 'doc3', 'doc4']);
249
250
// Set operations
251
console.log(set1.contains('doc1')); // true
252
console.log(set1.contains('doc4')); // false
253
254
const intersection = set1.intersect(set2); // ['doc2', 'doc3']
255
const union = set1.union(set2); // ['doc1', 'doc2', 'doc3', 'doc4']
256
257
// Special sets
258
console.log(lunr.Set.empty.contains('anything')); // false
259
console.log(lunr.Set.complete.contains('anything')); // true
260
```
261
262
### TokenSet Class
263
264
Finite state automaton for efficient token matching with wildcards and fuzzy search.
265
266
```javascript { .api }
267
/**
268
* TokenSet class implementing finite state automaton for token matching
269
*/
270
class TokenSet {
271
/**
272
* Create a new TokenSet
273
*/
274
constructor();
275
276
/**
277
* Convert TokenSet to array of accepted strings
278
* @returns {Array<string>} - Array of strings accepted by this TokenSet
279
*/
280
toArray();
281
282
/**
283
* Convert TokenSet to string representation
284
* @returns {string} - String representation of the TokenSet
285
*/
286
toString();
287
288
/**
289
* Calculate intersection with another TokenSet
290
* @param {lunr.TokenSet} other - TokenSet to intersect with
291
* @returns {lunr.TokenSet} - New TokenSet representing the intersection
292
*/
293
intersect(other);
294
295
/**
296
* Create TokenSet from sorted array of strings
297
* @param {Array<string>} arr - Sorted array of strings
298
* @returns {lunr.TokenSet} - TokenSet accepting the given strings
299
*/
300
static fromArray(arr);
301
302
/**
303
* Create TokenSet from query clause
304
* @param {Object} clause - Query clause object
305
* @returns {lunr.TokenSet} - TokenSet for matching the clause
306
*/
307
static fromClause(clause);
308
309
/**
310
* Create TokenSet for fuzzy string matching
311
* @param {string} str - String to match fuzzily
312
* @param {number} editDistance - Maximum edit distance allowed
313
* @returns {lunr.TokenSet} - TokenSet for fuzzy matching
314
*/
315
static fromFuzzyString(str, editDistance);
316
317
/**
318
* Create TokenSet from exact string
319
* @param {string} str - String to match exactly
320
* @returns {lunr.TokenSet} - TokenSet for exact matching
321
*/
322
static fromString(str);
323
324
/**
325
* Internal ID counter for TokenSet nodes
326
* @type {number}
327
*/
328
static _nextId;
329
}
330
```
331
332
**Usage Examples:**
333
334
```javascript
335
// Create TokenSet for exact matching
336
const exactSet = lunr.TokenSet.fromString('javascript');
337
console.log(exactSet.toArray()); // ['javascript']
338
339
// Create TokenSet for fuzzy matching
340
const fuzzySet = lunr.TokenSet.fromFuzzyString('javascript', 1);
341
console.log(fuzzySet.toArray()); // ['javascript', 'javascrip', 'avascript', etc.]
342
343
// Create from array (the words must already be in sorted order,
// otherwise the underlying builder throws on out-of-order insertion)
const arraySet = lunr.TokenSet.fromArray(['java', 'java-script', 'javascript']);
345
346
// TokenSet operations
347
const intersection = exactSet.intersect(fuzzySet);
348
```
349
350
### TokenSet Builder
351
352
Builder for constructing optimized TokenSets from word lists.
353
354
```javascript { .api }
355
/**
356
* Builder for constructing TokenSets efficiently (exposed as `lunr.TokenSet.Builder`)
357
*/
358
class TokenSetBuilder {
359
/**
360
* Create a new TokenSet builder
361
*/
362
constructor();
363
364
/**
365
* Insert a word into the builder; words must be inserted in sorted order, otherwise an error is thrown
366
* @param {string} word - Word to insert
367
*/
368
insert(word);
369
370
/**
371
* Finalize the TokenSet construction
372
* @returns {void} - Nothing; after finishing, the built TokenSet is available as the builder's `root` property
373
*/
374
finish();
375
376
/**
377
* Minimize the automaton to reduce states
378
* @param {number} downTo - Minimize down to this state level
379
*/
380
minimize(downTo);
381
}
382
```
383
384
**Usage Examples:**
385
386
```javascript
387
// Build TokenSet from word list
const builder = new lunr.TokenSet.Builder();
// Words must be inserted in sorted order, so sort the list before inserting
['java', 'javascript', 'typescript', 'coffeescript'].sort().forEach((word) => {
  builder.insert(word);
});

// finish() minimizes the automaton in place and returns nothing;
// the completed TokenSet is the builder's root node.
builder.finish();
const wordSet = builder.root;
console.log(wordSet.toArray()); // All inserted words
395
```
396
397
### Field Reference
398
399
Reference system for identifying fields within documents.
400
401
```javascript { .api }
402
/**
403
* Reference to a specific field within a document
404
*/
405
class FieldRef {
406
/**
407
* Create a field reference
408
* @param {string} docRef - Document reference identifier
409
* @param {string} fieldName - Name of the field
410
* @param {string} stringValue - Optional string representation
411
*/
412
constructor(docRef, fieldName, stringValue);
413
414
/**
415
* Convert field reference to string representation
416
* @returns {string} - String representation (fieldName/docRef)
417
*/
418
toString();
419
420
/**
421
* Parse field reference from string representation
422
* @param {string} str - String to parse (fieldName/docRef format; split at the first joiner)
423
* @returns {lunr.FieldRef} - Parsed field reference
424
*/
425
static fromString(str);
426
427
/**
428
* Separator character used in string representation
429
* @type {string}
430
*/
431
static joiner; // "/"
432
}
433
```
434
435
**Usage Examples:**
436
437
```javascript
438
// Create field reference (constructor order is docRef, then fieldName)
const fieldRef = new lunr.FieldRef('doc123', 'title');
console.log(fieldRef.toString()); // "title/doc123" (field name first)

// Parse from string: everything before the first joiner is the field name,
// the remainder is the document ref
const parsed = lunr.FieldRef.fromString('content/doc456');
console.log(parsed.docRef); // "doc456"
console.log(parsed.fieldName); // "content"

// Separator placed between the field name and the document ref
console.log(lunr.FieldRef.joiner); // "/"
449
```
450
451
### Match Data
452
453
Container for search match metadata and term position information.
454
455
```javascript { .api }
456
/**
457
* Container for metadata about search matches
458
*/
459
class MatchData {
460
/**
461
* Create match data for a term and field
462
* @param {string} term - Matching term
463
* @param {string} field - Field where match occurred
464
* @param {Object} metadata - Match metadata keyed by name (positions, etc.); each value must be an array because the constructor copies it with slice()
465
*/
466
constructor(term, field, metadata);
467
468
/**
469
* Combine this match data with another MatchData instance
470
* @param {lunr.MatchData} otherMatchData - Other match data to combine
471
* @returns {void} - Nothing; the other instance's metadata is merged into this instance in place
472
*/
473
combine(otherMatchData);
474
475
/**
476
* Add metadata for a term and field
477
* @param {string} term - Term to add metadata for
478
* @param {string} field - Field to add metadata for
479
* @param {Object} metadata - Metadata to add
480
*/
481
add(term, field, metadata);
482
}
483
```
484
485
**Usage Examples:**
486
487
```javascript
488
// Create match data. Every metadata value must be an array: the constructor
// copies each value with slice(), so a scalar such as `frequency: 1` would throw.
const matchData = new lunr.MatchData('javascript', 'title', {
  positions: [[0, 10]],
});

// Add more match information
matchData.add('tutorial', 'content', {
  positions: [[15, 23], [45, 53]],
});

// Combine match data from different sources
const otherMatches = new lunr.MatchData('node', 'tags', {
  positions: [[0, 4]],
});

// combine() merges otherMatches into matchData in place and returns nothing
matchData.combine(otherMatches);
console.log(Object.keys(matchData.metadata)); // ['javascript', 'tutorial', 'node']
507
```
508
509
## Advanced Utility Patterns
510
511
### Custom String Processing
512
513
```javascript
514
// Extend utils with custom functions
515
/**
 * Normalize arbitrary input into a lower-case, punctuation-free string.
 * @param {*} str - Value to normalize (converted with lunr.utils.asString first)
 * @returns {string} - Normalized text with single spaces and no leading/trailing whitespace
 */
lunr.utils.customNormalize = function (str) {
  const lowered = lunr.utils.asString(str).toLowerCase();
  // Drop every character that is neither a word character nor whitespace
  const withoutPunctuation = lowered.replace(/[^\w\s]/g, '');
  // Collapse each run of whitespace into one space
  const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  return singleSpaced.trim();
};

// Use in pipeline
/**
 * Pipeline step that rewrites a token's text using customNormalize.
 * @param {Object} token - Token exposing an update(fn) method
 * @returns {*} - Whatever token.update returns
 */
function normalizeToken(token) {
  const normalizer = lunr.utils.customNormalize;
  return token.update(normalizer);
}
527
```
528
529
### Vector Space Operations
530
531
```javascript
532
// Document similarity calculation
533
/**
 * Score how alike two document vectors are and attach a coarse label.
 * @param {Object} doc1Vector - Vector whose similarity(other) method is called
 * @param {Object} doc2Vector - Vector passed to doc1Vector.similarity
 * @returns {{similarity: number, category: string}} - Raw score plus its label
 */
function calculateSimilarity(doc1Vector, doc2Vector) {
  const score = doc1Vector.similarity(doc2Vector);

  // Thresholds are exclusive: a score of exactly 0.8 is only 'similar'
  let label = 'different';
  if (score > 0.8) {
    label = 'very similar';
  } else if (score > 0.5) {
    label = 'similar';
  } else if (score > 0.2) {
    label = 'somewhat similar';
  }

  return { similarity: score, category: label };
}
542
543
// Find similar documents
544
/**
 * Rank documents by their similarity to a query vector.
 * @param {Object} queryVector - Vector whose similarity(other) method is called for each document
 * @param {Array<Object>} documentVectors - Candidate document vectors
 * @param {number} [threshold=0.3] - Results must score strictly above this value
 * @returns {Array<{index: number, similarity: number}>} - Matches sorted by descending similarity
 */
function findSimilarDocuments(queryVector, documentVectors, threshold = 0.3) {
  // Score every document first, remembering its position in the input list
  const scored = documentVectors.map((docVec, index) => {
    const similarity = queryVector.similarity(docVec);
    return { index: index, similarity: similarity };
  });

  const aboveThreshold = scored.filter((entry) => entry.similarity > threshold);

  // Highest similarity first
  return aboveThreshold.sort((left, right) => right.similarity - left.similarity);
}
553
```
554
555
### Set-based Filtering
556
557
```javascript
558
// Document filtering with sets
559
/**
 * Filters search results through an optional allow-list and a block-list of
 * document refs. A blocked ref is always rejected, even if it is also allowed.
 * Until allow() is called, every non-blocked document passes.
 */
class DocumentFilter {
  constructor() {
    // Start from the universal set so a filter without an allow-list lets
    // everything through; this is what makes the check in filter() reachable.
    this.allowedDocs = lunr.Set.complete;
    this.blockedDocs = new lunr.Set();
  }

  /**
   * Add document refs to the allow-list. After the first call, only refs
   * that have been allowed can pass the filter.
   * @param {Array<string>} docRefs - Document refs to allow
   */
  allow(docRefs) {
    const additions = new lunr.Set(docRefs);
    // The union of the universal set with anything is still universal, so the
    // first explicit allow() has to replace it rather than extend it.
    this.allowedDocs = this.allowedDocs === lunr.Set.complete
      ? additions
      : this.allowedDocs.union(additions);
  }

  /**
   * Add document refs to the block-list.
   * @param {Array<string>} docRefs - Document refs to block
   */
  block(docRefs) {
    this.blockedDocs = this.blockedDocs.union(new lunr.Set(docRefs));
  }

  /**
   * Keep only the results whose ref is not blocked and is allowed.
   * @param {Array<{ref: string}>} results - Search results to filter
   * @returns {Array<{ref: string}>} - New array with the surviving results
   */
  filter(results) {
    return results.filter((result) => {
      if (this.blockedDocs.contains(result.ref)) return false;
      if (this.allowedDocs === lunr.Set.complete) return true;
      return this.allowedDocs.contains(result.ref);
    });
  }
}
581
582
// Usage
583
const filter = new DocumentFilter();
584
filter.allow(['doc1', 'doc2', 'doc3']);
585
filter.block(['doc2']); // Block doc2 even though it's allowed
586
587
const filteredResults = filter.filter(searchResults);
588
```
589
590
### Performance Monitoring
591
592
```javascript
593
// Performance monitoring utilities
594
/**
 * Helpers that run a callback and report a measurement through lunr.utils.warn.
 */
lunr.utils.performance = {
  /**
   * Run fn and report how long it took in milliseconds.
   * @param {string} label - Prefix for the reported message
   * @param {Function} fn - Synchronous callback to measure
   * @returns {*} - Whatever fn returns
   */
  time(label, fn) {
    const startedAt = Date.now();
    const outcome = fn();
    const elapsed = Date.now() - startedAt;
    lunr.utils.warn(`${label}: ${elapsed}ms`);
    return outcome;
  },

  /**
   * Run fn and report the change in used heap, in kilobytes. When
   * process.memoryUsage is unavailable, fn is simply run without reporting.
   * @param {string} label - Prefix for the reported message
   * @param {Function} fn - Synchronous callback to measure
   * @returns {*} - Whatever fn returns
   */
  memory(label, fn) {
    const canMeasure = typeof process !== 'undefined' && process.memoryUsage;
    if (!canMeasure) {
      return fn();
    }

    const before = process.memoryUsage();
    const outcome = fn();
    const after = process.memoryUsage();
    const heapDelta = after.heapUsed - before.heapUsed;
    lunr.utils.warn(`${label}: ${Math.round(heapDelta / 1024)}KB`);
    return outcome;
  },
};
615
616
// Usage
617
const results = lunr.utils.performance.time('Search Query', () => {
618
return idx.search('javascript tutorial');
619
});
620
```