0
# Index Building
1
2
Core functionality for creating search indexes from documents. The index building process involves configuring fields, processing documents through text pipelines, and building optimized data structures for fast search operations.
3
4
## Capabilities
5
6
### Main Lunr Function
7
8
The primary entry point for creating search indexes using the builder pattern.
9
10
```javascript { .api }
11
/**
12
* Create a new search index using the builder pattern
13
* @param {Function} config - Configuration function that receives a Builder instance
14
* @returns {lunr.Index} - Built search index ready for querying
15
*/
16
function lunr(config);
17
18
/**
19
* Current version of the Lunr library
20
* @type {string}
21
*/
22
lunr.version; // "2.3.9"
23
```
24
25
**Usage Examples:**
26
27
```javascript
28
const lunr = require('lunr');
29
30
// Basic index creation
31
const idx = lunr(function () {
32
this.ref('id');
33
this.field('title');
34
this.field('content');
35
36
this.add({ id: '1', title: 'Hello', content: 'World' });
37
});
38
39
// Index with custom configuration
40
const customIdx = lunr(function () {
41
this.ref('docId');
42
this.field('title', { boost: 10 });
43
this.field('body');
44
this.field('tags', { boost: 5 });
45
46
// Custom pipeline (remove default stemmer)
47
this.pipeline.remove(lunr.stemmer);
48
this.pipeline.add(myCustomProcessor);
49
50
documents.forEach(function (doc) {
51
this.add(doc, { boost: doc.priority || 1 });
52
}, this);
53
});
54
```
55
56
### Builder Class
57
58
The Builder class provides the configuration interface for creating indexes.
59
60
```javascript { .api }
61
/**
62
* Builder class for configuring and constructing search indexes
63
*/
64
class Builder {
65
/**
66
* Create a new Builder instance
67
*/
68
constructor();
69
70
/**
71
* Set the document reference field
72
* @param {string} refField - Field name to use as document reference (default: 'id')
73
*/
74
ref(refField);
75
76
/**
77
* Add a field to be indexed
78
* @param {string} fieldName - Name of the field to index
79
* @param {Object} attributes - Optional field configuration
80
* @param {number} attributes.boost - Boost multiplier for field importance
81
* @param {Function} attributes.extractor - Function to extract field value from documents
82
*/
83
field(fieldName, attributes);
84
85
/**
86
* Set the field length normalization parameter
87
* @param {number} number - Normalization factor (0-1, default: 0.75)
88
*/
89
b(number);
90
91
/**
92
* Set the term frequency saturation parameter
93
* @param {number} number - Saturation factor (default: 1.2)
94
*/
95
k1(number);
96
97
/**
98
* Add a document to the index
99
* @param {Object} doc - Document object to index
100
* @param {Object} attributes - Optional document attributes
101
* @param {number} attributes.boost - Boost multiplier for document importance
102
*/
103
add(doc, attributes);
104
105
/**
106
* Build the final search index
107
* @returns {lunr.Index} - Constructed search index
108
*/
109
build();
110
111
/**
112
* Apply a plugin to the builder
113
* @param {Function} plugin - Plugin function that receives the builder
114
*/
115
use(plugin);
116
117
/**
118
* Processing pipeline for indexing documents
119
* @type {lunr.Pipeline}
120
*/
121
pipeline;
122
123
/**
124
* Processing pipeline for search queries
125
* @type {lunr.Pipeline}
126
*/
127
searchPipeline;
128
129
/**
130
* Tokenizer function for breaking text into tokens
131
* @type {Function}
132
*/
133
tokenizer;
134
135
/**
136
* Number of documents added to the index
137
* @type {number}
138
*/
139
documentCount;
140
141
/**
142
* Array of metadata keys to preserve during indexing
143
* @type {Array<string>}
144
*/
145
metadataWhitelist;
146
}
147
```
148
149
**Usage Examples:**
150
151
```javascript
152
// Manual builder usage (unlike lunr(), a bare Builder starts with empty pipelines: no trimmer, stop word filter, or stemmer)
153
const builder = new lunr.Builder();
154
builder.ref('id');
155
builder.field('title', { boost: 10 });
156
builder.field('content');
157
158
// Configure scoring parameters
159
builder.b(0.8); // Higher field length normalization
160
builder.k1(1.5); // Higher k1 than the default: term frequency saturates more slowly
161
162
// Add documents with custom boosts
163
builder.add({ id: '1', title: 'Important', content: 'Very important document' }, { boost: 2 });
164
builder.add({ id: '2', title: 'Normal', content: 'Regular document' });
165
166
const idx = builder.build();
167
```
168
169
### Field Configuration
170
171
Advanced field configuration options for customizing how fields are indexed and searched.
172
173
```javascript { .api }
174
/**
175
* Field configuration options
176
* @typedef {Object} FieldAttributes
177
* @property {number} boost - Multiplicative boost factor for field importance
178
* @property {Function} extractor - Function to extract field value from documents
179
*/
180
181
/**
182
* Document boost configuration
183
* @typedef {Object} DocumentAttributes
184
* @property {number} boost - Multiplicative boost factor for document importance
185
*/
186
```
187
188
**Usage Examples:**
189
190
```javascript
191
const idx = lunr(function () {
192
this.ref('id');
193
194
// Boosted title field
195
this.field('title', { boost: 10 });
196
197
// Field with custom extractor
198
this.field('author', {
199
extractor: function (doc) {
200
return doc.author ? doc.author.name : '';
201
}
202
});
203
204
// Multi-value field extraction
205
this.field('tags', {
206
extractor: function (doc) {
207
return doc.tags ? doc.tags.join(' ') : '';
208
}
209
});
210
211
this.add({
212
id: '1',
213
title: 'TypeScript Guide',
214
author: { name: 'John Doe', email: 'john@example.com' },
215
tags: ['typescript', 'javascript', 'programming']
216
});
217
});
218
```
219
220
### Pipeline Configuration
221
222
Customizing the text processing pipeline for indexing and searching.
223
224
```javascript { .api }
225
/**
226
* Text processing pipeline
227
* @type {lunr.Pipeline}
228
*/
229
Builder.prototype.pipeline;
230
231
/**
232
* Search-time processing pipeline
233
* @type {lunr.Pipeline}
234
*/
235
Builder.prototype.searchPipeline;
236
```
237
238
**Usage Examples:**
239
240
```javascript
241
const idx = lunr(function () {
242
this.ref('id');
243
this.field('content');
244
245
// Remove stop word filter
246
this.pipeline.remove(lunr.stopWordFilter);
247
248
// Add custom processor before stemmer
249
this.pipeline.before(lunr.stemmer, function (token) {
250
// Custom processing logic
251
return token.update(function (str) {
252
return str.replace(/[0-9]/g, '');
253
});
254
});
255
256
// Customize search pipeline differently
257
this.searchPipeline.remove(lunr.stemmer);
258
259
this.add({ id: '1', content: 'Process this text with custom pipeline' });
260
});
261
```
262
263
### Metadata Preservation
264
265
Controlling which metadata is preserved during indexing for later retrieval.
266
267
```javascript { .api }
268
/**
269
* Array of metadata keys to preserve during indexing
270
* @type {Array<string>}
271
*/
272
Builder.prototype.metadataWhitelist;
273
```
274
275
**Usage Examples:**
276
277
```javascript
278
const idx = lunr(function () {
279
this.ref('id');
280
this.field('content');
281
282
// Preserve custom metadata
283
this.metadataWhitelist = ['position', 'frequency'];
284
285
this.add({ id: '1', content: 'Document with preserved metadata' });
286
});
287
```
288
289
## Common Patterns
290
291
### Large Dataset Indexing
292
293
```javascript
294
// Efficient indexing of large datasets
295
const idx = lunr(function () {
296
this.ref('id');
297
this.field('title', { boost: 5 });
298
this.field('content');
299
300
// Add documents in fixed-size batches (note: indexing is still synchronous and the whole index is built in memory)
301
const batchSize = 1000;
302
for (let i = 0; i < documents.length; i += batchSize) {
303
const batch = documents.slice(i, i + batchSize);
304
batch.forEach(doc => this.add(doc));
305
}
306
});
307
```
308
309
### Multi-language Support
310
311
```javascript
312
// Using plugins for multi-language support (requires lunr language plugins)
313
const idx = lunr(function () {
314
// Apply language-specific plugins
315
this.use(lunr.fr); // French language support (example)
316
317
this.ref('id');
318
this.field('title');
319
this.field('content');
320
321
frenchDocuments.forEach(doc => this.add(doc));
322
});
323
```
324
325
### Custom Scoring Parameters
326
327
```javascript
328
// Fine-tuning search scoring
329
const idx = lunr(function () {
330
this.ref('id');
331
this.field('title', { boost: 15 });
332
this.field('content');
333
334
// BM25 parameters
335
this.b(0.9); // Higher field length normalization (0-1)
336
this.k1(2.0); // Higher k1 than the default (1.2): term frequency saturates more slowly
337
338
documents.forEach(doc => this.add(doc));
339
});
340
```