Tessl Tile for npm/lunr@2.3.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced-querying.md index-building.md index.md searching.md text-processing.md utilities.md

docs/index-building.md

0
# Index Building
1

2
Core functionality for creating search indexes from documents. The index building process involves configuring fields, processing documents through text pipelines, and building optimized data structures for fast search operations.
3

4
## Capabilities
5

6
### Main Lunr Function
7

8
The primary entry point for creating search indexes using the builder pattern.
9

10
```javascript { .api }
11
/**
12
 * Create a new search index using the builder pattern
13
 * @param {Function} config - Configuration function that receives a Builder instance
14
 * @returns {lunr.Index} - Built search index ready for querying
15
 */
16
function lunr(config);
17

18
/**
19
 * Current version of the Lunr library
20
 * @type {string}
21
 */
22
lunr.version; // "2.3.0"
23
```
24

25
**Usage Examples:**
26

27
```javascript
28
const lunr = require('lunr');
29

30
// Basic index creation
31
const idx = lunr(function () {
32
  this.ref('id');
33
  this.field('title');
34
  this.field('content');
35

36
  this.add({ id: '1', title: 'Hello', content: 'World' });
37
});
38

39
// Index with custom configuration
40
const customIdx = lunr(function () {
41
  this.ref('docId');
42
  this.field('title', { boost: 10 });
43
  this.field('body');
44
  this.field('tags', { boost: 5 });
45
  
46
  // Custom pipeline (remove default stemmer)
47
  this.pipeline.remove(lunr.stemmer);
48
  this.pipeline.add(myCustomProcessor);
49
  
50
  documents.forEach(function (doc) {
51
    this.add(doc, { boost: doc.priority || 1 });
52
  }, this);
53
});
54
```
55

56
### Builder Class
57

58
The Builder class provides the configuration interface for creating indexes.
59

60
```javascript { .api }
61
/**
62
 * Builder class for configuring and constructing search indexes
63
 */
64
class Builder {
65
  /**
66
   * Create a new Builder instance
67
   */
68
  constructor();
69

70
  /**
71
   * Set the document reference field
72
   * @param {string} refField - Field name to use as document reference (default: 'id')
73
   */
74
  ref(refField);
75

76
  /**
77
   * Add a field to be indexed
78
   * @param {string} fieldName - Name of the field to index
79
   * @param {Object} attributes - Optional field configuration
80
   * @param {number} attributes.boost - Boost multiplier for field importance
81
   * @param {Function} attributes.extractor - Function to extract field value from documents
82
   */
83
  field(fieldName, attributes);
84

85
  /**
86
   * Set the field length normalization parameter
87
   * @param {number} number - Normalization factor (0-1, default: 0.75)
88
   */
89
  b(number);
90

91
  /**
92
   * Set the term frequency saturation parameter
93
   * @param {number} number - Saturation factor (default: 1.2)
94
   */
95
  k1(number);
96

97
  /**
98
   * Add a document to the index
99
   * @param {Object} doc - Document object to index
100
   * @param {Object} attributes - Optional document attributes  
101
   * @param {number} attributes.boost - Boost multiplier for document importance
102
   */
103
  add(doc, attributes);
104

105
  /**
106
   * Build the final search index
107
   * @returns {lunr.Index} - Constructed search index
108
   */
109
  build();
110

111
  /**
112
   * Apply a plugin to the builder
113
   * @param {Function} plugin - Plugin function that receives the builder
114
   */
115
  use(plugin);
116

117
  /**
118
   * Processing pipeline for indexing documents
119
   * @type {lunr.Pipeline}
120
   */
121
  pipeline;
122

123
  /**
124
   * Processing pipeline for search queries
125
   * @type {lunr.Pipeline}
126
   */
127
  searchPipeline;
128

129
  /**
130
   * Tokenizer function for breaking text into tokens
131
   * @type {Function}
132
   */
133
  tokenizer;
134

135
  /**
136
   * Number of documents added to the index
137
   * @type {number}
138
   */
139
  documentCount;
140

141
  /**
142
   * Array of metadata keys to preserve during indexing
143
   * @type {Array<string>}
144
   */
145
  metadataWhitelist;
146
}
147
```
148

149
**Usage Examples:**
150

151
```javascript
152
// Manual builder usage (unlike lunr(), a bare Builder starts with empty pipelines: no trimmer, stop word filter, or stemmer)
153
const builder = new lunr.Builder();
154
builder.ref('id');
155
builder.field('title', { boost: 10 });
156
builder.field('content');
157

158
// Configure scoring parameters
159
builder.b(0.8);    // Higher field length normalization
160
builder.k1(1.5);   // Higher k1 than the 1.2 default: term frequency saturates more slowly
161

162
// Add documents with custom boosts
163
builder.add({ id: '1', title: 'Important', content: 'Very important document' }, { boost: 2 });
164
builder.add({ id: '2', title: 'Normal', content: 'Regular document' });
165

166
const idx = builder.build();
167
```
168

169
### Field Configuration
170

171
Advanced field configuration options for customizing how fields are indexed and searched.
172

173
```javascript { .api }
174
/**
175
 * Field configuration options
176
 * @typedef {Object} FieldAttributes
177
 * @property {number} boost - Multiplicative boost factor for field importance
178
 * @property {Function} extractor - Function to extract field value from documents
179
 */
180

181
/**
182
 * Document boost configuration
183
 * @typedef {Object} DocumentAttributes  
184
 * @property {number} boost - Multiplicative boost factor for document importance
185
 */
186
```
187

188
**Usage Examples:**
189

190
```javascript
191
const idx = lunr(function () {
192
  this.ref('id');
193
  
194
  // Boosted title field
195
  this.field('title', { boost: 10 });
196
  
197
  // Field with custom extractor
198
  this.field('author', {
199
    extractor: function (doc) {
200
      return doc.author ? doc.author.name : '';
201
    }
202
  });
203
  
204
  // Multi-value field extraction
205
  this.field('tags', {
206
    extractor: function (doc) {
207
      return doc.tags ? doc.tags.join(' ') : '';
208
    }
209
  });
210

211
  this.add({
212
    id: '1',
213
    title: 'TypeScript Guide',
214
    author: { name: 'John Doe', email: 'john@example.com' },
215
    tags: ['typescript', 'javascript', 'programming']
216
  });
217
});
218
```
219

220
### Pipeline Configuration
221

222
Customizing the text processing pipeline for indexing and searching.
223

224
```javascript { .api }
225
/**
226
 * Text processing pipeline
227
 * @type {lunr.Pipeline}
228
 */
229
Builder.prototype.pipeline;
230

231
/**
232
 * Search-time processing pipeline
233
 * @type {lunr.Pipeline}
234
 */
235
Builder.prototype.searchPipeline;
236
```
237

238
**Usage Examples:**
239

240
```javascript
241
const idx = lunr(function () {
242
  this.ref('id');
243
  this.field('content');
244
  
245
  // Remove stop word filter
246
  this.pipeline.remove(lunr.stopWordFilter);
247
  
248
  // Add custom processor before stemmer
249
  this.pipeline.before(lunr.stemmer, function (token) {
250
    // Custom processing logic
251
    return token.update(function (str) {
252
      return str.replace(/[0-9]/g, '');
253
    });
254
  });
255
  
256
  // Customize search pipeline differently
257
  this.searchPipeline.remove(lunr.stemmer);
258
  
259
  this.add({ id: '1', content: 'Process this text with custom pipeline' });
260
});
261
```
262

263
### Metadata Preservation
264

265
Controlling which metadata is preserved during indexing for later retrieval.
266

267
```javascript { .api }
268
/**
269
 * Array of metadata keys to preserve during indexing
270
 * @type {Array<string>}
271
 */
272
Builder.prototype.metadataWhitelist;
273
```
274

275
**Usage Examples:**
276

277
```javascript
278
const idx = lunr(function () {
279
  this.ref('id');
280
  this.field('content');
281
  
282
  // Preserve custom metadata
283
  this.metadataWhitelist = ['position', 'frequency'];
284
  
285
  this.add({ id: '1', content: 'Document with preserved metadata' });
286
});
287
```
288

289
## Common Patterns
290

291
### Large Dataset Indexing
292

293
```javascript
294
// Efficient indexing of large datasets
295
const idx = lunr(function () {
296
  this.ref('id');
297
  this.field('title', { boost: 5 });
298
  this.field('content');
299
  
300
  // Add documents in fixed-size batches (note: the whole index is still built in memory)
301
  const batchSize = 1000;
302
  for (let i = 0; i < documents.length; i += batchSize) {
303
    const batch = documents.slice(i, i + batchSize);
304
    batch.forEach(doc => this.add(doc));
305
  }
306
});
307
```
308

309
### Multi-language Support
310

311
```javascript
312
// Using plugins for multi-language support (requires lunr language plugins)
313
const idx = lunr(function () {
314
  // Apply language-specific plugins
315
  this.use(lunr.fr); // French language support (example)
316
  
317
  this.ref('id');
318
  this.field('title');
319
  this.field('content');
320
  
321
  frenchDocuments.forEach(doc => this.add(doc));
322
});
323
```
324

325
### Custom Scoring Parameters
326

327
```javascript
328
// Fine-tuning search scoring
329
const idx = lunr(function () {
330
  this.ref('id');
331
  this.field('title', { boost: 15 });
332
  this.field('content');
333
  
334
  // BM25 parameters
335
  this.b(0.9);   // Higher field length normalization (0-1)
336
  this.k1(2.0);  // Higher k1 than the 1.2 default: term frequency saturates more slowly
337
  
338
  documents.forEach(doc => this.add(doc));
339
});
340
```

Version

Tile

Files

docs/index-building.md

docs/index-building.md