or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-querying.md index-building.md index.md searching.md text-processing.md utilities.md

index-building.md docs/

0

# Index Building

1

2

Core functionality for creating search indexes from documents. The index building process involves configuring fields, processing documents through text pipelines, and building optimized data structures for fast search operations.

3

4

## Capabilities

5

6

### Main Lunr Function

7

8

The primary entry point for creating search indexes using the builder pattern.

9

10

```javascript { .api }

11

/**

12

* Create a new search index using the builder pattern

13

* @param {Function} config - Configuration function that receives a Builder instance

14

* @returns {lunr.Index} - Built search index ready for querying

15

*/

16

function lunr(config);

17

18

/**

19

* Current version of the Lunr library

20

* @type {string}

21

*/

22

lunr.version; // "2.3.9"

23

```

24

25

**Usage Examples:**

26

27

```javascript

28

const lunr = require('lunr');

29

30

// Basic index creation

31

const idx = lunr(function () {

32

this.ref('id');

33

this.field('title');

34

this.field('content');

35

36

this.add({ id: '1', title: 'Hello', content: 'World' });

37

});

38

39

// Index with custom configuration

40

const customIdx = lunr(function () {

41

this.ref('docId');

42

this.field('title', { boost: 10 });

43

this.field('body');

44

this.field('tags', { boost: 5 });

45

46

// Custom pipeline (remove default stemmer)

47

this.pipeline.remove(lunr.stemmer);

48

this.pipeline.add(myCustomProcessor);

49

50

documents.forEach(function (doc) {

51

this.add(doc, { boost: doc.priority || 1 });

52

}, this);

53

});

54

```

55

56

### Builder Class

57

58

The Builder class provides the configuration interface for creating indexes.

59

60

```javascript { .api }

61

/**

62

* Builder class for configuring and constructing search indexes

63

*/

64

class Builder {

65

/**

66

* Create a new Builder instance

67

*/

68

constructor();

69

70

/**

71

* Set the document reference field

72

* @param {string} refField - Field name to use as document reference (default: 'id')

73

*/

74

ref(refField);

75

76

/**

77

* Add a field to be indexed

78

* @param {string} fieldName - Name of the field to index

79

* @param {Object} attributes - Optional field configuration

80

* @param {number} attributes.boost - Boost multiplier for field importance

81

* @param {Function} attributes.extractor - Function to extract field value from documents

82

*/

83

field(fieldName, attributes);

84

85

/**

86

* Set the field length normalization parameter

87

* @param {number} number - Normalization factor (0-1, default: 0.75)

88

*/

89

b(number);

90

91

/**

92

* Set the term frequency saturation parameter

93

* @param {number} number - Saturation factor (default: 1.2)

94

*/

95

k1(number);

96

97

/**

98

* Add a document to the index

99

* @param {Object} doc - Document object to index

100

* @param {Object} attributes - Optional document attributes

101

* @param {number} attributes.boost - Boost multiplier for document importance

102

*/

103

add(doc, attributes);

104

105

/**

106

* Build the final search index

107

* @returns {lunr.Index} - Constructed search index

108

*/

109

build();

110

111

/**

112

* Apply a plugin to the builder

113

* @param {Function} plugin - Plugin function that receives the builder

114

*/

115

use(plugin);

116

117

/**

118

* Processing pipeline for indexing documents

119

* @type {lunr.Pipeline}

120

*/

121

pipeline;

122

123

/**

124

* Processing pipeline for search queries

125

* @type {lunr.Pipeline}

126

*/

127

searchPipeline;

128

129

/**

130

* Tokenizer function for breaking text into tokens

131

* @type {Function}

132

*/

133

tokenizer;

134

135

/**

136

* Number of documents added to the index

137

* @type {number}

138

*/

139

documentCount;

140

141

/**

142

* Array of metadata keys to preserve during indexing

143

* @type {Array<string>}

144

*/

145

metadataWhitelist;

146

}

147

```

148

149

**Usage Examples:**

150

151

```javascript

152

// Manual builder usage (equivalent to lunr() function)

153

const builder = new lunr.Builder();

154

builder.ref('id');

155

builder.field('title', { boost: 10 });

156

builder.field('content');

157

158

// Configure scoring parameters

159

builder.b(0.8); // Higher field length normalization

160

builder.k1(1.5); // Higher k1: term frequency saturates more slowly (default: 1.2)

161

162

// Add documents with custom boosts

163

builder.add({ id: '1', title: 'Important', content: 'Very important document' }, { boost: 2 });

164

builder.add({ id: '2', title: 'Normal', content: 'Regular document' });

165

166

const idx = builder.build();

167

```

168

169

### Field Configuration

170

171

Advanced field configuration options for customizing how fields are indexed and searched.

172

173

```javascript { .api }

174

/**

175

* Field configuration options

176

* @typedef {Object} FieldAttributes

177

* @property {number} boost - Multiplicative boost factor for field importance

178

* @property {Function} extractor - Function to extract field value from documents

179

*/

180

181

/**

182

* Document boost configuration

183

* @typedef {Object} DocumentAttributes

184

* @property {number} boost - Multiplicative boost factor for document importance

185

*/

186

```

187

188

**Usage Examples:**

189

190

```javascript

191

const idx = lunr(function () {

192

this.ref('id');

193

194

// Boosted title field

195

this.field('title', { boost: 10 });

196

197

// Field with custom extractor

198

this.field('author', {

199

extractor: function (doc) {

200

return doc.author ? doc.author.name : '';

201

}

202

});

203

204

// Multi-value field extraction

205

this.field('tags', {

206

extractor: function (doc) {

207

return doc.tags ? doc.tags.join(' ') : '';

208

}

209

});

210

211

this.add({

212

id: '1',

213

title: 'TypeScript Guide',

214

author: { name: 'John Doe', email: 'john@example.com' },

215

tags: ['typescript', 'javascript', 'programming']

216

});

217

});

218

```

219

220

### Pipeline Configuration

221

222

Customizing the text processing pipeline for indexing and searching.

223

224

```javascript { .api }

225

/**

226

* Text processing pipeline

227

* @type {lunr.Pipeline}

228

*/

229

Builder.prototype.pipeline;

230

231

/**

232

* Search-time processing pipeline

233

* @type {lunr.Pipeline}

234

*/

235

Builder.prototype.searchPipeline;

236

```

237

238

**Usage Examples:**

239

240

```javascript

241

const idx = lunr(function () {

242

this.ref('id');

243

this.field('content');

244

245

// Remove stop word filter

246

this.pipeline.remove(lunr.stopWordFilter);

247

248

// Add custom processor before stemmer

249

this.pipeline.before(lunr.stemmer, function (token) {

250

// Custom processing logic

251

return token.update(function (str) {

252

return str.replace(/[0-9]/g, '');

253

});

254

});

255

256

// Customize search pipeline differently

257

this.searchPipeline.remove(lunr.stemmer);

258

259

this.add({ id: '1', content: 'Process this text with custom pipeline' });

260

});

261

```

262

263

### Metadata Preservation

264

265

Controlling which metadata is preserved during indexing for later retrieval.

266

267

```javascript { .api }

268

/**

269

* Array of metadata keys to preserve during indexing

270

* @type {Array<string>}

271

*/

272

Builder.prototype.metadataWhitelist;

273

```

274

275

**Usage Examples:**

276

277

```javascript

278

const idx = lunr(function () {

279

this.ref('id');

280

this.field('content');

281

282

// Preserve custom metadata

283

this.metadataWhitelist = ['position', 'frequency'];

284

285

this.add({ id: '1', content: 'Document with preserved metadata' });

286

});

287

```

288

289

## Common Patterns

290

291

### Large Dataset Indexing

292

293

```javascript

294

// Efficient indexing of large datasets

295

const idx = lunr(function () {

296

this.ref('id');

297

this.field('title', { boost: 5 });

298

this.field('content');

299

300

// Add documents in fixed-size batches (this loop is still synchronous, and the full documents array stays in memory)

301

const batchSize = 1000;

302

for (let i = 0; i < documents.length; i += batchSize) {

303

const batch = documents.slice(i, i + batchSize);

304

batch.forEach(doc => this.add(doc));

305

}

306

});

307

```

308

309

### Multi-language Support

310

311

```javascript

312

// Using plugins for multi-language support (requires lunr language plugins)

313

const idx = lunr(function () {

314

// Apply language-specific plugins

315

this.use(lunr.fr); // French language support (example)

316

317

this.ref('id');

318

this.field('title');

319

this.field('content');

320

321

frenchDocuments.forEach(doc => this.add(doc));

322

});

323

```

324

325

### Custom Scoring Parameters

326

327

```javascript

328

// Fine-tuning search scoring

329

const idx = lunr(function () {

330

this.ref('id');

331

this.field('title', { boost: 15 });

332

this.field('content');

333

334

// BM25 parameters

335

this.b(0.9); // Higher field length normalization (0-1)

336

this.k1(2.0); // Higher k1: term frequency saturates more slowly (default: 1.2)

337

338

documents.forEach(doc => this.add(doc));

339

});

340

```