or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-querying.mdindex-building.mdindex.mdsearching.mdtext-processing.mdutilities.md

utilities.mddocs/

0

# Utilities

1

2

Utility functions and data structures used throughout the Lunr library. This includes helper functions for string manipulation, data structures for efficient search operations, field references, match data handling, and scoring functions.

3

4

## Capabilities

5

6

### Utility Functions

7

8

Core utility functions in the `lunr.utils` namespace.

9

10

```javascript { .api }

11

/**

12

* Utility namespace containing helper functions

13

* @namespace lunr.utils

14

*/

15

lunr.utils = {

16

/**

17

* Print warning message to console

18

* @param {string} message - Warning message to display

19

*/

20

warn(message),

21

22

/**

23

* Convert object to string, handling null and undefined

24

* @param {*} obj - Object to convert to string

25

* @returns {string} - String representation, empty string for null/undefined

26

*/

27

asString(obj),

28

29

/**

30

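* Shallow clone a plain object whose values are primitives or arrays of primitives (array values are copied; nested objects are not supported)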

31

* @param {*} obj - Object or array to clone

32

* @returns {*} - Cloned object/array

33

*/

34

clone(obj)

35

};

36

```

37

38

**Usage Examples:**

39

40

```javascript

41

const lunr = require('lunr');

// Warning function
lunr.utils.warn('This is a warning message');
// Outputs to console if available

// String conversion with null safety
console.log(lunr.utils.asString(null)); // ""
console.log(lunr.utils.asString(undefined)); // ""
console.log(lunr.utils.asString("hello")); // "hello"
console.log(lunr.utils.asString(123)); // "123"
console.log(lunr.utils.asString({a: 1})); // "[object Object]"

// Object cloning
const original = { name: 'John', tags: ['dev', 'js'] };
const cloned = lunr.utils.clone(original);
cloned.name = 'Jane';
console.log(original.name); // "John" (unchanged)
console.log(cloned.name); // "Jane"

// Array-valued properties are copied, so the clone's arrays are independent.
// Note: clone() always returns a plain object, so passing a top-level array
// does not give back an Array (it has no push(), length, etc.).
cloned.tags.push('ts');
console.log(original.tags.length); // 2 (unchanged)
console.log(cloned.tags.length); // 3

67

```

68

69

### Scoring Functions

70

71

Functions for calculating document relevance scores.

72

73

```javascript { .api }

74

/**

75

* Calculate inverse document frequency for term scoring

76

* @param {Object} posting - Term posting: maps each field name to an object keyed by the refs of the documents containing the term (the `_index` key is ignored)

77

* @param {number} documentCount - Total number of documents in the index

78

* @returns {number} - IDF score for the term

79

*/

80

lunr.idf(posting, documentCount);

81

```

82

83

**Usage Examples:**

84

85

```javascript

86

// lunr.idf counts the documents listed under each field of the posting,
// so a posting maps field names to objects keyed by document ref.
const makePosting = (docCount) => {
  const docs = {};
  for (let i = 1; i <= docCount; i++) {
    docs[`doc${i}`] = {};
  }
  return { body: docs };
};

// Calculate IDF for a term that appears in 5 out of 100 documents
const posting = makePosting(5);
const totalDocs = 100;
const idfScore = lunr.idf(posting, totalDocs);
console.log(idfScore); // ~2.910 (higher for rare terms)

// Common term (appears in 80 out of 100 documents)
const commonPosting = makePosting(80);
const commonIdf = lunr.idf(commonPosting, totalDocs);
console.log(commonIdf); // ~0.227 (lower for common terms)

96

```

97

98

### Vector Class

99

100

Vector space representation for documents and term weights.

101

102

```javascript { .api }

103

/**

104

* Vector class for document representation in vector space

105

*/

106

class Vector {

107

/**

108

* Create a vector from array of elements

109

* @param {Array<number>} elements - Flat list of alternating element index and element value: [index0, value0, index1, value1, ...]

110

*/

111

constructor(elements);

112

113

/**

114

* Calculate insertion position for an index

115

* @param {number} index - Index to find position for

116

* @returns {number} - Position where index should be inserted

117

*/

118

positionForIndex(index);

119

120

/**

121

* Insert a value at the specified index

122

* @param {number} index - Index position

123

* @param {number} value - Value to insert

124

* @returns {undefined} - Nothing is returned; the vector is modified in place

125

*/

126

insert(index, value);

127

128

/**

129

* Insert or update a value at the specified index

130

* @param {number} index - Index position

131

* @param {number} value - Value to insert/update

132

* @param {Function} fn - Optional function to combine with existing value

133

* @returns {undefined} - Nothing is returned; the vector is modified in place

134

*/

135

upsert(index, value, fn);

136

137

/**

138

* Calculate the magnitude (length) of the vector

139

* @returns {number} - Vector magnitude

140

*/

141

magnitude();

142

143

/**

144

* Calculate dot product with another vector

145

* @param {lunr.Vector} otherVector - Vector to calculate dot product with

146

* @returns {number} - Dot product result

147

*/

148

dot(otherVector);

149

150

/**

151

* Calculate cosine similarity with another vector

152

* @param {lunr.Vector} otherVector - Vector to compare with

153

* @returns {number} - Cosine similarity (0-1, higher = more similar)

154

*/

155

similarity(otherVector);

156

157

/**

158

* Convert vector to regular array

159

* @returns {Array<number>} - Array of the vector's values only (element indexes are dropped)

160

*/

161

toArray();

162

163

/**

164

* Serialize vector to JSON

165

* @returns {Array<number>} - Serialized vector data

166

*/

167

toJSON();

168

}

169

```

170

171

**Usage Examples:**

172

173

```javascript

174

// Create vectors from flat [index, value, index, value, ...] lists
const vec1 = new lunr.Vector([0, 1, 1, 2, 2, 3]); // values 1, 2, 3 at indexes 0, 1, 2
const vec2 = new lunr.Vector([0, 2, 1, 1, 2, 3]); // values 2, 1, 3 at indexes 0, 1, 2

// Vector operations
console.log(vec1.magnitude()); // sqrt(1 + 4 + 9) ≈ 3.742
console.log(vec1.dot(vec2)); // 1*2 + 2*1 + 3*3 = 13
console.log(vec1.similarity(vec2)); // Similarity score

// Sparse vector operations (index-value pairs)
const sparseVec = new lunr.Vector();
sparseVec.insert(10, 0.5); // Insert value 0.5 at index 10
sparseVec.insert(25, 1.2); // Insert value 1.2 at index 25

// Upsert (insert or update)
sparseVec.upsert(10, 0.3, (existing, new_val) => existing + new_val);
// Index 10 now has value 0.8 (0.5 + 0.3)

191

```

192

193

### Set Class

194

195

Set data structure for document collections and filtering.

196

197

```javascript { .api }

198

/**

199

* Set class for working with collections of document references

200

*/

201

class Set {

202

/**

203

* Create a set from array of elements

204

* @param {Array} elements - Array of elements to include in set

205

*/

206

constructor(elements);

207

208

/**

209

* Check if the set contains an object

210

* @param {*} object - Object to check for membership

211

* @returns {boolean} - True if object is in the set

212

*/

213

contains(object);

214

215

/**

216

* Calculate intersection with another set

217

* @param {lunr.Set} other - Set to intersect with

218

* @returns {lunr.Set} - New set containing common elements

219

*/

220

intersect(other);

221

222

/**

223

* Calculate union with another set

224

* @param {lunr.Set} other - Set to union with

225

* @returns {lunr.Set} - New set containing all elements from both sets

226

*/

227

union(other);

228

229

/**

230

* Universal set containing all possible elements

231

* @type {lunr.Set}

232

*/

233

static complete;

234

235

/**

236

* Empty set containing no elements

237

* @type {lunr.Set}

238

*/

239

static empty;

240

}

241

```

242

243

**Usage Examples:**

244

245

```javascript

246

// Build two overlapping sets of document refs
const firstSet = new lunr.Set(['doc1', 'doc2', 'doc3']);
const secondSet = new lunr.Set(['doc2', 'doc3', 'doc4']);

// Membership checks
const hasDoc1 = firstSet.contains('doc1');
console.log(hasDoc1); // true
const hasDoc4 = firstSet.contains('doc4');
console.log(hasDoc4); // false

// Combining sets (each call yields a lunr.Set)
const shared = firstSet.intersect(secondSet); // holds 'doc2' and 'doc3'
const merged = firstSet.union(secondSet); // holds 'doc1', 'doc2', 'doc3' and 'doc4'

// Special sets
const { empty, complete } = lunr.Set;
console.log(empty.contains('anything')); // false
console.log(complete.contains('anything')); // true

260

```

261

262

### TokenSet Class

263

264

Finite state automaton for efficient token matching with wildcards and fuzzy search.

265

266

```javascript { .api }

267

/**

268

* TokenSet class implementing finite state automaton for token matching

269

*/

270

class TokenSet {

271

/**

272

* Create a new TokenSet

273

*/

274

constructor();

275

276

/**

277

* Convert TokenSet to array of accepted strings

278

* @returns {Array<string>} - Array of strings accepted by this TokenSet

279

*/

280

toArray();

281

282

/**

283

* Convert TokenSet to string representation

284

* @returns {string} - String representation of the TokenSet

285

*/

286

toString();

287

288

/**

289

* Calculate intersection with another TokenSet

290

* @param {lunr.TokenSet} other - TokenSet to intersect with

291

* @returns {lunr.TokenSet} - New TokenSet representing the intersection

292

*/

293

intersect(other);

294

295

/**

296

* Create TokenSet from sorted array of strings

297

* @param {Array<string>} arr - Sorted array of strings

298

* @returns {lunr.TokenSet} - TokenSet accepting the given strings

299

*/

300

static fromArray(arr);

301

302

/**

303

* Create TokenSet from query clause

304

* @param {Object} clause - Query clause object

305

* @returns {lunr.TokenSet} - TokenSet for matching the clause

306

*/

307

static fromClause(clause);

308

309

/**

310

* Create TokenSet for fuzzy string matching

311

* @param {string} str - String to match fuzzily

312

* @param {number} editDistance - Maximum edit distance allowed

313

* @returns {lunr.TokenSet} - TokenSet for fuzzy matching

314

*/

315

static fromFuzzyString(str, editDistance);

316

317

/**

318

* Create TokenSet from exact string

319

* @param {string} str - String to match exactly

320

* @returns {lunr.TokenSet} - TokenSet for exact matching

321

*/

322

static fromString(str);

323

324

/**

325

* Internal ID counter for TokenSet nodes

326

* @type {number}

327

*/

328

static _nextId;

329

}

330

```

331

332

**Usage Examples:**

333

334

```javascript

335

// Create TokenSet for exact matching
const exactSet = lunr.TokenSet.fromString('javascript');
console.log(exactSet.toArray()); // ['javascript']

// Create TokenSet for fuzzy matching
const fuzzySet = lunr.TokenSet.fromFuzzyString('javascript', 1);
// Accepts 'javascript' plus variants within edit distance 1
// (e.g. 'javascrip', 'avascript'); listed entries may contain '*' wildcards
console.log(fuzzySet.toArray());

// Create from array. The input must already be sorted, otherwise the
// underlying builder throws "Out of order word insertion".
// ('-' sorts before letters, so 'java-script' comes before 'javascript'.)
const arraySet = lunr.TokenSet.fromArray(['java', 'java-script', 'javascript']);

// TokenSet operations
const intersection = exactSet.intersect(fuzzySet);

348

```

349

350

### TokenSet Builder

351

352

Builder for constructing optimized TokenSets from word lists.

353

354

```javascript { .api }

355

/**

356

* Builder for constructing TokenSets efficiently (exposed as lunr.TokenSet.Builder; words must be inserted in sorted order)

357

*/

358

class TokenSetBuilder {

359

/**

360

* Create a new TokenSet builder

361

*/

362

constructor();

363

364

/**

365

* Insert a word into the builder

366

* @param {string} word - Word to insert

367

*/

368

insert(word);

369

370

/**

371

* Finalize the TokenSet construction

372

* @returns {undefined} - Nothing is returned; read the finished TokenSet from the builder's `root` property

373

*/

374

finish();

375

376

/**

377

* Minimize the automaton to reduce states

378

* @param {number} downTo - Minimize down to this state level

379

*/

380

minimize(downTo);

381

}

382

```

383

384

**Usage Examples:**

385

386

```javascript

387

// Build TokenSet from word list
const builder = new lunr.TokenSet.Builder();
const words = ['java', 'javascript', 'typescript', 'coffeescript'];

// insert() throws "Out of order word insertion" unless each word sorts
// at or after the previous one, so sort a copy before inserting.
for (const word of [...words].sort()) {
  builder.insert(word);
}

// finish() minimizes the automaton and returns nothing;
// the resulting TokenSet is the builder's root.
builder.finish();
const wordSet = builder.root;
console.log(wordSet.toArray()); // All inserted words

395

```

396

397

### Field Reference

398

399

Reference system for identifying fields within documents.

400

401

```javascript { .api }

402

/**

403

* Reference to a specific field within a document

404

*/

405

class FieldRef {

406

/**

407

* Create a field reference

408

* @param {string} docRef - Document reference identifier

409

* @param {string} fieldName - Name of the field

410

* @param {string} stringValue - Optional string representation

411

*/

412

constructor(docRef, fieldName, stringValue);

413

414

/**

415

* Convert field reference to string representation

416

* @returns {string} - String representation (fieldName/docRef)

417

*/

418

toString();

419

420

/**

421

* Parse field reference from string representation

422

* @param {string} str - String to parse (fieldName/docRef format; split at the first joiner)

423

* @returns {lunr.FieldRef} - Parsed field reference

424

*/

425

static fromString(str);

426

427

/**

428

* Separator character used in string representation

429

* @type {string}

430

*/

431

static joiner; // "/"

432

}

433

```

434

435

**Usage Examples:**

436

437

```javascript

438

// Create field reference (document ref first, then field name)
const fieldRef = new lunr.FieldRef('doc123', 'title');
// The string form puts the field name first
console.log(fieldRef.toString()); // "title/doc123"

// Parse from string (fieldName/docRef)
const parsed = lunr.FieldRef.fromString('content/doc456');
console.log(parsed.docRef); // "doc456"
console.log(parsed.fieldName); // "content"

// Custom joiner
console.log(lunr.FieldRef.joiner); // "/"

449

```

450

451

### Match Data

452

453

Container for search match metadata and term position information.

454

455

```javascript { .api }

456

/**

457

* Container for metadata about search matches

458

*/

459

class MatchData {

460

/**

461

* Create match data for a term and field

462

* @param {string} term - Matching term

463

* @param {string} field - Field where match occurred

464

* @param {Object} metadata - Match metadata (positions, etc.); every value must be an array, since each one is copied with slice()

465

*/

466

constructor(term, field, metadata);

467

468

/**

469

* Combine this match data with another MatchData instance

470

* @param {lunr.MatchData} otherMatchData - Other match data to combine

471

* @returns {undefined} - Nothing is returned; the other match data is merged into this instance in place

472

*/

473

combine(otherMatchData);

474

475

/**

476

* Add metadata for a term and field

477

* @param {string} term - Term to add metadata for

478

* @param {string} field - Field to add metadata for

479

* @param {Object} metadata - Metadata to add

480

*/

481

add(term, field, metadata);

482

}

483

```

484

485

**Usage Examples:**

486

487

```javascript

488

// Create match data.
// Every metadata value must be an array: the constructor copies each
// value with slice(), so a bare number would throw a TypeError.
const matchData = new lunr.MatchData('javascript', 'title', {
  positions: [[0, 10]],
  frequency: [1]
});

// Add more match information
matchData.add('tutorial', 'content', {
  positions: [[15, 23], [45, 53]],
  frequency: [2]
});

// Combine match data from different sources
const otherMatches = new lunr.MatchData('node', 'tags', {
  positions: [[0, 4]],
  frequency: [1]
});

// combine() merges otherMatches into matchData in place and returns nothing,
// so the combined result is matchData itself.
matchData.combine(otherMatches);
const combined = matchData;

507

```

508

509

## Advanced Utility Patterns

510

511

### Custom String Processing

512

513

```javascript

514

// Extend utils with custom functions

/**
 * Normalize a value into a lower-case, punctuation-free, single-spaced string.
 *
 * Note: `\w` only covers ASCII letters, digits and underscore, so any other
 * character (including accented letters) is removed along with punctuation.
 *
 * @param {*} str - Value to normalize; null/undefined become ""
 * @returns {string} - Normalized string
 */
lunr.utils.customNormalize = function(str) {
  return lunr.utils.asString(str)
    .toLowerCase()
    .replace(/[^\w\s]/g, '') // Remove punctuation
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim();
};

522

523

// Use in pipeline

/**
 * Pipeline step that rewrites a token through lunr.utils.customNormalize.
 *
 * @param {Object} token - Token exposing an update(fn) method
 * @returns {*} - Whatever token.update returns
 */
function normalizeToken(token) {
  const normalize = lunr.utils.customNormalize;
  return token.update(normalize);
}

527

```

528

529

### Vector Space Operations

530

531

```javascript

532

// Document similarity calculation

/**
 * Score two document vectors against each other and label the result.
 *
 * Thresholds are exclusive: a score must be strictly greater than the
 * threshold to earn the label (0.8 exactly is 'similar', not 'very similar').
 *
 * @param {Object} doc1Vector - Vector exposing similarity(otherVector)
 * @param {Object} doc2Vector - Vector to compare against
 * @returns {{similarity: number, category: string}} - Raw score and its label
 */
function calculateSimilarity(doc1Vector, doc2Vector) {
  const similarity = doc1Vector.similarity(doc2Vector);

  let category = 'different';
  if (similarity > 0.8) {
    category = 'very similar';
  } else if (similarity > 0.5) {
    category = 'similar';
  } else if (similarity > 0.2) {
    category = 'somewhat similar';
  }

  return { similarity, category };
}

542

543

// Find similar documents

/**
 * Rank document vectors by their similarity to a query vector.
 *
 * @param {Object} queryVector - Vector exposing similarity(otherVector)
 * @param {Array<Object>} documentVectors - Candidate document vectors
 * @param {number} [threshold=0.3] - Scores must be strictly greater than this to be kept
 * @returns {Array<{index: number, similarity: number}>} - Matches sorted by
 *   descending similarity; `index` is the position in documentVectors
 */
function findSimilarDocuments(queryVector, documentVectors, threshold = 0.3) {
  const scored = documentVectors.map((docVec, index) => ({
    index,
    similarity: queryVector.similarity(docVec),
  }));

  return scored
    .filter((result) => result.similarity > threshold)
    .sort((a, b) => b.similarity - a.similarity);
}

553

```

554

555

### Set-based Filtering

556

557

```javascript

558

// Document filtering with sets

/**
 * Filters search results through an allow-list and a block-list of document refs.
 *
 * Until allow() is called there is no allow-list, so every document that is
 * not blocked passes. Blocking always wins over allowing.
 */
class DocumentFilter {
  constructor() {
    // lunr.Set.complete means "no allow-list yet": everything is allowed.
    this.allowedDocs = lunr.Set.complete;
    this.blockedDocs = new lunr.Set();
  }

  /**
   * Add document refs to the allow-list.
   * @param {Array<string>} docRefs - Refs that may appear in filtered results
   */
  allow(docRefs) {
    const incoming = new lunr.Set(docRefs);
    // A union with the complete set stays complete, so the first call
    // replaces the "allow everything" default instead of extending it.
    this.allowedDocs = this.allowedDocs === lunr.Set.complete
      ? incoming
      : this.allowedDocs.union(incoming);
  }

  /**
   * Add document refs to the block-list.
   * @param {Array<string>} docRefs - Refs that must never appear in filtered results
   */
  block(docRefs) {
    this.blockedDocs = this.blockedDocs.union(new lunr.Set(docRefs));
  }

  /**
   * Keep only the results that are allowed and not blocked.
   * @param {Array<{ref: string}>} results - Search results to filter
   * @returns {Array<{ref: string}>} - New array with the surviving results
   */
  filter(results) {
    return results.filter((result) => {
      if (this.blockedDocs.contains(result.ref)) return false;
      if (this.allowedDocs === lunr.Set.complete) return true;
      return this.allowedDocs.contains(result.ref);
    });
  }
}

581

582

// Usage: allow three documents, then block one of them
const docFilter = new DocumentFilter();
const allowedRefs = ['doc1', 'doc2', 'doc3'];
docFilter.allow(allowedRefs);
docFilter.block(['doc2']); // Block doc2 even though it's allowed

const filteredResults = docFilter.filter(searchResults);

588

```

589

590

### Performance Monitoring

591

592

```javascript

593

// Performance monitoring utilities
lunr.utils.performance = {
  /**
   * Run fn and report its wall-clock duration through lunr.utils.warn.
   * @param {string} label - Prefix for the reported message
   * @param {Function} fn - Synchronous function to measure
   * @returns {*} - Whatever fn returns
   */
  time(label, fn) {
    const start = Date.now();
    const result = fn();
    const duration = Date.now() - start;
    lunr.utils.warn(`${label}: ${duration}ms`);
    return result;
  },

  /**
   * Run fn and report the change in used heap through lunr.utils.warn.
   * When process.memoryUsage is unavailable, fn is simply run and nothing
   * is reported.
   * @param {string} label - Prefix for the reported message
   * @param {Function} fn - Synchronous function to measure
   * @returns {*} - Whatever fn returns
   */
  memory(label, fn) {
    const canMeasure = typeof process !== 'undefined' && process.memoryUsage;
    if (!canMeasure) {
      return fn();
    }

    const heapBefore = process.memoryUsage().heapUsed;
    const result = fn();
    const heapDelta = process.memoryUsage().heapUsed - heapBefore;
    lunr.utils.warn(`${label}: ${Math.round(heapDelta / 1024)}KB`);
    return result;
  }
};

615

616

// Usage: time a search and keep its results
const runSearch = () => idx.search('javascript tutorial');
const results = lunr.utils.performance.time('Search Query', runSearch);

620

```