tessl/npm-lunr

Simple full-text search in your browser.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Index Building

Name: tessl/npm-lunr
Author: tessl

Core functionality for creating search indexes from documents. The index building process involves configuring fields, processing documents through text pipelines, and building optimized data structures for fast search operations.

Capabilities

Main Lunr Function

The primary entry point for creating search indexes using the builder pattern.

/**
 * Create a new search index using the builder pattern.
 *
 * The usage examples configure the index through `this` inside `config`,
 * so pass a regular `function` (not an arrow function, which has no own
 * `this`) when configuring the index that way.
 * @param {Function} config - Configuration function that receives a Builder instance
 * @returns {lunr.Index} - Built search index ready for querying
 */
function lunr(config);

/**
 * Current version of the Lunr library
 * @type {string}
 */
lunr.version; // "2.3.9"

Usage Examples:

const lunr = require('lunr');

// Minimal index: one reference field, two searchable fields, one document.
// The builder is taken from the callback argument rather than from `this`.
const idx = lunr(function (builder) {
  builder.ref('id');
  builder.field('title');
  builder.field('content');

  builder.add({ id: '1', title: 'Hello', content: 'World' });
});

// Index with custom configuration.
// `myCustomProcessor` and `documents` are placeholders supplied by the caller.
const customIdx = lunr(function () {
  this.ref('docId');
  this.field('title', { boost: 10 });
  this.field('body');
  this.field('tags', { boost: 5 });

  // Custom pipeline (remove default stemmer).
  // The stemmer has to be removed from the search pipeline as well: otherwise
  // query terms are still stemmed while the indexed terms are not, and
  // searches for inflected words silently return nothing.
  this.pipeline.remove(lunr.stemmer);
  this.searchPipeline.remove(lunr.stemmer);
  this.pipeline.add(myCustomProcessor);

  // Per-document boost; falls back to a neutral 1 when no priority is set
  documents.forEach(function (doc) {
    this.add(doc, { boost: doc.priority || 1 });
  }, this);
});

Builder Class

The Builder class provides the configuration interface for creating indexes.

/**
 * Builder class for configuring and constructing search indexes.
 *
 * Typical flow, as shown in the usage examples: declare the reference field
 * and the searchable fields, optionally tune `b`/`k1` and the pipelines,
 * add documents, then call `build()` to obtain the index.
 */
class Builder {
  /**
   * Create a new Builder instance
   */
  constructor();

  /**
   * Set the document reference field
   * @param {string} refField - Field name to use as document reference (default: 'id')
   */
  ref(refField);

  /**
   * Add a field to be indexed
   * @param {string} fieldName - Name of the field to index
   * @param {Object} [attributes] - Optional field configuration
   * @param {number} [attributes.boost] - Boost multiplier for field importance
   * @param {Function} [attributes.extractor] - Function to extract field value from documents;
   *   in the usage examples it is passed the document and returns the text to index
   */
  field(fieldName, attributes);

  /**
   * Set the field length normalization parameter
   * @param {number} number - Normalization factor (0-1, default: 0.75)
   */
  b(number);

  /**
   * Set the term frequency saturation parameter
   * @param {number} number - Saturation factor (default: 1.2)
   */
  k1(number);

  /**
   * Add a document to the index
   * @param {Object} doc - Document object to index
   * @param {Object} [attributes] - Optional document attributes
   * @param {number} [attributes.boost] - Boost multiplier for document importance
   */
  add(doc, attributes);

  /**
   * Build the final search index
   * @returns {lunr.Index} - Constructed search index
   */
  build();

  /**
   * Apply a plugin to the builder
   * @param {Function} plugin - Plugin function that receives the builder
   */
  use(plugin);

  /**
   * Processing pipeline for indexing documents
   * @type {lunr.Pipeline}
   */
  pipeline;

  /**
   * Processing pipeline for search queries
   * @type {lunr.Pipeline}
   */
  searchPipeline;

  /**
   * Tokenizer function for breaking text into tokens
   * @type {Function}
   */
  tokenizer;

  /**
   * Number of documents added to the index
   * @type {number}
   */
  documentCount;

  /**
   * Array of metadata keys to preserve during indexing
   * @type {Array<string>}
   */
  metadataWhitelist;
}

Usage Examples:

// Manual builder usage.
// Unlike the lunr() function, a bare Builder starts with empty pipelines, so
// the default processors are added by hand here to make the result equivalent
// to an index created through lunr().
const builder = new lunr.Builder();
builder.pipeline.add(lunr.trimmer, lunr.stopWordFilter, lunr.stemmer);
builder.searchPipeline.add(lunr.stemmer);

builder.ref('id');
builder.field('title', { boost: 10 });
builder.field('content');

// Configure scoring parameters (BM25)
builder.b(0.8);    // Stronger field length normalization (default: 0.75)
builder.k1(1.5);   // Above the 1.2 default: term frequency saturates more slowly

// Add documents with custom boosts
builder.add({ id: '1', title: 'Important', content: 'Very important document' }, { boost: 2 });
builder.add({ id: '2', title: 'Normal', content: 'Regular document' });

const idx = builder.build();

Field Configuration

Advanced field configuration options for customizing how fields are indexed and searched.

/**
 * Field configuration options (the optional second argument to `Builder#field`)
 * @typedef {Object} FieldAttributes
 * @property {number} [boost] - Multiplicative boost factor for field importance
 * @property {Function} [extractor] - Function to extract field value from documents
 */

/**
 * Document boost configuration (the optional second argument to `Builder#add`)
 * @typedef {Object} DocumentAttributes
 * @property {number} [boost] - Multiplicative boost factor for document importance
 */

Usage Examples:

const idx = lunr(function () {
  // Pulls the display name out of the nested author object, if there is one
  const authorName = function (doc) {
    if (!doc.author) {
      return '';
    }
    return doc.author.name;
  };

  // Flattens the tag list into a single space-separated string
  const tagText = function (doc) {
    if (!doc.tags) {
      return '';
    }
    return doc.tags.join(' ');
  };

  this.ref('id');

  // Title matches count ten times as much
  this.field('title', { boost: 10 });

  // Fields whose text is derived from the document by an extractor
  this.field('author', { extractor: authorName });
  this.field('tags', { extractor: tagText });

  this.add({
    id: '1',
    title: 'TypeScript Guide',
    author: { name: 'John Doe', email: 'john@example.com' },
    tags: ['typescript', 'javascript', 'programming']
  });
});

Pipeline Configuration

Customizing the text processing pipeline for indexing and searching.

/**
 * Text processing pipeline applied to documents while they are indexed
 * @type {lunr.Pipeline}
 */
Builder.prototype.pipeline;

/**
 * Search-time processing pipeline applied to search queries
 * @type {lunr.Pipeline}
 */
Builder.prototype.searchPipeline;

Usage Examples:

const idx = lunr(function () {
  // Strips digits from each token. Defined once so that the very same
  // processor can be installed in both the index and the search pipeline.
  const stripDigits = function (token) {
    return token.update(function (str) {
      return str.replace(/[0-9]/g, '');
    });
  };

  // Registering gives the function a label; Lunr needs one to serialise the
  // pipeline and warns when an unregistered function is added.
  lunr.Pipeline.registerFunction(stripDigits, 'stripDigits');

  this.ref('id');
  this.field('content');

  // Remove stop word filter
  this.pipeline.remove(lunr.stopWordFilter);

  // Add custom processor before stemmer
  this.pipeline.before(lunr.stemmer, stripDigits);

  // Keep the search pipeline in step with the index pipeline: query terms
  // must be normalised the same way as indexed terms (digits stripped, then
  // stemmed), otherwise they will not match what was stored in the index.
  this.searchPipeline.before(lunr.stemmer, stripDigits);

  this.add({ id: '1', content: 'Process this text with custom pipeline' });
});

Metadata Preservation

Controlling which metadata is preserved during indexing for later retrieval.

/**
 * Array of metadata keys to preserve during indexing.
 * Assign an array of key names inside the configuration function to choose
 * which metadata is kept.
 * @type {Array<string>}
 */
Builder.prototype.metadataWhitelist;

Usage Examples:

const idx = lunr(function (builder) {
  builder.ref('id');
  builder.field('content');

  // Metadata keys that should survive into the built index
  builder.metadataWhitelist = ['position', 'frequency'];

  builder.add({ id: '1', content: 'Document with preserved metadata' });
});

Common Patterns

Large Dataset Indexing

// Indexing a large dataset.
// The configuration function runs synchronously and the index is built in
// memory, so slicing `documents` into batches inside it does not lower peak
// memory; it only allocates extra arrays. Add the documents directly instead.
// For corpora too large to index comfortably at runtime, consider building
// the index ahead of time and loading the serialised result.
const idx = lunr(function () {
  this.ref('id');
  this.field('title', { boost: 5 });
  this.field('content');

  documents.forEach((doc) => this.add(doc));
});

Multi-language Support

// Multi-language indexing through a plugin (requires lunr language plugins)
const idx = lunr(function (builder) {
  // Install the language plugin before declaring fields; French is the example here
  builder.use(lunr.fr);

  builder.ref('id');
  builder.field('title');
  builder.field('content');

  frenchDocuments.forEach(function (doc) {
    builder.add(doc);
  });
});

Custom Scoring Parameters

// Fine-tuning search scoring
const idx = lunr(function () {
  this.ref('id');
  this.field('title', { boost: 15 });
  this.field('content');
  
  // BM25 parameters
  this.b(0.9);   // Stronger field length normalization (range 0-1, default 0.75)
  this.k1(2.0);  // Above the 1.2 default; per the Lunr docs a higher k1 makes term frequency saturate more slowly
  
  documents.forEach(doc => this.add(doc));
});