tessl/npm-lunr

Simple full-text search in your browser.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security

Pending

The risk profile of this skill

Overview

Eval results

Files

Utilities

Name: tessl/npm-lunr
Author: tessl

Utility functions and data structures used throughout the Lunr library. This includes helper functions for string manipulation, data structures for efficient search operations, field references, match data handling, and scoring functions.

Capabilities

Utility Functions

Core utility functions in the lunr.utils namespace.

/**
 * Utility namespace containing helper functions
 * @namespace lunr.utils
 */
lunr.utils = {
  /**
   * Print a warning message with console.warn when a console is available;
   * otherwise do nothing
   * @param {string} message - Warning message to display
   */
  warn(message),

  /**
   * Convert object to string, handling null and undefined
   * @param {*} obj - Object to convert to string
   * @returns {string} - Empty string for null/undefined, otherwise obj.toString()
   */
  asString(obj),

  /**
   * Shallow-clone a flat object. Values must be strings, numbers, booleans or
   * arrays (arrays are copied with slice()). The result is always a plain
   * key/value object, so passing an array does NOT return an array.
   * @param {Object} obj - Flat object to clone; null/undefined are returned as-is
   * @returns {Object} - Cloned object
   * @throws {TypeError} - If any value is a nested object
   */
  clone(obj)
};

Usage Examples:

const lunr = require('lunr');

// Warning function
lunr.utils.warn('This is a warning message');
// Calls console.warn when a console is available; otherwise does nothing

// String conversion with null safety
console.log(lunr.utils.asString(null));      // ""
console.log(lunr.utils.asString(undefined)); // ""
console.log(lunr.utils.asString("hello"));   // "hello"
console.log(lunr.utils.asString(123));       // "123"
console.log(lunr.utils.asString({a: 1}));    // "[object Object]"

// Object cloning: primitives are copied and array values are duplicated
const original = { name: 'John', tags: ['dev', 'js'] };
const cloned = lunr.utils.clone(original);
cloned.name = 'Jane';
cloned.tags.push('search');
console.log(original.name);        // "John" (unchanged)
console.log(cloned.name);          // "Jane"
console.log(original.tags.length); // 2 (the clone holds its own copy of the array)
console.log(cloned.tags.length);   // 3

// clone() always returns a plain key/value object, so it is not suitable for
// copying an array directly (the result would have no push()). Copy arrays
// with spread or slice() instead.
const originalArray = [1, 2, 3];
const clonedArray = [...originalArray];
clonedArray.push(4);
console.log(originalArray.length); // 3 (unchanged)
console.log(clonedArray.length);   // 4

// Nested objects are rejected rather than shared between original and clone
try {
  lunr.utils.clone({ meta: { nested: true } });
} catch (err) {
  console.log(err instanceof TypeError); // true
}

Scoring Functions

Functions for calculating document relevance scores.

/**
 * Calculate inverse document frequency for term scoring.
 * Computed as log(1 + |(N - n + 0.5) / (n + 0.5)|), where N is documentCount
 * and n is the number of document entries in the posting, summed across fields.
 * @param {Object} posting - Posting for a term: maps each field name to an
 *   object keyed by the refs of the documents containing the term in that
 *   field. The `_index` key is ignored.
 * @param {number} documentCount - Total number of documents in the index
 * @returns {number} - IDF score for the term
 */
lunr.idf(posting, documentCount);

Usage Examples:

// lunr.idf counts documents by looking at the keys under each field of the
// posting, so the posting must have the shape
//   { _index: <number>, <fieldName>: { <docRef>: {...}, ... } }
// This helper builds such a posting for a term found in `matchingDocs` documents.
function buildPosting(matchingDocs) {
  const docs = {};
  for (let i = 0; i < matchingDocs; i++) {
    docs[`doc${i}`] = {};
  }
  return { _index: 0, body: docs };
}

// Calculate IDF for a term that appears in 5 out of 100 documents
const posting = buildPosting(5);
const totalDocs = 100;
const idfScore = lunr.idf(posting, totalDocs);
console.log(idfScore); // ~2.910 = ln(1 + 95.5 / 5.5) (higher for rare terms)

// Common term (appears in 80 out of 100 documents)
const commonPosting = buildPosting(80);
const commonIdf = lunr.idf(commonPosting, totalDocs);
console.log(commonIdf); // ~0.227 = ln(1 + 20.5 / 80.5) (lower for common terms)

Vector Class

Vector space representation for documents and term weights.

/**
 * Sparse vector class for document representation in vector space.
 * Elements are stored as one flat array of alternating index, value entries.
 */
class Vector {
  /**
   * Create a vector from a flat list of index/value entries
   * @param {Array<number>} [elements] - Flat array laid out as
   *   [index0, value0, index1, value1, ...] with ascending indexes;
   *   omit to create an empty vector
   */
  constructor(elements);

  /**
   * Calculate insertion position for an index
   * @param {number} index - Index to find position for
   * @returns {number} - Position within the flat elements array where the index belongs
   */
  positionForIndex(index);

  /**
   * Insert a value at the specified index. Mutates the vector in place and
   * returns nothing; throws if the index is already present.
   * @param {number} index - Index position
   * @param {number} value - Value to insert
   */
  insert(index, value);

  /**
   * Insert or update a value at the specified index. Mutates the vector in
   * place and returns nothing.
   * @param {number} index - Index position
   * @param {number} value - Value to insert/update
   * @param {Function} fn - Called as fn(existingValue, value) when the index
   *   already exists; its return value becomes the stored value
   */
  upsert(index, value, fn);

  /**
   * Calculate the magnitude (length) of the vector
   * @returns {number} - Vector magnitude
   */
  magnitude();

  /**
   * Calculate dot product with another vector
   * @param {lunr.Vector} otherVector - Vector to calculate dot product with
   * @returns {number} - Dot product result
   */
  dot(otherVector);

  /**
   * Calculate similarity with another vector: the dot product divided by THIS
   * vector's magnitude only, so the result is not bounded to the 0-1 range
   * unless otherVector is unit length.
   * @param {lunr.Vector} otherVector - Vector to compare with
   * @returns {number} - Similarity score (higher = more similar); 0 for a zero-magnitude vector
   */
  similarity(otherVector);

  /**
   * Convert vector to regular array
   * @returns {Array<number>} - The values only, without their indexes
   */
  toArray();

  /**
   * Serialize vector to JSON
   * @returns {Array<number>} - The flat index/value array (same layout the constructor accepts)
   */
  toJSON();
}

Usage Examples:

// Create vectors. The constructor takes a flat array of alternating
// index, value entries in ascending index order, not a plain list of values.
const vec1 = new lunr.Vector([0, 1, 1, 2, 2, 3]); // values 1, 2, 3 at indexes 0, 1, 2
const vec2 = new lunr.Vector([0, 2, 1, 1, 2, 3]); // values 2, 1, 3 at indexes 0, 1, 2

// Vector operations
console.log(vec1.magnitude());       // ~3.742 (sqrt(1 + 4 + 9))
console.log(vec1.dot(vec2));         // 13 (1*2 + 2*1 + 3*3)
console.log(vec1.similarity(vec2));  // ~3.474 (dot product / vec1 magnitude)
console.log(vec1.toArray());         // [1, 2, 3] (values only)

// Sparse vector operations (index-value pairs)
const sparseVec = new lunr.Vector();
sparseVec.insert(10, 0.5);  // Insert value 0.5 at index 10
sparseVec.insert(25, 1.2);  // Insert value 1.2 at index 25
// insert() throws if the index already exists; use upsert() to update it

// Upsert (insert or update)
sparseVec.upsert(10, 0.3, (existing, requested) => existing + requested);
// Index 10 now has value 0.8 (0.5 + 0.3)
console.log(sparseVec.toJSON()); // [10, 0.8, 25, 1.2]

Set Class

Set data structure for document collections and filtering.

/**
 * Set class for working with collections of document references.
 * Members are stored as object keys, so elements are compared by their
 * string form.
 */
class Set {
  /**
   * Create a set from array of elements
   * @param {Array} [elements] - Array of elements to include in set; omit for a set with no members
   */
  constructor(elements);

  /**
   * Check if the set contains an object
   * @param {*} object - Object to check for membership
   * @returns {boolean} - True if object is in the set
   */
  contains(object);

  /**
   * Calculate intersection with another set
   * @param {lunr.Set} other - Set to intersect with
   * @returns {lunr.Set} - Set containing the elements common to both sets
   */
  intersect(other);

  /**
   * Calculate union with another set
   * @param {lunr.Set} other - Set to union with
   * @returns {lunr.Set} - Set containing all elements from both sets
   */
  union(other);

  /**
   * Universal set containing all possible elements: contains() is always
   * true, union() with anything yields the complete set, and intersect()
   * yields the other set
   * @type {lunr.Set}
   */
  static complete;

  /**
   * Empty set containing no elements: contains() is always false, union()
   * yields the other set, and intersect() yields the empty set
   * @type {lunr.Set}
   */
  static empty;
}

Usage Examples:

// Create sets
const set1 = new lunr.Set(['doc1', 'doc2', 'doc3']);
const set2 = new lunr.Set(['doc2', 'doc3', 'doc4']);

// Set operations
console.log(set1.contains('doc1'));    // true
console.log(set1.contains('doc4'));    // false

// intersect() and union() return lunr.Set instances, not arrays;
// query them with contains()
const intersection = set1.intersect(set2);  // lunr.Set holding 'doc2' and 'doc3'
const union = set1.union(set2);            // lunr.Set holding 'doc1', 'doc2', 'doc3' and 'doc4'

// Special sets
console.log(lunr.Set.empty.contains('anything'));     // false
console.log(lunr.Set.complete.contains('anything'));  // true

TokenSet Class

Finite state automaton for efficient token matching with wildcards and fuzzy search.

/**
 * TokenSet class implementing finite state automaton for token matching
 */
class TokenSet {
  /**
   * Create a new, empty TokenSet node
   */
  constructor();

  /**
   * Convert TokenSet to array of accepted strings.
   * Not intended for TokenSets that contain wildcards: the accepted language
   * is then unbounded and enumeration may never finish.
   * @returns {Array<string>} - Array of strings accepted by this TokenSet
   */
  toArray();

  /**
   * Convert TokenSet to string representation
   * @returns {string} - String representation of the TokenSet
   */
  toString();

  /**
   * Calculate intersection with another TokenSet
   * @param {lunr.TokenSet} other - TokenSet to intersect with
   * @returns {lunr.TokenSet} - New TokenSet representing the intersection
   */
  intersect(other);

  /**
   * Create TokenSet from sorted array of strings
   * @param {Array<string>} arr - Array of strings, already sorted in ascending
   *   order; out-of-order input makes construction throw
   * @returns {lunr.TokenSet} - TokenSet accepting the given strings
   */
  static fromArray(arr);

  /**
   * Create TokenSet from query clause
   * @param {Object} clause - Query clause object with a `term` and an optional
   *   `editDistance`; when `editDistance` is present the set is fuzzy
   * @returns {lunr.TokenSet} - TokenSet for matching the clause
   */
  static fromClause(clause);

  /**
   * Create TokenSet for fuzzy string matching
   * @param {string} str - String to match fuzzily
   * @param {number} editDistance - Maximum edit distance allowed
   * @returns {lunr.TokenSet} - TokenSet for fuzzy matching
   */
  static fromFuzzyString(str, editDistance);

  /**
   * Create TokenSet from a string; a `*` in the string acts as a wildcard
   * @param {string} str - String to match (exactly, apart from any `*` wildcards)
   * @returns {lunr.TokenSet} - TokenSet matching the string
   */
  static fromString(str);

  /**
   * Internal ID counter for TokenSet nodes (not part of the public API)
   * @type {number}
   */
  static _nextId;
}

Usage Examples:

// Create TokenSet for exact matching
const exactSet = lunr.TokenSet.fromString('javascript');
console.log(exactSet.toArray()); // ['javascript']

// Create TokenSet for fuzzy matching
const fuzzySet = lunr.TokenSet.fromFuzzyString('javascript', 1);
console.log(fuzzySet.toArray()); // ['javascript', 'javascrip', 'avascript', etc.] (may include '*' wildcard entries)

// Create from array. The words must already be in ascending order, otherwise
// construction throws "Out of order word insertion". Note that
// 'java-script' sorts before 'javascript', so sort explicitly.
const arraySet = lunr.TokenSet.fromArray(['java', 'javascript', 'java-script'].sort());

// TokenSet operations
const intersection = exactSet.intersect(fuzzySet);

TokenSet Builder

Builder for constructing optimized TokenSets from word lists.

/**
 * Builder for constructing TokenSets efficiently.
 * Exposed by the library as lunr.TokenSet.Builder.
 */
class TokenSetBuilder {
  /**
   * Create a new TokenSet builder
   */
  constructor();

  /**
   * Insert a word into the builder. Words must be inserted in ascending
   * sorted order.
   * @param {string} word - Word to insert
   * @throws {Error} - "Out of order word insertion" when word sorts before the previously inserted word
   */
  insert(word);

  /**
   * Finalize the TokenSet construction by minimizing the remaining nodes.
   * Returns nothing; read the built TokenSet from the builder's `root` property.
   */
  finish();

  /**
   * Minimize the automaton to reduce states
   * @param {number} downTo - Minimize down to this state level
   */
  minimize(downTo);
}

Usage Examples:

// Build TokenSet from word list. Words must be inserted in ascending sorted
// order; insert() throws "Out of order word insertion" otherwise.
const builder = new lunr.TokenSet.Builder();
['java', 'javascript', 'typescript', 'coffeescript'].sort().forEach(word => {
  builder.insert(word);
});

// finish() minimizes the automaton and returns nothing; the built TokenSet
// is available as builder.root.
builder.finish();
const wordSet = builder.root;
console.log(wordSet.toArray()); // All inserted words

Field Reference

Reference system for identifying fields within documents.

/**
 * Reference to a specific field within a document
 */
class FieldRef {
  /**
   * Create a field reference
   * @param {string} docRef - Document reference identifier
   * @param {string} fieldName - Name of the field
   * @param {string} [stringValue] - Optional pre-computed string representation
   */
  constructor(docRef, fieldName, stringValue);

  /**
   * Convert field reference to string representation
   * @returns {string} - String representation: fieldName + joiner + docRef (e.g. "title/doc123")
   */
  toString();

  /**
   * Parse field reference from string representation. The string is split at
   * the FIRST joiner: the part before it is the field name, everything after
   * it is the document ref (so document refs may themselves contain the joiner).
   * @param {string} str - String to parse (fieldName/docRef format)
   * @returns {lunr.FieldRef} - Parsed field reference
   * @throws When str does not contain the joiner
   */
  static fromString(str);

  /**
   * Separator character used in string representation
   * @type {string}
   */
  static joiner; // "/"
}

Usage Examples:

// Create field reference (constructor order is docRef, fieldName)
const fieldRef = new lunr.FieldRef('doc123', 'title');
// The string form puts the field name first: fieldName + joiner + docRef
console.log(fieldRef.toString()); // "title/doc123"

// Parse from string (fieldName/docRef format)
const parsed = lunr.FieldRef.fromString('content/doc456');
console.log(parsed.docRef);    // "doc456"
console.log(parsed.fieldName); // "content"

// Separator used between field name and document ref
console.log(lunr.FieldRef.joiner); // "/"

Match Data

Container for search match metadata and term position information.

/**
 * Container for metadata about search matches.
 * Data is exposed on the `metadata` property, keyed by term and then by field.
 */
class MatchData {
  /**
   * Create match data for a term and field
   * @param {string} term - Matching term
   * @param {string} field - Field where match occurred
   * @param {Object} metadata - Match metadata (positions, etc.); every value
   *   must be an array because values are copied with slice()
   */
  constructor(term, field, metadata);

  /**
   * Merge another MatchData instance into this one. Mutates this instance in
   * place and returns nothing; array values for the same term, field and key
   * are concatenated.
   * @param {lunr.MatchData} otherMatchData - Other match data to combine
   */
  combine(otherMatchData);

  /**
   * Add metadata for a term and field. Mutates this instance in place; array
   * values for an existing term, field and key are concatenated.
   * @param {string} term - Term to add metadata for
   * @param {string} field - Field to add metadata for
   * @param {Object} metadata - Metadata to add (values should be arrays)
   */
  add(term, field, metadata);
}

Usage Examples:

// Create match data. Every metadata value must be an array: the constructor
// copies each value with slice() and later merges use concat(), so scalar
// values such as `frequency: 1` would throw.
const matchData = new lunr.MatchData('javascript', 'title', {
  positions: [[0, 10]]
});

// Add more match information
matchData.add('tutorial', 'content', {
  positions: [[15, 23], [45, 53]]
});

// Combine match data from different sources
const otherMatches = new lunr.MatchData('node', 'tags', {
  positions: [[0, 4]]
});

// combine() merges otherMatches into matchData in place and returns nothing,
// so read the merged result from matchData itself.
matchData.combine(otherMatches);
console.log(Object.keys(matchData.metadata)); // ['javascript', 'tutorial', 'node']

Advanced Utility Patterns

Custom String Processing

// Extend utils with custom functions
/**
 * Normalize a value for matching: stringify it null-safely, lower-case it,
 * drop every character that is neither a word character nor whitespace,
 * collapse whitespace runs to one space and trim the ends.
 * @param {*} str - Value to normalize
 * @returns {string} - Normalized string
 */
lunr.utils.customNormalize = function(str) {
  const lowered = lunr.utils.asString(str).toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\w\s]/g, '');
  const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
  return singleSpaced.trim();
};

// Use in pipeline
/**
 * Pipeline function that rewrites a token's string with
 * lunr.utils.customNormalize.
 * @param {Object} token - Token exposing update(fn)
 * @returns {*} - Whatever token.update returns
 */
function normalizeToken(token) {
  const { customNormalize } = lunr.utils;
  return token.update(customNormalize);
}

Vector Space Operations

// Document similarity calculation
/**
 * Score two document vectors against each other and bucket the score.
 * NOTE(review): the thresholds assume similarity() yields scores on a
 * roughly 0-1 scale - confirm for the vectors you pass in.
 * @param {Object} doc1Vector - Vector whose similarity() method is called
 * @param {Object} doc2Vector - Vector passed to doc1Vector.similarity()
 * @returns {{similarity: number, category: string}} - Raw score and its label
 */
function calculateSimilarity(doc1Vector, doc2Vector) {
  const score = doc1Vector.similarity(doc2Vector);

  // Each threshold is exclusive: a score equal to it falls into the lower bucket.
  let label = 'different';
  if (score > 0.8) {
    label = 'very similar';
  } else if (score > 0.5) {
    label = 'similar';
  } else if (score > 0.2) {
    label = 'somewhat similar';
  }

  return { similarity: score, category: label };
}

// Find similar documents
/**
 * Score every document vector against the query and keep those whose score
 * is strictly above the threshold, ordered from most to least similar.
 * @param {Object} queryVector - Vector whose similarity() method is called
 * @param {Array<Object>} documentVectors - Candidate vectors
 * @param {number} [threshold=0.3] - Exclusive lower bound for a match
 * @returns {Array<{index: number, similarity: number}>} - Matches with their
 *   position in documentVectors, sorted by descending similarity
 */
function findSimilarDocuments(queryVector, documentVectors, threshold = 0.3) {
  const matches = documentVectors.reduce((kept, docVec, index) => {
    const similarity = queryVector.similarity(docVec);
    if (similarity > threshold) {
      kept.push({ index, similarity });
    }
    return kept;
  }, []);

  return matches.sort((left, right) => right.similarity - left.similarity);
}

Set-based Filtering

// Document filtering with sets
/**
 * Filters search results by document ref using an allow-list and a block-list.
 * Until allow() is called every document is allowed. Blocked refs are always
 * removed, even when they are also allowed.
 */
class DocumentFilter {
  constructor() {
    // Start from the universal set so an unconfigured filter lets everything
    // through (an empty allow-list would reject every result).
    this.allowedDocs = lunr.Set.complete;
    this.blockedDocs = new lunr.Set();
  }

  /**
   * Restrict results to the given refs, in addition to any allowed earlier.
   * @param {Array<string>} docRefs - Document refs to allow
   */
  allow(docRefs) {
    const additions = new lunr.Set(docRefs);
    // The union of the complete set with anything is still the complete set,
    // so the first allow() call has to replace it instead of extending it.
    this.allowedDocs = this.allowedDocs === lunr.Set.complete
      ? additions
      : this.allowedDocs.union(additions);
  }

  /**
   * Exclude the given refs from results, in addition to any blocked earlier.
   * @param {Array<string>} docRefs - Document refs to block
   */
  block(docRefs) {
    this.blockedDocs = this.blockedDocs.union(new lunr.Set(docRefs));
  }

  /**
   * Apply the filter to a list of results.
   * @param {Array<{ref: string}>} results - Results carrying a `ref` property
   * @returns {Array<{ref: string}>} - Results that are allowed and not blocked
   */
  filter(results) {
    return results.filter(({ ref }) => {
      // Blocking takes precedence over allowing.
      if (this.blockedDocs.contains(ref)) return false;
      return this.allowedDocs.contains(ref);
    });
  }
}

// Usage
const filter = new DocumentFilter();
filter.allow(['doc1', 'doc2', 'doc3']);
filter.block(['doc2']); // Block doc2 even though it's allowed

// `searchResults` must be an array of objects carrying a `ref` property;
// the blocked check runs first, so doc2 is dropped despite being allowed.
const filteredResults = filter.filter(searchResults);

Performance Monitoring

// Performance monitoring utilities
lunr.utils.performance = {
  /**
   * Run fn, report the elapsed wall-clock time through lunr.utils.warn and
   * hand back fn's result.
   * @param {string} label - Prefix for the reported line
   * @param {Function} fn - Synchronous function to measure
   * @returns {*} - Whatever fn returns
   */
  time: function(label, fn) {
    const startedAt = Date.now();
    const outcome = fn();
    const elapsedMs = Date.now() - startedAt;
    lunr.utils.warn(`${label}: ${elapsedMs}ms`);
    return outcome;
  },

  /**
   * Run fn and report the change in heapUsed through lunr.utils.warn.
   * When process.memoryUsage is unavailable, fn is run without any report.
   * @param {string} label - Prefix for the reported line
   * @param {Function} fn - Synchronous function to measure
   * @returns {*} - Whatever fn returns
   */
  memory: function(label, fn) {
    const canMeasure = typeof process !== 'undefined' && process.memoryUsage;
    if (!canMeasure) {
      return fn();
    }

    const heapBefore = process.memoryUsage().heapUsed;
    const outcome = fn();
    const heapAfter = process.memoryUsage().heapUsed;
    const deltaBytes = heapAfter - heapBefore;
    lunr.utils.warn(`${label}: ${Math.round(deltaBytes / 1024)}KB`);
    return outcome;
  }
};

// Usage: time a search against an existing index `idx`; the timing is
// reported through lunr.utils.warn and the search results are returned.
const results = lunr.utils.performance.time(
  'Search Query',
  () => idx.search('javascript tutorial')
);