CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/npm-lunr

Simple full-text search in your browser.

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

SecuritybySnyk

Pending

The risk profile of this skill

Overview
Eval results
Files

advanced-querying.mddocs/

Advanced Querying

Advanced query construction and parsing capabilities for sophisticated search operations. The query system supports field restrictions, wildcards, fuzzy matching, edit distance, boolean operators, term boosting, and programmatic query building for complex search logic.

Capabilities

Query Class

Programmatic query construction for building complex search queries.

/**
 * Query builder for constructing search queries programmatically.
 *
 * In the usage examples of this document an instance is received as the `q`
 * argument of the callback handed to `idx.query(...)`.
 */
class Query {
  /**
   * Create a new query for the specified fields
   * @param {Array<string>} allFields - All available fields for searching
   */
  constructor(allFields);

  /**
   * Add a fully specified clause to the query
   * @param {QueryClause} clause - Query clause configuration (see the QueryClause typedef)
   * @returns {lunr.Query} - Query instance for chaining
   */
  clause(clause);

  /**
   * Add a term to the query with options
   * @param {string} term - Search term
   * @param {TermOptions} options - Term configuration options (see the TermOptions typedef)
   * @returns {lunr.Query} - Query instance for chaining
   */
  term(term, options);

  /**
   * Check if the query is negated (all terms are prohibited)
   * @returns {boolean} - True if query is negated
   */
  isNegated();

  /**
   * Wildcard insertion constants, passed as the `wildcard` term option.
   * NOTE(review): the values look like bit flags, so LEADING | TRAILING
   * presumably produces *term* - confirm against the lunr API docs.
   * @type {Object}
   */
  static wildcard = {
    NONE: 0,      // No wildcard is inserted
    LEADING: 1,   // Wildcard inserted before the term (*term)
    TRAILING: 2   // Wildcard inserted after the term (term*)
  };

  /**
   * Term presence constants, passed as the `presence` term option.
   * @type {Object}
   */
  static presence = {
    OPTIONAL: 1,   // Term is optional (default)
    REQUIRED: 2,   // Term must be present (query-string form: +term)
    PROHIBITED: 3  // Term must not be present (query-string form: -term)
  };
}

/**
 * Term options for query construction
 * @typedef {Object} TermOptions
 * @property {Array<string>} fields - Fields to search (default: all fields)
 * @property {number} boost - Boost factor for term importance (default: 1)
 * @property {boolean} usePipeline - Whether to process term through search pipeline (default: true)
 * @property {number} wildcard - Wildcard insertion mode (Query.wildcard constants)
 * @property {number} presence - Term presence requirement (Query.presence constants)
 * @property {number} editDistance - Edit distance for fuzzy matching (0-2)
 */

/**
 * Query clause structure
 * @typedef {Object} QueryClause
 * @property {Array<string>} fields - Fields to search
 * @property {number} boost - Boost factor
 * @property {number} editDistance - Edit distance for fuzzy matching
 * @property {boolean} usePipeline - Whether to use search pipeline
 * @property {number} wildcard - Wildcard configuration
 * @property {number} presence - Presence requirement
 * @property {string} term - Search term
 */

Usage Examples:

const lunr = require('lunr');

// Documents indexed by every example in this section
const sampleDocuments = [
  { id: '1', title: 'JavaScript Tutorial', content: 'Learn JS basics', tags: 'programming' },
  { id: '2', title: 'Advanced Node.js', content: 'Server-side JavaScript', tags: 'backend' },
];

// Build the example index: `id` is the document reference; title, content
// and tags are the searchable fields.
const idx = lunr(function () {
  this.ref('id');
  this.field('title');
  this.field('content');
  this.field('tags');

  // Arrow callback keeps `this` bound to the index builder
  sampleDocuments.forEach((doc) => {
    this.add(doc);
  });
});

// Simplest programmatic query: one boosted, required term
const results = idx.query((q) => {
  const requiredAndBoosted = {
    boost: 10,
    presence: lunr.Query.presence.REQUIRED,
  };
  q.term('javascript', requiredAndBoosted);
});

// Same term weighted differently per field, plus an optional extra term
const multiFieldResults = idx.query((q) => {
  q.term('javascript', { fields: ['title'], boost: 15 });
  q.term('javascript', { fields: ['content'], boost: 5 });
  q.term('tutorial', { presence: lunr.Query.presence.OPTIONAL, boost: 3 });
});

Query Parser

Parses query strings into structured Query objects.

/**
 * Parser for converting query strings into Query objects.
 *
 * The query introspection example in this document builds one with
 * `new lunr.QueryParser(queryString, query)` and then calls `parse()`.
 */
class QueryParser {
  /**
   * Create a query parser
   * @param {string} str - Query string to parse
   * @param {lunr.Query} query - Query object to populate
   */
  constructor(str, query);

  /**
   * Parse the query string
   * @returns {lunr.Query} - Populated query object
   * @throws {lunr.QueryParseError} - When parsing fails (the introspection
   *   example catches this error type around `parse()`)
   */
  parse();

  /**
   * Look at the next lexeme without consuming it
   * @returns {Object} - Next lexeme
   */
  peekLexeme();

  /**
   * Consume and return the next lexeme
   * @returns {Object} - Consumed lexeme
   */
  consumeLexeme();

  /**
   * Parse the next query clause
   * @returns {Object} - Parsed clause
   */
  nextClause();

  /**
   * Static parsing methods for different query components.
   * Each one takes the parser as its only argument.
   * NOTE(review): return values and per-method contracts are not shown in
   * this document - confirm against the lunr source before relying on them.
   */
  static parseClause(parser);
  static parsePresence(parser);
  static parseField(parser);
  static parseTerm(parser);
  static parseEditDistance(parser);
  static parseBoost(parser);
}

Query Lexer

Tokenizes query strings for parsing by the QueryParser.

/**
 * Lexical analyzer for query strings.
 * Tokenizes a query string so that QueryParser can consume the tokens.
 */
class QueryLexer {
  /**
   * Create a query lexer
   * @param {string} str - Query string to tokenize
   */
  constructor(str);

  /**
   * Run the lexer and generate tokens
   * @returns {Array<Object>} - Array of lexical tokens
   */
  run();

  /**
   * Get current string slice
   * @returns {string} - Current slice of input
   */
  sliceString();

  /**
   * Emit a token of the specified type
   * @param {string} type - Token type (one of the static token type constants below)
   */
  emit(type);

  /**
   * Handle escape sequences
   */
  escapeCharacter();

  /**
   * Move to next character
   * @returns {string} - Next character
   */
  next();

  /**
   * Get width of current token
   * @returns {number} - Token width
   */
  width();

  /**
   * Ignore current input (move start position)
   */
  ignore();

  /**
   * Move back one character
   */
  backup();

  /**
   * Accept a run of digits
   * @returns {boolean} - True if digits were accepted
   */
  acceptDigitRun();

  /**
   * Check if more input is available
   * @returns {boolean} - True if more input available
   */
  more();

  /**
   * Token type constants (each value is the constant's own name as a string)
   */
  static EOS = 'EOS';           // End of string
  static FIELD = 'FIELD';       // Field restriction (field:)
  static TERM = 'TERM';         // Search term
  static EDIT_DISTANCE = 'EDIT_DISTANCE'; // Edit distance (~N)
  static BOOST = 'BOOST';       // Term boost (^N)
  static PRESENCE = 'PRESENCE'; // Presence modifier (+, -)

  /**
   * Term separator pattern
   * NOTE(review): the actual pattern is not shown in this document.
   * @type {RegExp}
   */
  static termSeparator;

  /**
   * Static lexing functions for different token types.
   * Each one takes the lexer as its only argument.
   * NOTE(review): return values are not shown in this document - confirm
   * against the lunr source before relying on them.
   */
  static lexField(lexer);
  static lexTerm(lexer);
  static lexEditDistance(lexer);
  static lexBoost(lexer);
  static lexEOS(lexer);
  static lexText(lexer);
}

Query Parse Error

Error class for query parsing failures.

/**
 * Error thrown when query parsing fails.
 *
 * The query introspection example in this document reads `error.message`,
 * `error.start` and `error.end` from a caught instance.
 */
class QueryParseError extends Error {
  /**
   * Create a query parse error
   * @param {string} message - Error message
   * @param {number} start - Start position of error
   * @param {number} end - End position of error
   */
  constructor(message, start, end);
}

Query Syntax Patterns

Boolean Operations

/**
 * Boolean query syntax:
 * - Default: OR behavior (any term matches)
 * - Required: +term (must be present)
 * - Prohibited: -term (must not be present)
 * - Grouping: not supported. lunr's query syntax has no parentheses; combine per-term +/- modifiers instead
 */

Usage Examples:

// Required terms (AND behavior)
idx.search('+javascript +tutorial');

// Prohibited terms
idx.search('javascript -deprecated');

// Mixed boolean
idx.search('+javascript tutorial -outdated');

// Grouping with parentheses is NOT part of lunr's query syntax: "(" and ")"
// are not operators, so '+(javascript node.js) +tutorial' does not behave as a
// group. The closest single query requires "tutorial" and lets the other
// terms contribute to ranking only (it does not demand that one of them match).
idx.search('javascript node.js +tutorial');

// Programmatic equivalent of '+javascript tutorial -deprecated'
const booleanResults = idx.query(function (q) {
  q.term('javascript', { presence: lunr.Query.presence.REQUIRED });
  q.term('tutorial', { presence: lunr.Query.presence.OPTIONAL });
  q.term('deprecated', { presence: lunr.Query.presence.PROHIBITED });
});

Field Restrictions

/**
 * Field restriction syntax:
 * - Single field: field:term
 * - Multiple terms: field:term1 field:term2
 * - Mixed fields: title:javascript content:tutorial
 */

Usage Examples:

// Restrict one term to one field
idx.search('title:tutorial');

// Restrict different terms to different fields
idx.search('title:javascript content:advanced');

// Combine field restrictions with presence modifiers
idx.search('+title:javascript -content:deprecated');

// The same kind of restriction expressed through the query builder
const fieldResults = idx.query((q) => {
  const titleOnly = { fields: ['title'], boost: 10 };
  const contentOrTags = { fields: ['content', 'tags'], boost: 5 };

  q.term('javascript', titleOnly);
  q.term('advanced', contentOrTags);
});

Wildcard Queries

/**
 * Wildcard syntax:
 * - Prefix: term* (matches terms starting with "term")
 * - Suffix: *term (matches terms ending with "term")
 * - Contains: *term* (matches terms containing "term")
 */

Usage Examples:

// Trailing wildcard: prefix match
idx.search('java*');        // Matches "java", "javascript", "javadoc"

// Leading wildcard: suffix match
idx.search('*script');      // Matches "javascript", "typescript"

// Wildcards on both sides: substring match
idx.search('*node*');       // Matches "node", "nodejs", "node_modules"

// Wildcards combined with field restrictions
idx.search('title:java* content:*script');

// Wildcards requested through the query builder
const wildcardResults = idx.query((q) => {
  const { TRAILING, LEADING } = lunr.Query.wildcard;

  // Equivalent to "java*"
  q.term('java', { wildcard: TRAILING, boost: 5 });

  // Equivalent to "*script", limited to the content field
  q.term('script', { wildcard: LEADING, fields: ['content'] });
});

Fuzzy Matching

/**
 * Fuzzy matching syntax:
 * - Edit distance 1: term~1 (allow 1 character difference)
 * - Edit distance 2: term~2 (allow 2 character differences)
 * - The distance is mandatory: a bare term~ has no default and is rejected with a QueryParseError ("edit distance must be numeric")
 */

Usage Examples:

// One edit of tolerance
idx.search('javascript~1');     // Matches "javascript", "javascrpt", etc.

// Two edits of tolerance
idx.search('tutorial~2');       // More tolerant of typos

// Fuzzy term restricted to a field
idx.search('title:javascrpt~1');

// Fuzzy terms combined with presence modifiers
idx.search('+javascrpt~1 +tutorial~1');

// Fuzzy matching requested through the query builder
const fuzzyResults = idx.query((q) => {
  // Lower boost for fuzzy matches
  const misspelled = { editDistance: 1, boost: 8 };
  const tolerantTutorial = { editDistance: 2, fields: ['title', 'content'] };

  q.term('javascrpt', misspelled);
  q.term('tutorial', tolerantTutorial);
});

Term Boosting

/**
 * Term boosting syntax:
 * - Basic boost: term^5 (boost by factor of 5)
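 * - Integer boosts only in query strings: the parser reads a run of digits, so term^1.5 is not a 1.5x boost; pass { boost: 1.5 } to q.term() for fractional boosts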
 * - Field boost: field:term^10 (boost field-specific term)
 */

Usage Examples:

// Boost a single term
idx.search('javascript^10 tutorial');

// Different boosts on several terms
idx.search('important^15 secondary^2 normal');

// Boosts on field-restricted terms
idx.search('title:javascript^20 content:tutorial^5');

// Boosts combined with wildcard, presence and fuzzy modifiers
idx.search('title:java*^10 +tutorial~1^5');

// Boosting through the query builder
const boostedResults = idx.query((q) => {
  const strongTitleMatch = { boost: 20, fields: ['title'] };
  const weakOptionalMatch = { boost: 5, presence: lunr.Query.presence.OPTIONAL };

  q.term('javascript', strongTitleMatch);
  q.term('tutorial', weakOptionalMatch);
});

Advanced Query Patterns

Multi-Stage Queries

/**
 * Query expansion: search for the original term with a high boost and add
 * related terms with their own (lower) boosts.
 *
 * @param {string} originalQuery - Term the user searched for (boost 20)
 * @param {Object<string, number>} [expansions={}] - Map of expansion term to
 *   boost. A missing (null/undefined) boost falls back to 5; an explicit 0 is
 *   respected. A null map is treated as empty.
 * @returns {*} Whatever `idx.query` returns
 */
function expandedSearch(originalQuery, expansions = {}) {
  const DEFAULT_EXPANSION_BOOST = 5;

  return idx.query(function (q) {
    // Original query with high boost
    q.term(originalQuery, { boost: 20 });

    // Add expansions with lower boosts. `??` (not `||`) so that an explicit
    // boost of 0 is not silently replaced by the default.
    for (const [term, boost] of Object.entries(expansions ?? {})) {
      q.term(term, { boost: boost ?? DEFAULT_EXPANSION_BOOST });
    }
  });
}

// Expansion terms for "js", each with its own boost
const jsExpansions = {
  javascript: 15,
  'node.js': 10,
  react: 8,
};

// Search for "js" together with its expansions
const expandedResults = expandedSearch('js', jsExpansions);

Contextual Queries

/**
 * Context-aware search: a required main term plus optional context terms that
 * raise relevance, and terms that must not appear.
 *
 * Context terms are only applied to fields that actually exist in the index.
 * Handing lunr a field name the index does not contain makes `idx.query`
 * throw while scoring, so unknown candidate fields are dropped and a context
 * term with no usable field is skipped.
 *
 * @param {string} mainTerm - Term that must be present
 * @param {Object} [context={}] - Optional context
 * @param {string} [context.category] - Looked up in `tags` / `category`
 * @param {string} [context.level] - Looked up in `level` / `difficulty`
 * @param {Array<string>} [context.exclude] - Terms that must not be present
 * @returns {*} Whatever `idx.query` returns
 */
function contextualSearch(mainTerm, context = {}) {
  const indexedFields = new Set(idx.fields);
  const keepIndexed = (candidates) => candidates.filter((name) => indexedFields.has(name));

  return idx.query(function (q) {
    // Adds a boosted context term, restricted to the candidate fields the index has
    const addContextTerm = (term, candidateFields, boost) => {
      const fields = keepIndexed(candidateFields);
      if (term && fields.length > 0) {
        q.term(term, { fields, boost });
      }
    };

    // Main search term
    q.term(mainTerm, {
      boost: 15,
      presence: lunr.Query.presence.REQUIRED
    });

    // Context terms boost relevance
    addContextTerm(context.category, ['tags', 'category'], 10);
    addContextTerm(context.level, ['level', 'difficulty'], 8);

    // Exclude unwanted content
    (context.exclude ?? []).forEach((term) => {
      q.term(term, { presence: lunr.Query.presence.PROHIBITED });
    });
  });
}

// Context describing what kind of tutorial is wanted
const tutorialContext = {
  category: 'javascript',
  level: 'beginner',
  exclude: ['deprecated', 'outdated'],
};

// Search for tutorials with that context applied
const contextualResults = contextualSearch('tutorial', tutorialContext);

Query Templates

/**
 * Reusable query templates. Each template takes its parameters and returns a
 * callback suitable for `idx.query(callback)`.
 */
const queryTemplates = {
  /**
   * Required, high-boost match without the search pipeline (no stemming).
   * @param {string} term - Term to match
   * @param {Array<string>} [fields=[]] - Fields to search; empty means all fields
   * @returns {function(Object): void} Query-building callback
   */
  exact: (term, fields = []) => (q) => {
    q.term(term, {
      // Only set `fields` when there is something to restrict to. lunr applies
      // its "all fields" default by key presence, so `fields: undefined`
      // would suppress the default and break the query.
      ...(fields.length > 0 ? { fields } : {}),
      boost: 20,
      presence: lunr.Query.presence.REQUIRED,
      usePipeline: false  // Exact matching without stemming
    });
  },

  /**
   * Fuzzy match with a configurable edit distance.
   * @param {string} term - Term to match
   * @param {number} [distance=1] - Allowed edit distance
   * @param {number} [boost=10] - Boost factor
   * @returns {function(Object): void} Query-building callback
   */
  fuzzy: (term, distance = 1, boost = 10) => (q) => {
    q.term(term, {
      editDistance: distance,
      boost: boost
    });
  },

  /**
   * Match a term in a single field.
   * @param {string} field - Field to search
   * @param {string} term - Term to match
   * @param {number} [boost=15] - Boost factor
   * @returns {function(Object): void} Query-building callback
   */
  fieldSearch: (field, term, boost = 15) => (q) => {
    q.term(term, {
      fields: [field],
      boost: boost
    });
  }
};

// Build the callbacks from the templates, then hand them to idx.query
const exactTitleQuery = queryTemplates.exact('javascript', ['title']);
const exactResults = idx.query(exactTitleQuery);

const typoTolerantQuery = queryTemplates.fuzzy('javascrpt', 1, 8);
const fuzzyResults = idx.query(typoTolerantQuery);

Performance-Optimized Queries

/**
 * Build a multi-term query tuned for large indexes.
 *
 * @param {Array<string>} terms - Terms to search for, most important first
 * @param {Object} [options={}] - Query options
 * @param {boolean} [options.requireAll] - When truthy, EVERY term is required;
 *   otherwise every term is optional
 * @param {Array<number>} [options.boosts] - Per-term boosts by position; a
 *   missing entry falls back to a decreasing default (5, 4, 3, ...) that never
 *   drops below 1
 * @param {Array<string>} [options.exclude] - Terms that must not be present
 * @returns {*} Whatever `idx.query` returns
 */
function optimizedQuery(terms, options = {}) {
  const { OPTIONAL, REQUIRED, PROHIBITED } = lunr.Query.presence;
  // "requireAll" applies to all terms, not only to the first one
  const presence = options.requireAll ? REQUIRED : OPTIONAL;

  return idx.query(function (q) {
    terms.forEach((term, index) => {
      // `??` keeps an explicit boost of 0; the floor keeps the default
      // positive for the sixth term onwards
      const boost = options.boosts?.[index] ?? Math.max(1, 5 - index);

      q.term(term, {
        boost: boost,
        presence: presence,
        // Disable pipeline for exact matches on first term
        usePipeline: index > 0
      });
    });

    // Add prohibited terms
    (options.exclude ?? []).forEach((term) => {
      q.term(term, { presence: PROHIBITED });
    });
  });
}

// Terms in order of importance, and how the query should treat them
const beginnerTutorialTerms = ['javascript', 'tutorial', 'beginner'];
const beginnerTutorialOptions = {
  requireAll: false,
  boosts: [20, 10, 5],
  exclude: ['advanced', 'deprecated'],
};

// Run the optimized search
const optimizedResults = optimizedQuery(beginnerTutorialTerms, beginnerTutorialOptions);

Query Debugging and Analysis

Query Introspection

/**
 * Debugging aid: parse a query string and print the structure of each clause.
 *
 * @param {string} queryString - Query in lunr's string syntax
 * @returns {lunr.Query} The populated query object
 * @throws Rethrows any error from parsing or printing; a lunr.QueryParseError
 *   is reported on stderr with its position first
 */
function analyzeQuery(queryString) {
  const searchableFields = ['title', 'content', 'tags'];
  const parser = new lunr.QueryParser(queryString, new lunr.Query(searchableFields));

  try {
    const parsedQuery = parser.parse();
    const { clauses } = parsedQuery;

    console.log('Query analysis:');
    console.log('- Clauses:', clauses.length);

    for (const [position, clause] of clauses.entries()) {
      console.log(`  Clause ${position + 1}:`);
      console.log(`    Term: "${clause.term}"`);
      console.log(`    Fields: ${clause.fields.join(', ')}`);
      console.log(`    Boost: ${clause.boost}`);
      console.log(`    Presence: ${clause.presence}`);
      console.log(`    Wildcard: ${clause.wildcard}`);
      console.log(`    Edit Distance: ${clause.editDistance}`);
    }

    return parsedQuery;
  } catch (error) {
    const isParseFailure = error instanceof lunr.QueryParseError;
    if (isParseFailure) {
      console.error('Query parsing failed:', error.message);
      console.error('At position:', error.start, '-', error.end);
    }
    // Always propagate: the logging above is informational only
    throw error;
  }
}

// Print the clause breakdown of a query that uses a field, a boost, fuzzy
// matching and both presence modifiers
const queryToInspect = 'title:javascript^10 +tutorial~1 -deprecated';
analyzeQuery(queryToInspect);

docs

advanced-querying.md

index-building.md

index.md

searching.md

text-processing.md

utilities.md

tile.json