or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-querying.mdindex-building.mdindex.mdsearching.mdtext-processing.mdutilities.md

advanced-querying.mddocs/

0

# Advanced Querying

1

2

Advanced query construction and parsing capabilities for sophisticated search operations. The query system supports field restrictions, wildcards, fuzzy matching, edit distance, boolean operators, term boosting, and programmatic query building for complex search logic.

3

4

## Capabilities

5

6

### Query Class

7

8

Programmatic query construction for building complex search queries.

9

10

```javascript { .api }

11

/**

12

* Query builder for constructing search queries programmatically

13

*/

14

class Query {

15

/**

16

* Create a new query for the specified fields

17

* @param {Array<string>} allFields - All available fields for searching

18

*/

19

constructor(allFields);

20

21

/**

22

* Add a clause to the query

23

* @param {Object} clause - Query clause configuration

24

* @returns {lunr.Query} - Query instance for chaining

25

*/

26

clause(clause);

27

28

/**

29

* Add a term to the query with options

30

* @param {string} term - Search term

31

* @param {Object} options - Term configuration options

32

* @returns {lunr.Query} - Query instance for chaining

33

*/

34

term(term, options);

35

36

/**

37

* Check if the query is negated (all terms are prohibited)

38

* @returns {boolean} - True if query is negated

39

*/

40

isNegated();

41

42

/**

43

* Wildcard insertion constants

44

* @type {Object}

45

*/

46

static wildcard = {

47

NONE: 0, // No wildcards

48

LEADING: 1, // Leading wildcard (*term)

49

TRAILING: 2 // Trailing wildcard (term*)

50

};

51

52

/**

53

* Term presence constants

54

* @type {Object}

55

*/

56

static presence = {

57

OPTIONAL: 1, // Term is optional (default)

58

REQUIRED: 2, // Term must be present (+term)

59

PROHIBITED: 3 // Term must not be present (-term)

60

};

61

}

62

63

/**

64

* Term options for query construction

65

* @typedef {Object} TermOptions

66

* @property {Array<string>} fields - Fields to search (default: all fields)

67

* @property {number} boost - Boost factor for term importance (default: 1)

68

* @property {boolean} usePipeline - Whether to process term through search pipeline (default: true)

69

* @property {number} wildcard - Wildcard insertion mode (Query.wildcard constants)

70

* @property {number} presence - Term presence requirement (Query.presence constants)

71

* @property {number} editDistance - Edit distance for fuzzy matching (0-2)

72

*/

73

74

/**

75

* Query clause structure

76

* @typedef {Object} QueryClause

77

* @property {Array<string>} fields - Fields to search

78

* @property {number} boost - Boost factor

79

* @property {number} editDistance - Edit distance for fuzzy matching

80

* @property {boolean} usePipeline - Whether to use search pipeline

81

* @property {number} wildcard - Wildcard configuration

82

* @property {number} presence - Presence requirement

83

* @property {string} term - Search term

84

*/

85

```

86

87

**Usage Examples:**

88

89

```javascript

90

const lunr = require('lunr');

91

92

// Create index for examples

93

const idx = lunr(function () {

94

this.ref('id');

95

this.field('title');

96

this.field('content');

97

this.field('tags');

98

99

this.add({ id: '1', title: 'JavaScript Tutorial', content: 'Learn JS basics', tags: 'programming' });

100

this.add({ id: '2', title: 'Advanced Node.js', content: 'Server-side JavaScript', tags: 'backend' });

101

});

102

103

// Basic programmatic query

104

const results = idx.query(function (q) {

105

q.term('javascript', {

106

boost: 10,

107

presence: lunr.Query.presence.REQUIRED

108

});

109

});

110

111

// Multi-field query with different boosts

112

const multiFieldResults = idx.query(function (q) {

113

q.term('javascript', {

114

fields: ['title'],

115

boost: 15

116

});

117

118

q.term('javascript', {

119

fields: ['content'],

120

boost: 5

121

});

122

123

q.term('tutorial', {

124

presence: lunr.Query.presence.OPTIONAL,

125

boost: 3

126

});

127

});

128

```

129

130

### Query Parser

131

132

Parses query strings into structured Query objects.

133

134

```javascript { .api }

135

/**

136

* Parser for converting query strings into Query objects

137

*/

138

class QueryParser {

139

/**

140

* Create a query parser

141

* @param {string} str - Query string to parse

142

* @param {lunr.Query} query - Query object to populate

143

*/

144

constructor(str, query);

145

146

/**

147

* Parse the query string

148

* @returns {lunr.Query} - Populated query object

149

*/

150

parse();

151

152

/**

153

* Look at the next lexeme without consuming it

154

* @returns {Object} - Next lexeme

155

*/

156

peekLexeme();

157

158

/**

159

* Consume and return the next lexeme

160

* @returns {Object} - Consumed lexeme

161

*/

162

consumeLexeme();

163

164

/**

165

* Parse the next query clause

166

* @returns {Object} - Parsed clause

167

*/

168

nextClause();

169

170

/**

171

* Static parsing methods for different query components

172

*/

173

static parseClause(parser);

174

static parsePresence(parser);

175

static parseField(parser);

176

static parseTerm(parser);

177

static parseEditDistance(parser);

178

static parseBoost(parser);

179

}

180

```

181

182

### Query Lexer

183

184

Tokenizes query strings for parsing by the QueryParser.

185

186

```javascript { .api }

187

/**

188

* Lexical analyzer for query strings

189

*/

190

class QueryLexer {

191

/**

192

* Create a query lexer

193

* @param {string} str - Query string to tokenize

194

*/

195

constructor(str);

196

197

/**

198

* Run the lexer and generate tokens

199

* @returns {Array<Object>} - Array of lexical tokens

200

*/

201

run();

202

203

/**

204

* Get current string slice

205

* @returns {string} - Current slice of input

206

*/

207

sliceString();

208

209

/**

210

* Emit a token of the specified type

211

* @param {string} type - Token type

212

*/

213

emit(type);

214

215

/**

216

* Handle escape sequences

217

*/

218

escapeCharacter();

219

220

/**

221

* Move to next character

222

* @returns {string} - Next character

223

*/

224

next();

225

226

/**

227

* Get width of current token

228

* @returns {number} - Token width

229

*/

230

width();

231

232

/**

233

* Ignore current input (move start position)

234

*/

235

ignore();

236

237

/**

238

* Move back one character

239

*/

240

backup();

241

242

/**

243

* Accept a run of digits

244

* @returns {boolean} - True if digits were accepted

245

*/

246

acceptDigitRun();

247

248

/**

249

* Check if more input is available

250

* @returns {boolean} - True if more input available

251

*/

252

more();

253

254

/**

255

* Token type constants

256

*/

257

static EOS = 'EOS'; // End of string

258

static FIELD = 'FIELD'; // Field restriction (field:)

259

static TERM = 'TERM'; // Search term

260

static EDIT_DISTANCE = 'EDIT_DISTANCE'; // Edit distance (~N)

261

static BOOST = 'BOOST'; // Term boost (^N)

262

static PRESENCE = 'PRESENCE'; // Presence modifier (+, -)

263

264

/**

265

* Term separator pattern

266

* @type {RegExp}

267

*/

268

static termSeparator;

269

270

/**

271

* Static lexing functions for different token types

272

*/

273

static lexField(lexer);

274

static lexTerm(lexer);

275

static lexEditDistance(lexer);

276

static lexBoost(lexer);

277

static lexEOS(lexer);

278

static lexText(lexer);

279

}

280

```

281

282

### Query Parse Error

283

284

Error class for query parsing failures.

285

286

```javascript { .api }

287

/**

288

* Error thrown when query parsing fails

289

*/

290

class QueryParseError extends Error {

291

/**

292

* Create a query parse error

293

* @param {string} message - Error message

294

* @param {number} start - Start position of error

295

* @param {number} end - End position of error

296

*/

297

constructor(message, start, end);

298

}

299

```

300

301

## Query Syntax Patterns

302

303

### Boolean Operations

304

305

```javascript { .api }

306

/**

307

* Boolean query syntax:

308

* - Default: OR behavior (any term matches)

309

* - Required: +term (must be present)

310

* - Prohibited: -term (must not be present)

311

* - Grouping: not supported — lunr's query syntax has no parentheses operator; apply + or - to each term individually

312

*/

313

```

314

315

**Usage Examples:**

316

317

```javascript

318

// Required terms (AND behavior)

319

idx.search('+javascript +tutorial');

320

321

// Prohibited terms

322

idx.search('javascript -deprecated');

323

324

// Mixed boolean

325

idx.search('+javascript tutorial -outdated');

326

327

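// Caution: parentheses are NOT a grouping operator in lunr query strings; "(" and ")" are read as part of the term, so prefer '+javascript +tutorial'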

328

idx.search('+(javascript node.js) +tutorial');

329

330

// Programmatic equivalent

331

const booleanResults = idx.query(function (q) {

332

q.term('javascript', { presence: lunr.Query.presence.REQUIRED });

333

q.term('tutorial', { presence: lunr.Query.presence.OPTIONAL });

334

q.term('deprecated', { presence: lunr.Query.presence.PROHIBITED });

335

});

336

```

337

338

### Field Restrictions

339

340

```javascript { .api }

341

/**

342

* Field restriction syntax:

343

* - Single field: field:term

344

* - Multiple terms: field:term1 field:term2

345

* - Mixed fields: title:javascript content:tutorial

346

*/

347

```

348

349

**Usage Examples:**

350

351

```javascript

352

// Single field search

353

idx.search('title:tutorial');

354

355

// Multiple field restrictions

356

idx.search('title:javascript content:advanced');

357

358

// Field with boolean operators

359

idx.search('+title:javascript -content:deprecated');

360

361

// Programmatic field restrictions

362

const fieldResults = idx.query(function (q) {

363

q.term('javascript', {

364

fields: ['title'],

365

boost: 10

366

});

367

368

q.term('advanced', {

369

fields: ['content', 'tags'],

370

boost: 5

371

});

372

});

373

```

374

375

### Wildcard Queries

376

377

```javascript { .api }

378

/**

379

* Wildcard syntax:

380

* - Prefix: term* (matches terms starting with "term")

381

* - Suffix: *term (matches terms ending with "term")

382

* - Contains: *term* (matches terms containing "term")

383

*/

384

```

385

386

**Usage Examples:**

387

388

```javascript

389

// Prefix matching

390

idx.search('java*'); // Matches "java", "javascript", "javadoc"

391

392

// Suffix matching

393

idx.search('*script'); // Matches "javascript", "typescript"

394

395

// Contains matching

396

idx.search('*node*'); // Matches "node", "nodejs", "node_modules"

397

398

// Field-specific wildcards

399

idx.search('title:java* content:*script');

400

401

// Programmatic wildcards

402

const wildcardResults = idx.query(function (q) {

403

q.term('java', {

404

wildcard: lunr.Query.wildcard.TRAILING, // "java*"

405

boost: 5

406

});

407

408

q.term('script', {

409

wildcard: lunr.Query.wildcard.LEADING, // "*script"

410

fields: ['content']

411

});

412

});

413

```

414

415

### Fuzzy Matching

416

417

```javascript { .api }

418

/**

419

* Fuzzy matching syntax:

420

* - Edit distance 1: term~1 (allow 1 character difference)

421

* - Edit distance 2: term~2 (allow 2 character differences)

422

* - No default distance: the number is required; a bare term~ raises a QueryParseError ("edit distance must be numeric")

423

*/

424

```

425

426

**Usage Examples:**

427

428

```javascript

429

// Basic fuzzy matching

430

idx.search('javascript~1'); // Matches "javascript", "javascrpt", etc.

431

432

// Higher edit distance

433

idx.search('tutorial~2'); // More tolerant of typos

434

435

// Fuzzy with field restrictions

436

idx.search('title:javascrpt~1');

437

438

// Fuzzy with other operators

439

idx.search('+javascrpt~1 +tutorial~1');

440

441

// Programmatic fuzzy matching

442

const fuzzyResults = idx.query(function (q) {

443

q.term('javascrpt', {

444

editDistance: 1,

445

boost: 8 // Lower boost for fuzzy matches

446

});

447

448

q.term('tutorial', {

449

editDistance: 2,

450

fields: ['title', 'content']

451

});

452

});

453

```

454

455

### Term Boosting

456

457

```javascript { .api }

458

/**

459

* Term boosting syntax:

460

* - Basic boost: term^5 (boost by factor of 5)

461

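* - Integer boosts only in query strings: only the digits directly after ^ are read, so term^1.5 is not parsed as 1.5; use q.term(term, { boost: 1.5 }) for fractional boosts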

462

* - Field boost: field:term^10 (boost field-specific term)

463

*/

464

```

465

466

**Usage Examples:**

467

468

```javascript

469

// Basic term boosting

470

idx.search('javascript^10 tutorial');

471

472

// Multiple boosting

473

idx.search('important^15 secondary^2 normal');

474

475

// Field-specific boosting

476

idx.search('title:javascript^20 content:tutorial^5');

477

478

// Boost with other modifiers

479

idx.search('title:java*^10 +tutorial~1^5');

480

481

// Programmatic boosting

482

const boostedResults = idx.query(function (q) {

483

q.term('javascript', {

484

boost: 20,

485

fields: ['title']

486

});

487

488

q.term('tutorial', {

489

boost: 5,

490

presence: lunr.Query.presence.OPTIONAL

491

});

492

});

493

```

494

495

## Advanced Query Patterns

496

497

### Multi-Stage Queries

498

499

```javascript

500

// Implement query expansion and refinement

501

function expandedSearch(originalQuery, expansions = {}) {

502

return idx.query(function (q) {

503

// Original query with high boost

504

q.term(originalQuery, { boost: 20 });

505

506

// Add expansions with lower boosts

507

Object.entries(expansions).forEach(([term, boost]) => {

508

q.term(term, { boost: boost || 5 });

509

});

510

});

511

}

512

513

// Search for "js" with expansions

514

const expandedResults = expandedSearch('js', {

515

'javascript': 15,

516

'node.js': 10,

517

'react': 8

518

});

519

```

520

521

### Contextual Queries

522

523

```javascript

524

// Build context-aware queries

525

function contextualSearch(mainTerm, context = {}) {

526

return idx.query(function (q) {

527

// Main search term

528

q.term(mainTerm, {

529

boost: 15,

530

presence: lunr.Query.presence.REQUIRED

531

});

532

533

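// Context terms boost relevance (every field named below must be defined on the index; the example index above only defines title, content and tags)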

534

if (context.category) {

535

q.term(context.category, {

536

fields: ['tags', 'category'],

537

boost: 10

538

});

539

}

540

541

if (context.level) {

542

q.term(context.level, {

543

fields: ['level', 'difficulty'],

544

boost: 8

545

});

546

}

547

548

// Exclude unwanted content

549

if (context.exclude) {

550

context.exclude.forEach(term => {

551

q.term(term, { presence: lunr.Query.presence.PROHIBITED });

552

});

553

}

554

});

555

}

556

557

// Search for tutorials with context

558

const contextualResults = contextualSearch('tutorial', {

559

category: 'javascript',

560

level: 'beginner',

561

exclude: ['deprecated', 'outdated']

562

});

563

```

564

565

### Query Templates

566

567

```javascript

568

// Reusable query templates

569

const queryTemplates = {

570

exact: (term, fields = []) => (q) => {

571

q.term(term, {

572

fields: fields.length ? fields : undefined,

573

boost: 20,

574

presence: lunr.Query.presence.REQUIRED,

575

usePipeline: false // Exact matching without stemming

576

});

577

},

578

579

fuzzy: (term, distance = 1, boost = 10) => (q) => {

580

q.term(term, {

581

editDistance: distance,

582

boost: boost

583

});

584

},

585

586

fieldSearch: (field, term, boost = 15) => (q) => {

587

q.term(term, {

588

fields: [field],

589

boost: boost

590

});

591

}

592

};

593

594

// Use templates

595

const exactResults = idx.query(queryTemplates.exact('javascript', ['title']));

596

const fuzzyResults = idx.query(queryTemplates.fuzzy('javascrpt', 1, 8));

597

```

598

599

### Performance-Optimized Queries

600

601

```javascript

602

// Optimize queries for large indexes

603

function optimizedQuery(terms, options = {}) {

604

return idx.query(function (q) {

605

terms.forEach((term, index) => {

606

const isRequired = options.requireAll && index === 0;

607

const boost = options.boosts?.[index] || (5 - index); // Decreasing boost

608

609

q.term(term, {

610

boost: boost,

611

presence: isRequired ? lunr.Query.presence.REQUIRED : lunr.Query.presence.OPTIONAL,

612

// Disable pipeline for exact matches on first term

613

usePipeline: index > 0

614

});

615

});

616

617

// Add prohibited terms

618

if (options.exclude) {

619

options.exclude.forEach(term => {

620

q.term(term, { presence: lunr.Query.presence.PROHIBITED });

621

});

622

}

623

});

624

}

625

626

// Optimized search

627

const optimizedResults = optimizedQuery(

628

['javascript', 'tutorial', 'beginner'],

629

{

630

requireAll: false,

631

boosts: [20, 10, 5],

632

exclude: ['advanced', 'deprecated']

633

}

634

);

635

```

636

637

## Query Debugging and Analysis

638

639

### Query Introspection

640

641

```javascript

642

// Analyze query structure (for debugging)

643

function analyzeQuery(queryString) {

644

const query = new lunr.Query(['title', 'content', 'tags']);

645

const parser = new lunr.QueryParser(queryString, query);

646

647

try {

648

const parsedQuery = parser.parse();

649

650

console.log('Query analysis:');

651

console.log('- Clauses:', parsedQuery.clauses.length);

652

653

parsedQuery.clauses.forEach((clause, index) => {

654

console.log(` Clause ${index + 1}:`);

655

console.log(` Term: "${clause.term}"`);

656

console.log(` Fields: ${clause.fields.join(', ')}`);

657

console.log(` Boost: ${clause.boost}`);

658

console.log(` Presence: ${clause.presence}`);

659

console.log(` Wildcard: ${clause.wildcard}`);

660

console.log(` Edit Distance: ${clause.editDistance}`);

661

});

662

663

return parsedQuery;

664

} catch (error) {

665

if (error instanceof lunr.QueryParseError) {

666

console.error('Query parsing failed:', error.message);

667

console.error('At position:', error.start, '-', error.end);

668

}

669

throw error;

670

}

671

}

672

673

// Debug query

674

analyzeQuery('title:javascript^10 +tutorial~1 -deprecated');

675

```