or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents.md chat-engines.md document-processing.md embeddings.md index.md llm-integration.md query-engines.md response-synthesis.md settings.md storage.md tools.md vector-indexing.md
tile.json

docs/query-engines.md

0

# Query Engines

1

2

Query processing and response synthesis for retrieving and generating answers from indexed data in LlamaIndex.TS.

3

4

## Import

5

6

```typescript

7

import { VectorStoreIndex } from "llamaindex";

8

// Or from specific submodules

9

import { RetrieverQueryEngine, SubQuestionQueryEngine } from "llamaindex/engines";

10

```

11

12

## Overview

13

14

Query engines in LlamaIndex.TS handle the process of retrieving relevant information from indices and synthesizing coherent responses. They combine retrieval mechanisms with response generation to provide comprehensive answers to user queries.

15

16

## Base Query Engine Interface

17

18

All query engines implement the base interface.

19

20

```typescript { .api }

21

interface BaseQueryEngine {

22

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

23

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

24

}

25

26

interface QueryOptions {

27

stream?: boolean;

28

preFilters?: MetadataFilters;

29

similarityTopK?: number;

30

}

31

```

32

33

## RetrieverQueryEngine

34

35

The most commonly used query engine that combines a retriever with response synthesis.

36

37

```typescript { .api }

38

class RetrieverQueryEngine implements BaseQueryEngine {

39

constructor(args: {

40

retriever: BaseRetriever;

41

responseSynthesizer?: ResponseSynthesizer;

42

nodePostprocessors?: BasePostProcessor[];

43

});

44

45

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

46

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

47

48

retriever: BaseRetriever;

49

responseSynthesizer: ResponseSynthesizer;

50

nodePostprocessors: BasePostProcessor[];

51

}

52

```

53

54

## SubQuestionQueryEngine

55

56

Breaks down complex queries into sub-questions for better handling of multi-part queries.

57

58

```typescript { .api }

59

class SubQuestionQueryEngine implements BaseQueryEngine {

60

constructor(args: {

61

queryEngineTools: QueryEngineTool[];

62

questionGen?: LLMQuestionGenerator;

63

responseSynthesizer?: ResponseSynthesizer;

64

});

65

66

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

67

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

68

69

queryEngineTools: QueryEngineTool[];

70

questionGen: LLMQuestionGenerator;

71

responseSynthesizer: ResponseSynthesizer;

72

}

73

```

74

75

## RouterQueryEngine

76

77

Routes queries to different query engines based on query characteristics.

78

79

```typescript { .api }

80

class RouterQueryEngine implements BaseQueryEngine {

81

constructor(args: {

82

selector: BaseSelector;

83

queryEngineTools: QueryEngineTool[];

84

defaultTool?: QueryEngineTool;

85

});

86

87

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

88

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

89

90

selector: BaseSelector;

91

queryEngineTools: QueryEngineTool[];

92

defaultTool?: QueryEngineTool;

93

}

94

```

95

96

## Response Types

97

98

### EngineResponse

99

100

Standard response object returned by query engines.

101

102

```typescript { .api }

103

class EngineResponse {

104

response: string;

105

sourceNodes?: NodeWithScore[];

106

metadata?: Record<string, any>;

107

108

toString(): string;

109

print(): void;

110

}

111

112

interface NodeWithScore {

113

node: BaseNode;

114

score?: number;

115

}

116

```

117

118

## Basic Usage

119

120

### Simple Query Engine

121

122

```typescript

123

import { VectorStoreIndex, Document } from "llamaindex";

124

125

// Create index

126

const documents = [

127

new Document({ text: "LlamaIndex is a data framework for LLM applications." }),

128

new Document({ text: "It provides tools for document processing and retrieval." }),

129

];

130

131

const index = await VectorStoreIndex.fromDocuments(documents);

132

133

// Create query engine (uses RetrieverQueryEngine internally)

134

const queryEngine = index.asQueryEngine();

135

136

// Query the engine

137

const response = await queryEngine.query("What is LlamaIndex?");

138

console.log("Answer:", response.toString());

139

140

// Access source information

141

if (response.sourceNodes) {

142

console.log("Sources:");

143

response.sourceNodes.forEach((nodeWithScore, i) => {

144

console.log(` ${i + 1}. ${nodeWithScore.node.text} (score: ${nodeWithScore.score})`);

145

});

146

}

147

```

148

149

### Streaming Responses

150

151

```typescript

152

// Stream responses for real-time output

153

const response = await queryEngine.query("Explain LlamaIndex in detail", {

154

stream: true

155

});

156

157

// For streaming, iterate over the async generator

158

for await (const chunk of queryEngine.aquery("What is LlamaIndex?")) {

159

console.log("Chunk:", chunk.response);

160

}

161

```

162

163

### Custom Retriever Query Engine

164

165

```typescript

166

import { RetrieverQueryEngine, ResponseSynthesizer } from "llamaindex/engines";

167

168

// Create custom retriever

169

const retriever = index.asRetriever({

170

similarityTopK: 5,

171

});

172

173

// Create custom response synthesizer

174

const responseSynthesizer = new ResponseSynthesizer({

175

responseMode: "tree_summarize",

176

});

177

178

// Create query engine with custom components

179

const customQueryEngine = new RetrieverQueryEngine({

180

retriever,

181

responseSynthesizer,

182

});

183

184

const response = await customQueryEngine.query("How does semantic search work?");

185

```

186

187

## Advanced Usage

188

189

### Query Engine with Filters

190

191

```typescript

192

// Query with metadata filters

193

const response = await queryEngine.query("Find financial information", {

194

preFilters: {

195

filters: [

196

{ key: "category", value: "finance", operator: "==" },

197

{ key: "year", value: 2024, operator: ">=" }

198

]

199

}

200

});

201

```

202

203

### Sub-Question Query Engine

204

205

```typescript

206

import { SubQuestionQueryEngine, QueryEngineTool } from "llamaindex/engines";

207

208

// Create multiple specialized indices

209

const techIndex = await VectorStoreIndex.fromDocuments(techDocs);

210

const financeIndex = await VectorStoreIndex.fromDocuments(financeDocs);

211

212

// Create query engine tools

213

const queryEngineTools = [

214

new QueryEngineTool({

215

queryEngine: techIndex.asQueryEngine(),

216

metadata: {

217

name: "tech_search",

218

description: "Useful for answering questions about technical topics"

219

}

220

}),

221

new QueryEngineTool({

222

queryEngine: financeIndex.asQueryEngine(),

223

metadata: {

224

name: "finance_search",

225

description: "Useful for answering questions about financial topics"

226

}

227

})

228

];

229

230

// Create sub-question query engine

231

const subQuestionQE = new SubQuestionQueryEngine({

232

queryEngineTools,

233

});

234

235

// Ask complex multi-part question

236

const response = await subQuestionQE.query(

237

"What are the technical challenges and financial implications of implementing AI in healthcare?"

238

);

239

240

console.log("Complex answer:", response.toString());

241

```

242

243

### Router Query Engine

244

245

```typescript

246

import { RouterQueryEngine, LLMSingleSelector } from "llamaindex/engines";

247

248

// Create selector to route queries

249

const selector = new LLMSingleSelector();

250

251

// Create router query engine

252

const routerQE = new RouterQueryEngine({

253

selector,

254

queryEngineTools,

255

defaultTool: queryEngineTools[0], // Fallback tool

256

});

257

258

// Route queries automatically

259

const response1 = await routerQE.query("How do neural networks work?"); // → tech_search

260

const response2 = await routerQE.query("What was the quarterly revenue?"); // → finance_search

261

```

262

263

## Response Synthesis Modes

264

265

### Tree Summarize

266

267

Best for comprehensive answers from multiple sources.

268

269

```typescript

270

import { ResponseSynthesizer } from "llamaindex";

271

272

const queryEngine = new RetrieverQueryEngine({

273

retriever: index.asRetriever(),

274

responseSynthesizer: new ResponseSynthesizer({

275

responseMode: "tree_summarize",

276

}),

277

});

278

```

279

280

### Refine Mode

281

282

Iteratively refines the answer with each retrieved chunk.

283

284

```typescript

285

const queryEngine = new RetrieverQueryEngine({

286

retriever: index.asRetriever(),

287

responseSynthesizer: new ResponseSynthesizer({

288

responseMode: "refine",

289

}),

290

});

291

```

292

293

### Compact Mode

294

295

Combines chunks to maximize context window usage.

296

297

```typescript

298

const queryEngine = new RetrieverQueryEngine({

299

retriever: index.asRetriever(),

300

responseSynthesizer: new ResponseSynthesizer({

301

responseMode: "compact",

302

}),

303

});

304

```

305

306

## Post-Processing Pipeline

307

308

The query engine supports a comprehensive post-processing pipeline to refine and filter retrieved nodes before response synthesis.

309

310

### BasePostProcessor Interface

311

312

All post-processors implement the base interface.

313

314

```typescript { .api }

315

interface BasePostProcessor {

316

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

317

}

318

```

319

320

### Core Post-Processors

321

322

```typescript { .api }

323

class SimilarityPostprocessor implements BasePostProcessor {

324

constructor(options: {

325

similarityCutoff?: number;

326

similarityTop?: number;

327

});

328

329

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

330

}

331

332

class KeywordNodePostprocessor implements BasePostProcessor {

333

constructor(options: {

334

requiredKeywords?: string[];

335

excludeKeywords?: string[];

336

lang?: string;

337

});

338

339

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

340

}

341

342

class MetadataReplacementPostProcessor implements BasePostProcessor {

343

constructor(targetMetadataKey: string);

344

345

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

346

}

347

348

class SentenceEmbeddingOptimizer implements BasePostProcessor {

349

constructor(options: {

350

embedModel?: BaseEmbedding;

351

percentilesCutoff?: number;

352

thresholdCutoff?: number;

353

});

354

355

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

356

}

357

358

class CohereRerank implements BasePostProcessor {

359

constructor(options: {

360

apiKey: string;

361

topN?: number;

362

model?: string;

363

});

364

365

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

366

}

367

368

class LLMRerank implements BasePostProcessor {

369

constructor(options: {

370

llm?: LLM;

371

topN?: number;

372

choice_batch_size?: number;

373

});

374

375

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

376

}

377

378

class FixedRecencyPostprocessor implements BasePostProcessor {

379

constructor(options: {

380

topK?: number;

381

dateKey?: string;

382

inPlace?: boolean;

383

});

384

385

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

386

}

387

388

class EmbeddingRecencyPostprocessor implements BasePostProcessor {

389

constructor(options: {

390

embedModel?: BaseEmbedding;

391

similarityTopK?: number;

392

dateKey?: string;

393

recencyWeights?: number[];

394

});

395

396

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

397

}

398

399

class TimeWeightedPostprocessor implements BasePostProcessor {

400

constructor(options: {

401

timeDecayFunction?: (timeDiff: number) => number;

402

timeKey?: string;

403

nowTimestamp?: Date;

404

});

405

406

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

407

}

408

```

409

410

### Advanced Post-Processing Examples

411

412

```typescript

413

import {

414

SimilarityPostprocessor,

415

KeywordNodePostprocessor,

416

CohereRerank,

417

LLMRerank,

418

SentenceEmbeddingOptimizer

419

} from "llamaindex/postprocessors";

420

421

// Multi-stage post-processing pipeline

422

const queryEngine = new RetrieverQueryEngine({

423

retriever: index.asRetriever({ similarityTopK: 20 }), // Get more candidates

424

nodePostprocessors: [

425

// Stage 1: Filter by similarity threshold

426

new SimilarityPostprocessor({

427

similarityCutoff: 0.6, // Remove low-relevance nodes

428

}),

429

430

// Stage 2: Filter by keywords

431

new KeywordNodePostprocessor({

432

requiredKeywords: ["important", "relevant"],

433

excludeKeywords: ["outdated", "deprecated"],

434

}),

435

436

// Stage 3: Sentence-level embedding optimization

437

new SentenceEmbeddingOptimizer({

438

percentilesCutoff: 0.8, // Keep top 80% of sentence embeddings

439

thresholdCutoff: 0.7, // Minimum threshold

440

}),

441

442

// Stage 4: Re-rank using external service

443

new CohereRerank({

444

apiKey: process.env.COHERE_API_KEY,

445

topN: 5, // Final top 5 results

446

model: "rerank-english-v2.0",

447

}),

448

],

449

});

450

451

// Time-weighted post-processing for temporal data

452

const temporalQueryEngine = new RetrieverQueryEngine({

453

retriever: index.asRetriever(),

454

nodePostprocessors: [

455

new TimeWeightedPostprocessor({

456

timeKey: "created_date",

457

timeDecayFunction: (timeDiffDays) => Math.exp(-timeDiffDays / 30), // Exponential decay

458

}),

459

new FixedRecencyPostprocessor({

460

topK: 10,

461

dateKey: "created_date",

462

}),

463

],

464

});

465

466

// LLM-based re-ranking

467

const llmRerankEngine = new RetrieverQueryEngine({

468

retriever: index.asRetriever({ similarityTopK: 10 }),

469

nodePostprocessors: [

470

new LLMRerank({

471

llm: /* your LLM instance */,

472

topN: 3,

473

choice_batch_size: 5, // Process in batches for efficiency

474

}),

475

],

476

});

477

```

478

479

## Query Engine Tools

480

481

### Creating Tools for Agents

482

483

```typescript

484

import { QueryEngineTool } from "llamaindex/tools";

485

486

// Create query engine tool for use with agents

487

const queryTool = new QueryEngineTool({

488

queryEngine: index.asQueryEngine(),

489

metadata: {

490

name: "knowledge_search",

491

description: "Search the knowledge base for information about the company",

492

},

493

});

494

495

// Use with ReAct agent

496

import { ReActAgent } from "llamaindex/agent";

497

498

const agent = new ReActAgent({

499

tools: [queryTool],

500

llm: /* your LLM */,

501

});

502

503

const response = await agent.chat("Find information about our product roadmap");

504

```

505

506

## Performance Optimization

507

508

### Async Query Processing

509

510

```typescript

511

// Process multiple queries concurrently

512

const queries = [

513

"What is machine learning?",

514

"How does deep learning work?",

515

"What are neural networks?",

516

];

517

518

const responses = await Promise.all(

519

queries.map(query => queryEngine.query(query))

520

);

521

522

responses.forEach((response, i) => {

523

console.log(`Query ${i + 1}:`, response.toString());

524

});

525

```

526

527

### Query Caching

528

529

```typescript

530

// Simple query cache implementation

531

class CachedQueryEngine {

532

private cache = new Map<string, EngineResponse>();

533

534

constructor(private queryEngine: BaseQueryEngine) {}

535

536

async query(query: string): Promise<EngineResponse> {

537

if (this.cache.has(query)) {

538

return this.cache.get(query)!;

539

}

540

541

const response = await this.queryEngine.query(query);

542

this.cache.set(query, response);

543

return response;

544

}

545

}

546

547

const cachedQE = new CachedQueryEngine(queryEngine);

548

```

549

550

### Batch Query Processing

551

552

```typescript

553

// Process queries in batches to manage memory

554

const batchQueries = async (queries: string[], batchSize: number = 5) => {

555

const results: EngineResponse[] = [];

556

557

for (let i = 0; i < queries.length; i += batchSize) {

558

const batch = queries.slice(i, i + batchSize);

559

const batchResults = await Promise.all(

560

batch.map(query => queryEngine.query(query))

561

);

562

results.push(...batchResults);

563

564

// Optional: Add delay between batches

565

await new Promise(resolve => setTimeout(resolve, 100));

566

}

567

568

return results;

569

};

570

```

571

572

## Error Handling

573

574

### Robust Query Processing

575

576

```typescript

577

const safeQuery = async (query: string): Promise<EngineResponse | null> => {

578

try {

579

const response = await queryEngine.query(query);

580

581

// Validate response

582

if (!response.response || response.response.trim().length === 0) {

583

console.warn("Empty response received");

584

return null;

585

}

586

587

return response;

588

} catch (error) {

589

console.error("Query failed:", error);

590

591

// Handle specific error types

592

if (error.message.includes("embedding")) {

593

console.error("Embedding service issue");

594

} else if (error.message.includes("LLM")) {

595

console.error("Language model issue");

596

}

597

598

return null;

599

}

600

};

601

602

const response = await safeQuery("What is the meaning of life?");

603

if (response) {

604

console.log("Answer:", response.toString());

605

} else {

606

console.log("Could not generate response");

607

}

608

```

609

610

## Best Practices

611

612

### Query Engine Selection

613

614

```typescript

615

// Choose the right query engine for your use case

616

const createQueryEngine = (useCase: string) => {

617

switch (useCase) {

618

case "simple":

619

// Basic retrieval and synthesis

620

return index.asQueryEngine();

621

622

case "complex":

623

// Multi-step reasoning

624

return new SubQuestionQueryEngine({ queryEngineTools });

625

626

case "specialized":

627

// Route to different knowledge bases

628

return new RouterQueryEngine({ selector, queryEngineTools });

629

630

default:

631

return index.asQueryEngine();

632

}

633

};

634

```

635

636

### Response Quality

637

638

```typescript

639

// Configure for high-quality responses

640

const highQualityQE = new RetrieverQueryEngine({

641

retriever: index.asRetriever({

642

similarityTopK: 5, // Get more context

643

}),

644

responseSynthesizer: new ResponseSynthesizer({

645

responseMode: "tree_summarize", // Best synthesis method

646

}),

647

nodePostprocessors: [

648

new SimilarityPostprocessor({

649

similarityCutoff: 0.7, // Only high-quality sources

650

}),

651

],

652

});

653

```

654

655

### Monitoring and Debugging

656

657

```typescript

658

// Add response metadata logging

659

const logQueryResponse = async (query: string) => {

660

const response = await queryEngine.query(query);

661

662

console.log("Query:", query);

663

console.log("Response:", response.toString());

664

console.log("Source count:", response.sourceNodes?.length || 0);

665

console.log("Metadata:", response.metadata);

666

667

return response;

668

};

669

```