# Query Engines

Query processing and response synthesis for retrieving and generating answers from indexed data in LlamaIndex.TS.

## Import

```typescript
import { VectorStoreIndex } from "llamaindex";
// Or from specific submodules
import { RetrieverQueryEngine, SubQuestionQueryEngine } from "llamaindex/engines";
```

## Overview

Query engines in LlamaIndex.TS handle the process of retrieving relevant information from indices and synthesizing coherent responses. They combine retrieval mechanisms with response generation to provide comprehensive answers to user queries.

## Base Query Engine Interface

All query engines implement the base interface.

```typescript { .api }
interface BaseQueryEngine {
  query(query: string, options?: QueryOptions): Promise<EngineResponse>;
  aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
}

interface QueryOptions {
  stream?: boolean;
  preFilters?: MetadataFilters;
  similarity_top_k?: number;
}
```
## RetrieverQueryEngine

The most commonly used query engine that combines a retriever with response synthesis.

```typescript { .api }
class RetrieverQueryEngine implements BaseQueryEngine {
  constructor(args: {
    retriever: BaseRetriever;
    responseSynthesizer?: ResponseSynthesizer;
    nodePostprocessors?: BasePostprocessor[];
  });

  query(query: string, options?: QueryOptions): Promise<EngineResponse>;
  aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

  retriever: BaseRetriever;
  responseSynthesizer: ResponseSynthesizer;
  nodePostprocessors: BasePostprocessor[];
}
```
## SubQuestionQueryEngine

Breaks down complex queries into sub-questions for better handling of multi-part queries.

```typescript { .api }
class SubQuestionQueryEngine implements BaseQueryEngine {
  constructor(args: {
    queryEngineTools: QueryEngineTool[];
    questionGen?: LLMQuestionGenerator;
    responseSynthesizer?: ResponseSynthesizer;
  });

  query(query: string, options?: QueryOptions): Promise<EngineResponse>;
  aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

  queryEngineTools: QueryEngineTool[];
  questionGen: LLMQuestionGenerator;
  responseSynthesizer: ResponseSynthesizer;
}
```
74
75
## RouterQueryEngine
76
77
Routes queries to different query engines based on query characteristics.
78
79
```typescript { .api }
80
class RouterQueryEngine implements BaseQueryEngine {
81
constructor(args: {
82
selector: BaseSelector;
83
queryEngineTools: QueryEngineTool[];
84
defaultTool?: QueryEngineTool;
85
});
86
87
query(query: string, options?: QueryOptions): Promise<EngineResponse>;
88
aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;
89
90
selector: BaseSelector;
91
queryEngineTools: QueryEngineTool[];
92
defaultTool?: QueryEngineTool;
93
}
94
```
## Response Types

### EngineResponse

Standard response object returned by query engines.

```typescript { .api }
class EngineResponse {
  response: string;
  sourceNodes?: NodeWithScore[];
  metadata?: Record<string, any>;

  toString(): string;
  print(): void;
}

interface NodeWithScore {
  node: BaseNode;
  score?: number;
}
```
## Basic Usage

### Simple Query Engine

```typescript
import { VectorStoreIndex, Document } from "llamaindex";

// Create index
const documents = [
  new Document({ text: "LlamaIndex is a data framework for LLM applications." }),
  new Document({ text: "It provides tools for document processing and retrieval." }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Create query engine (uses RetrieverQueryEngine internally)
const queryEngine = index.asQueryEngine();

// Query the engine
const response = await queryEngine.query("What is LlamaIndex?");
console.log("Answer:", response.toString());

// Access source information
if (response.sourceNodes) {
  console.log("Sources:");
  response.sourceNodes.forEach((nodeWithScore, i) => {
    console.log(` ${i + 1}. ${nodeWithScore.node.text} (score: ${nodeWithScore.score})`);
  });
}
```
### Streaming Responses

```typescript
// Stream responses for real-time output
const response = await queryEngine.query("Explain LlamaIndex in detail", {
  stream: true
});

// For streaming, iterate over the async generator
for await (const chunk of queryEngine.aquery("What is LlamaIndex?")) {
  console.log("Chunk:", chunk.response);
}
```
### Custom Retriever Query Engine

```typescript
import { RetrieverQueryEngine, ResponseSynthesizer } from "llamaindex/engines";

// Create custom retriever
const retriever = index.asRetriever({
  similarityTopK: 5,
});

// Create custom response synthesizer
const responseSynthesizer = new ResponseSynthesizer({
  responseMode: "tree_summarize",
});

// Create query engine with custom components
const customQueryEngine = new RetrieverQueryEngine({
  retriever,
  responseSynthesizer,
});

const response = await customQueryEngine.query("How does semantic search work?");
```
## Advanced Usage

### Query Engine with Filters

```typescript
// Query with metadata filters
const response = await queryEngine.query("Find financial information", {
  preFilters: {
    filters: [
      { key: "category", value: "finance", operator: "==" },
      { key: "year", value: 2024, operator: ">=" }
    ]
  }
});
```
### Sub-Question Query Engine

```typescript
import { SubQuestionQueryEngine, QueryEngineTool } from "llamaindex/engines";

// Create multiple specialized indices
const techIndex = await VectorStoreIndex.fromDocuments(techDocs);
const financeIndex = await VectorStoreIndex.fromDocuments(financeDocs);

// Create query engine tools
const queryEngineTools = [
  new QueryEngineTool({
    queryEngine: techIndex.asQueryEngine(),
    metadata: {
      name: "tech_search",
      description: "Useful for answering questions about technical topics"
    }
  }),
  new QueryEngineTool({
    queryEngine: financeIndex.asQueryEngine(),
    metadata: {
      name: "finance_search",
      description: "Useful for answering questions about financial topics"
    }
  })
];

// Create sub-question query engine
const subQuestionQE = new SubQuestionQueryEngine({
  queryEngineTools,
});

// Ask complex multi-part question
const response = await subQuestionQE.query(
  "What are the technical challenges and financial implications of implementing AI in healthcare?"
);

console.log("Complex answer:", response.toString());
```
### Router Query Engine

```typescript
import { RouterQueryEngine, LLMSingleSelector } from "llamaindex/engines";

// Create selector to route queries
const selector = new LLMSingleSelector();

// Create router query engine
const routerQE = new RouterQueryEngine({
  selector,
  queryEngineTools,
  defaultTool: queryEngineTools[0], // Fallback tool
});

// Route queries automatically
const response1 = await routerQE.query("How do neural networks work?"); // → tech_search
const response2 = await routerQE.query("What was the quarterly revenue?"); // → finance_search
```
## Response Synthesis Modes

### Tree Summarize

Best for comprehensive answers from multiple sources.

```typescript
import { ResponseSynthesizer } from "llamaindex";

const queryEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever(),
  responseSynthesizer: new ResponseSynthesizer({
    responseMode: "tree_summarize",
  }),
});
```

### Refine Mode

Iteratively refines the answer with each retrieved chunk.

```typescript
const queryEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever(),
  responseSynthesizer: new ResponseSynthesizer({
    responseMode: "refine",
  }),
});
```

### Compact Mode

Combines chunks to maximize context window usage.

```typescript
const queryEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever(),
  responseSynthesizer: new ResponseSynthesizer({
    responseMode: "compact",
  }),
});
```
## Post-Processing Pipeline

The query engine supports a comprehensive post-processing pipeline to refine and filter retrieved nodes before response synthesis.

### BasePostprocessor Interface

All post-processors implement the base interface.

```typescript { .api }
interface BasePostprocessor {
  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
```
### Core Post-Processors

```typescript { .api }
class SimilarityPostprocessor implements BasePostprocessor {
  constructor(options: {
    similarityCutoff?: number;
    similarityTop?: number;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class KeywordNodePostprocessor implements BasePostprocessor {
  constructor(options: {
    requiredKeywords?: string[];
    excludeKeywords?: string[];
    lang?: string;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class MetadataReplacementPostProcessor implements BasePostprocessor {
  constructor(targetMetadataKey: string);

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class SentenceEmbeddingOptimizer implements BasePostprocessor {
  constructor(options: {
    embedModel?: BaseEmbedding;
    percentilesCutoff?: number;
    thresholdCutoff?: number;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class CohereRerank implements BasePostprocessor {
  constructor(options: {
    apiKey: string;
    topN?: number;
    model?: string;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class LLMRerank implements BasePostprocessor {
  constructor(options: {
    llm?: LLM;
    topN?: number;
    choice_batch_size?: number;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class FixedRecencyPostprocessor implements BasePostprocessor {
  constructor(options: {
    topK?: number;
    dateKey?: string;
    inPlace?: boolean;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class EmbeddingRecencyPostprocessor implements BasePostprocessor {
  constructor(options: {
    embedModel?: BaseEmbedding;
    similarityTopK?: number;
    dateKey?: string;
    recencyWeights?: number[];
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}

class TimeWeightedPostprocessor implements BasePostprocessor {
  constructor(options: {
    timeDecayFunction?: (timeDiff: number) => number;
    timeKey?: string;
    nowTimestamp?: Date;
  });

  postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];
}
```
### Advanced Post-Processing Examples

```typescript
import {
  SimilarityPostprocessor,
  KeywordNodePostprocessor,
  CohereRerank,
  LLMRerank,
  SentenceEmbeddingOptimizer
} from "llamaindex/postprocessors";

// Multi-stage post-processing pipeline
const queryEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever({ similarityTopK: 20 }), // Get more candidates
  nodePostprocessors: [
    // Stage 1: Filter by similarity threshold
    new SimilarityPostprocessor({
      similarityCutoff: 0.6, // Remove low-relevance nodes
    }),

    // Stage 2: Filter by keywords
    new KeywordNodePostprocessor({
      requiredKeywords: ["important", "relevant"],
      excludeKeywords: ["outdated", "deprecated"],
    }),

    // Stage 3: Sentence-level embedding optimization
    new SentenceEmbeddingOptimizer({
      percentilesCutoff: 0.8, // Keep top 80% of sentence embeddings
      thresholdCutoff: 0.7, // Minimum threshold
    }),

    // Stage 4: Re-rank using external service
    new CohereRerank({
      apiKey: process.env.COHERE_API_KEY,
      topN: 5, // Final top 5 results
      model: "rerank-english-v2.0",
    }),
  ],
});

// Time-weighted post-processing for temporal data
const temporalQueryEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever(),
  nodePostprocessors: [
    new TimeWeightedPostprocessor({
      timeKey: "created_date",
      timeDecayFunction: (timeDiffDays) => Math.exp(-timeDiffDays / 30), // Exponential decay
    }),
    new FixedRecencyPostprocessor({
      topK: 10,
      dateKey: "created_date",
    }),
  ],
});

// LLM-based re-ranking
const llmRerankEngine = new RetrieverQueryEngine({
  retriever: index.asRetriever({ similarityTopK: 10 }),
  nodePostprocessors: [
    new LLMRerank({
      llm: /* your LLM instance */,
      topN: 3,
      choice_batch_size: 5, // Process in batches for efficiency
    }),
  ],
});
```
## Query Engine Tools

### Creating Tools for Agents

```typescript
import { QueryEngineTool } from "llamaindex/tools";

// Create query engine tool for use with agents
const queryTool = new QueryEngineTool({
  queryEngine: index.asQueryEngine(),
  metadata: {
    name: "knowledge_search",
    description: "Search the knowledge base for information about the company",
  },
});

// Use with ReAct agent
import { ReActAgent } from "llamaindex/agent";

const agent = new ReActAgent({
  tools: [queryTool],
  llm: /* your LLM */,
});

const response = await agent.chat("Find information about our product roadmap");
```
## Performance Optimization

### Async Query Processing

```typescript
// Process multiple queries concurrently
const queries = [
  "What is machine learning?",
  "How does deep learning work?",
  "What are neural networks?",
];

const responses = await Promise.all(
  queries.map(query => queryEngine.query(query))
);

responses.forEach((response, i) => {
  console.log(`Query ${i + 1}:`, response.toString());
});
```
### Query Caching

```typescript
// Simple query cache implementation
class CachedQueryEngine {
  private cache = new Map<string, EngineResponse>();

  constructor(private queryEngine: BaseQueryEngine) {}

  async query(query: string): Promise<EngineResponse> {
    if (this.cache.has(query)) {
      return this.cache.get(query)!;
    }

    const response = await this.queryEngine.query(query);
    this.cache.set(query, response);
    return response;
  }
}

const cachedQE = new CachedQueryEngine(queryEngine);
```
### Batch Query Processing

```typescript
// Process queries in batches to manage memory
const batchQueries = async (queries: string[], batchSize: number = 5) => {
  const results: EngineResponse[] = [];

  for (let i = 0; i < queries.length; i += batchSize) {
    const batch = queries.slice(i, i + batchSize);
    const batchResults = await Promise.all(
      batch.map(query => queryEngine.query(query))
    );
    results.push(...batchResults);

    // Optional: Add delay between batches
    await new Promise(resolve => setTimeout(resolve, 100));
  }

  return results;
};
```
## Error Handling

### Robust Query Processing

```typescript
const safeQuery = async (query: string): Promise<EngineResponse | null> => {
  try {
    const response = await queryEngine.query(query);

    // Validate response
    if (!response.response || response.response.trim().length === 0) {
      console.warn("Empty response received");
      return null;
    }

    return response;
  } catch (error) {
    console.error("Query failed:", error);

    // Handle specific error types
    if (error.message.includes("embedding")) {
      console.error("Embedding service issue");
    } else if (error.message.includes("LLM")) {
      console.error("Language model issue");
    }

    return null;
  }
};

const response = await safeQuery("What is the meaning of life?");
if (response) {
  console.log("Answer:", response.toString());
} else {
  console.log("Could not generate response");
}
```
## Best Practices

### Query Engine Selection

```typescript
// Choose the right query engine for your use case
const createQueryEngine = (useCase: string) => {
  switch (useCase) {
    case "simple":
      // Basic retrieval and synthesis
      return index.asQueryEngine();

    case "complex":
      // Multi-step reasoning
      return new SubQuestionQueryEngine({ queryEngineTools });

    case "specialized":
      // Route to different knowledge bases
      return new RouterQueryEngine({ selector, queryEngineTools });

    default:
      return index.asQueryEngine();
  }
};
```
### Response Quality

```typescript
// Configure for high-quality responses
const highQualityQE = new RetrieverQueryEngine({
  retriever: index.asRetriever({
    similarityTopK: 5, // Get more context
  }),
  responseSynthesizer: new ResponseSynthesizer({
    responseMode: "tree_summarize", // Best synthesis method
  }),
  nodePostprocessors: [
    new SimilarityPostprocessor({
      similarityCutoff: 0.7, // Only high-quality sources
    }),
  ],
});
```
### Monitoring and Debugging

```typescript
// Add response metadata logging
const logQueryResponse = async (query: string) => {
  const response = await queryEngine.query(query);

  console.log("Query:", query);
  console.log("Response:", response.toString());
  console.log("Source count:", response.sourceNodes?.length || 0);
  console.log("Metadata:", response.metadata);

  return response;
};
```