or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents.md chat-engines.md document-processing.md embeddings.md index.md llm-integration.md query-engines.md response-synthesis.md settings.md storage.md tools.md vector-indexing.md
tile.json

docs/query-engines.md

0

# Query Engines

1

2

Query processing and response synthesis for retrieving and generating answers from indexed data in LlamaIndex.TS.

3

4

## Import

5

6

```typescript

7

import { VectorStoreIndex } from "llamaindex";

8

// Or from specific submodules

9

import { RetrieverQueryEngine, SubQuestionQueryEngine } from "llamaindex/engines";

10

```

11

12

## Overview

13

14

Query engines in LlamaIndex.TS handle the process of retrieving relevant information from indices and synthesizing coherent responses. They combine retrieval mechanisms with response generation to provide comprehensive answers to user queries.

15

16

## Base Query Engine Interface

17

18

All query engines implement the base interface.

19

20

```typescript { .api }

21

interface BaseQueryEngine {

22

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

23

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

24

}

25

26

interface QueryOptions {

27

stream?: boolean;

28

preFilters?: MetadataFilters;

29

similarityTopK?: number;

30

}

31

```

32

33

## RetrieverQueryEngine

34

35

The most commonly used query engine that combines a retriever with response synthesis.

36

37

```typescript { .api }

38

class RetrieverQueryEngine implements BaseQueryEngine {

39

constructor(args: {

40

retriever: BaseRetriever;

41

responseSynthesizer?: ResponseSynthesizer;

42

nodePostprocessors?: BasePostProcessor[];

43

});

44

45

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

46

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

47

48

retriever: BaseRetriever;

49

responseSynthesizer: ResponseSynthesizer;

50

nodePostprocessors: BasePostProcessor[];

51

}

52

```

53

54

## SubQuestionQueryEngine

55

56

Breaks down complex queries into sub-questions for better handling of multi-part queries.

57

58

```typescript { .api }

59

class SubQuestionQueryEngine implements BaseQueryEngine {

60

constructor(args: {

61

queryEngineTools: QueryEngineTool[];

62

questionGen?: LLMQuestionGenerator;

63

responseSynthesizer?: ResponseSynthesizer;

64

});

65

66

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

67

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

68

69

queryEngineTools: QueryEngineTool[];

70

questionGen: LLMQuestionGenerator;

71

responseSynthesizer: ResponseSynthesizer;

72

}

73

```

74

75

## RouterQueryEngine

76

77

Routes queries to different query engines based on query characteristics.

78

79

```typescript { .api }

80

class RouterQueryEngine implements BaseQueryEngine {

81

constructor(args: {

82

selector: BaseSelector;

83

queryEngineTools: QueryEngineTool[];

84

defaultTool?: QueryEngineTool;

85

});

86

87

query(query: string, options?: QueryOptions): Promise<EngineResponse>;

88

aquery(query: string, options?: QueryOptions): AsyncIterable<EngineResponse>;

89

90

selector: BaseSelector;

91

queryEngineTools: QueryEngineTool[];

92

defaultTool?: QueryEngineTool;

93

}

94

```

95

96

## Response Types

97

98

### EngineResponse

99

100

Standard response object returned by query engines.

101

102

```typescript { .api }

103

class EngineResponse {

104

response: string;

105

sourceNodes?: NodeWithScore[];

106

metadata?: Record<string, any>;

107

108

toString(): string;

109

print(): void;

110

}

111

112

interface NodeWithScore {

113

node: BaseNode;

114

score?: number;

115

}

116

```

117

118

## Basic Usage

119

120

### Simple Query Engine

121

122

```typescript

123

import { VectorStoreIndex, Document } from "llamaindex";

124

125

// Create index

126

const documents = [

127

new Document({ text: "LlamaIndex is a data framework for LLM applications." }),

128

new Document({ text: "It provides tools for document processing and retrieval." }),

129

];

130

131

const index = await VectorStoreIndex.fromDocuments(documents);

132

133

// Create query engine (uses RetrieverQueryEngine internally)

134

const queryEngine = index.asQueryEngine();

135

136

// Query the engine

137

const response = await queryEngine.query("What is LlamaIndex?");

138

console.log("Answer:", response.toString());

139

140

// Access source information

141

if (response.sourceNodes) {

142

console.log("Sources:");

143

response.sourceNodes.forEach((nodeWithScore, i) => {

144

console.log(` ${i + 1}. ${nodeWithScore.node.text} (score: ${nodeWithScore.score})`);

145

});

146

}

147

```

148

149

### Streaming Responses

150

151

```typescript

152

// Stream responses for real-time output

153

const response = await queryEngine.query("Explain LlamaIndex in detail", {

154

stream: true

155

});

156

157

// For streaming, iterate over the async generator

158

for await (const chunk of queryEngine.aquery("What is LlamaIndex?")) {

159

console.log("Chunk:", chunk.response);

160

}

161

```

162

163

### Custom Retriever Query Engine

164

165

```typescript

166

import { RetrieverQueryEngine, ResponseSynthesizer } from "llamaindex/engines";

167

168

// Create custom retriever

169

const retriever = index.asRetriever({

170

similarityTopK: 5,

171

});

172

173

// Create custom response synthesizer

174

const responseSynthesizer = new ResponseSynthesizer({

175

responseMode: "tree_summarize",

176

});

177

178

// Create query engine with custom components

179

const customQueryEngine = new RetrieverQueryEngine({

180

retriever,

181

responseSynthesizer,

182

});

183

184

const response = await customQueryEngine.query("How does semantic search work?");

185

```

186

187

## Advanced Usage

188

189

### Query Engine with Filters

190

191

```typescript

192

// Query with metadata filters

193

const response = await queryEngine.query("Find financial information", {

194

preFilters: {

195

filters: [

196

{ key: "category", value: "finance", operator: "==" },

197

{ key: "year", value: 2024, operator: ">=" }

198

]

199

}

200

});

201

```

202

203

### Sub-Question Query Engine

204

205

```typescript

206

import { SubQuestionQueryEngine, QueryEngineTool } from "llamaindex/engines";

207

208

// Create multiple specialized indices

209

const techIndex = await VectorStoreIndex.fromDocuments(techDocs);

210

const financeIndex = await VectorStoreIndex.fromDocuments(financeDocs);

211

212

// Create query engine tools

213

const queryEngineTools = [

214

new QueryEngineTool({

215

queryEngine: techIndex.asQueryEngine(),

216

metadata: {

217

name: "tech_search",

218

description: "Useful for answering questions about technical topics"

219

}

220

}),

221

new QueryEngineTool({

222

queryEngine: financeIndex.asQueryEngine(),

223

metadata: {

224

name: "finance_search",

225

description: "Useful for answering questions about financial topics"

226

}

227

})

228

];

229

230

// Create sub-question query engine

231

const subQuestionQE = new SubQuestionQueryEngine({

232

queryEngineTools,

233

});

234

235

// Ask complex multi-part question

236

const response = await subQuestionQE.query(

237

"What are the technical challenges and financial implications of implementing AI in healthcare?"

238

);

239

240

console.log("Complex answer:", response.toString());

241

```

242

243

### Router Query Engine

244

245

```typescript

246

import { RouterQueryEngine, LLMSingleSelector } from "llamaindex/engines";

247

248

// Create selector to route queries

249

const selector = new LLMSingleSelector();

250

251

// Create router query engine

252

const routerQE = new RouterQueryEngine({

253

selector,

254

queryEngineTools,

255

defaultTool: queryEngineTools[0], // Fallback tool

256

});

257

258

// Route queries automatically

259

const response1 = await routerQE.query("How do neural networks work?"); // → tech_search

260

const response2 = await routerQE.query("What was the quarterly revenue?"); // → finance_search

261

```

262

263

## Response Synthesis Modes

264

265

### Tree Summarize

266

267

Best for comprehensive answers from multiple sources.

268

269

```typescript

270

import { ResponseSynthesizer } from "llamaindex";

271

272

const queryEngine = new RetrieverQueryEngine({

273

retriever: index.asRetriever(),

274

responseSynthesizer: new ResponseSynthesizer({

275

responseMode: "tree_summarize",

276

}),

277

});

278

```

279

280

### Refine Mode

281

282

Iteratively refines the answer with each retrieved chunk.

283

284

```typescript

285

const queryEngine = new RetrieverQueryEngine({

286

retriever: index.asRetriever(),

287

responseSynthesizer: new ResponseSynthesizer({

288

responseMode: "refine",

289

}),

290

});

291

```

292

293

### Compact Mode

294

295

Combines chunks to maximize context window usage.

296

297

```typescript

298

const queryEngine = new RetrieverQueryEngine({

299

retriever: index.asRetriever(),

300

responseSynthesizer: new ResponseSynthesizer({

301

responseMode: "compact",

302

}),

303

});

304

```

305

306

## Post-Processing Pipeline

307

308

The query engine supports a comprehensive post-processing pipeline to refine and filter retrieved nodes before response synthesis.

309

310

### BasePostProcessor Interface

311

312

All post-processors implement the base interface.

313

314

```typescript { .api }

315

interface BasePostProcessor {

316

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

317

}

318

```

319

320

### Core Post-Processors

321

322

```typescript { .api }

323

class SimilarityPostprocessor implements BasePostProcessor {

324

constructor(options: {

325

similarityCutoff?: number;

326

similarityTop?: number;

327

});

328

329

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

330

}

331

332

class KeywordNodePostprocessor implements BasePostProcessor {

333

constructor(options: {

334

requiredKeywords?: string[];

335

excludeKeywords?: string[];

336

lang?: string;

337

});

338

339

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

340

}

341

342

class MetadataReplacementPostProcessor implements BasePostProcessor {

343

constructor(targetMetadataKey: string);

344

345

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

346

}

347

348

class SentenceEmbeddingOptimizer implements BasePostProcessor {

349

constructor(options: {

350

embedModel?: BaseEmbedding;

351

percentilesCutoff?: number;

352

thresholdCutoff?: number;

353

});

354

355

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

356

}

357

358

class CohereRerank implements BasePostProcessor {

359

constructor(options: {

360

apiKey: string;

361

topN?: number;

362

model?: string;

363

});

364

365

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

366

}

367

368

class LLMRerank implements BasePostProcessor {

369

constructor(options: {

370

llm?: LLM;

371

topN?: number;

372

choice_batch_size?: number;

373

});

374

375

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

376

}

377

378

class FixedRecencyPostprocessor implements BasePostProcessor {

379

constructor(options: {

380

topK?: number;

381

dateKey?: string;

382

inPlace?: boolean;

383

});

384

385

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

386

}

387

388

class EmbeddingRecencyPostprocessor implements BasePostProcessor {

389

constructor(options: {

390

embedModel?: BaseEmbedding;

391

similarityTopK?: number;

392

dateKey?: string;

393

recencyWeights?: number[];

394

});

395

396

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

397

}

398

399

class TimeWeightedPostprocessor implements BasePostProcessor {

400

constructor(options: {

401

timeDecayFunction?: (timeDiff: number) => number;

402

timeKey?: string;

403

nowTimestamp?: Date;

404

});

405

406

postprocessNodes(nodes: NodeWithScore[], query?: string): NodeWithScore[];

407

}

408

```

409

410

### Advanced Post-Processing Examples

411

412

```typescript

413

import {

414

SimilarityPostprocessor,

415

KeywordNodePostprocessor,

416

CohereRerank,

417

LLMRerank,

418

SentenceEmbeddingOptimizer

419

} from "llamaindex/postprocessors";

420

421

// Multi-stage post-processing pipeline

422

const queryEngine = new RetrieverQueryEngine({

423

retriever: index.asRetriever({ similarityTopK: 20 }), // Get more candidates

424

nodePostprocessors: [

425

// Stage 1: Filter by similarity threshold

426

new SimilarityPostprocessor({

427

similarityCutoff: 0.6, // Remove low-relevance nodes

428

}),

429

430

// Stage 2: Filter by keywords

431

new KeywordNodePostprocessor({

432

requiredKeywords: ["important", "relevant"],

433

excludeKeywords: ["outdated", "deprecated"],

434

}),

435

436

// Stage 3: Sentence-level embedding optimization

437

new SentenceEmbeddingOptimizer({

438

percentilesCutoff: 0.8, // Keep top 80% of sentence embeddings

439

thresholdCutoff: 0.7, // Minimum threshold

440

}),

441

442

// Stage 4: Re-rank using external service

443

new CohereRerank({

444

apiKey: process.env.COHERE_API_KEY,

445

topN: 5, // Final top 5 results

446

model: "rerank-english-v2.0",

447

}),

448

],

449

});

450

451

// Time-weighted post-processing for temporal data

452

const temporalQueryEngine = new RetrieverQueryEngine({

453

retriever: index.asRetriever(),

454

nodePostprocessors: [

455

new TimeWeightedPostprocessor({

456

timeKey: "created_date",

457

timeDecayFunction: (timeDiffDays) => Math.exp(-timeDiffDays / 30), // Exponential decay

458

}),

459

new FixedRecencyPostprocessor({

460

topK: 10,

461

dateKey: "created_date",

462

}),

463

],

464

});

465

466

// LLM-based re-ranking

467

const llmRerankEngine = new RetrieverQueryEngine({

468

retriever: index.asRetriever({ similarityTopK: 10 }),

469

nodePostprocessors: [

470

new LLMRerank({

471

llm: /* your LLM instance */,

472

topN: 3,

473

choice_batch_size: 5, // Process in batches for efficiency

474

}),

475

],

476

});

477

```

478

479

## Query Engine Tools

480

481

### Creating Tools for Agents

482

483

```typescript

484

import { QueryEngineTool } from "llamaindex/tools";

485

486

// Create query engine tool for use with agents

487

const queryTool = new QueryEngineTool({

488

queryEngine: index.asQueryEngine(),

489

metadata: {

490

name: "knowledge_search",

491

description: "Search the knowledge base for information about the company",

492

},

493

});

494

495

// Use with ReAct agent

496

import { ReActAgent } from "llamaindex/agent";

497

498

const agent = new ReActAgent({

499

tools: [queryTool],

500

llm: /* your LLM */,

501

});

502

503

const response = await agent.chat("Find information about our product roadmap");

504

```

505

506

## Performance Optimization

507

508

### Async Query Processing

509

510

```typescript

511

// Process multiple queries concurrently

512

const queries = [

513

"What is machine learning?",

514

"How does deep learning work?",

515

"What are neural networks?",

516

];

517

518

const responses = await Promise.all(

519

queries.map(query => queryEngine.query(query))

520

);

521

522

responses.forEach((response, i) => {

523

console.log(`Query ${i + 1}:`, response.toString());

524

});

525

```

526

527

### Query Caching

528

529

```typescript

530

// Simple query cache implementation

531

class CachedQueryEngine {

532

private cache = new Map<string, EngineResponse>();

533

534

constructor(private queryEngine: BaseQueryEngine) {}

535

536

async query(query: string): Promise<EngineResponse> {

537

if (this.cache.has(query)) {

538

return this.cache.get(query)!;

539

}

540

541

const response = await this.queryEngine.query(query);

542

this.cache.set(query, response);

543

return response;

544

}

545

}

546

547

const cachedQE = new CachedQueryEngine(queryEngine);

548

```

549

550

### Batch Query Processing

551

552

```typescript

553

// Process queries in batches to manage memory

554

const batchQueries = async (queries: string[], batchSize: number = 5) => {

555

const results: EngineResponse[] = [];

556

557

for (let i = 0; i < queries.length; i += batchSize) {

558

const batch = queries.slice(i, i + batchSize);

559

const batchResults = await Promise.all(

560

batch.map(query => queryEngine.query(query))

561

);

562

results.push(...batchResults);

563

564

// Optional: Add delay between batches

565

await new Promise(resolve => setTimeout(resolve, 100));

566

}

567

568

return results;

569

};

570

```

571

572

## Error Handling

573

574

### Robust Query Processing

575

576

```typescript

577

const safeQuery = async (query: string): Promise<EngineResponse | null> => {

578

try {

579

const response = await queryEngine.query(query);

580

581

// Validate response

582

if (!response.response || response.response.trim().length === 0) {

583

console.warn("Empty response received");

584

return null;

585

}

586

587

return response;

588

} catch (error) {

589

console.error("Query failed:", error);

590

591

// Handle specific error types

592

if (error.message.includes("embedding")) {

593

console.error("Embedding service issue");

594

} else if (error.message.includes("LLM")) {

595

console.error("Language model issue");

596

}

597

598

return null;

599

}

600

};

601

602

const response = await safeQuery("What is the meaning of life?");

603

if (response) {

604

console.log("Answer:", response.toString());

605

} else {

606

console.log("Could not generate response");

607

}

608

```

609

610

## Best Practices

611

612

### Query Engine Selection

613

614

```typescript

615

// Choose the right query engine for your use case

616

const createQueryEngine = (useCase: string) => {

617

switch (useCase) {

618

case "simple":

619

// Basic retrieval and synthesis

620

return index.asQueryEngine();

621

622

case "complex":

623

// Multi-step reasoning

624

return new SubQuestionQueryEngine({ queryEngineTools });

625

626

case "specialized":

627

// Route to different knowledge bases

628

return new RouterQueryEngine({ selector, queryEngineTools });

629

630

default:

631

return index.asQueryEngine();

632

}

633

};

634

```

635

636

### Response Quality

637

638

```typescript

639

// Configure for high-quality responses

640

const highQualityQE = new RetrieverQueryEngine({

641

retriever: index.asRetriever({

642

similarityTopK: 5, // Get more context

643

}),

644

responseSynthesizer: new ResponseSynthesizer({

645

responseMode: "tree_summarize", // Best synthesis method

646

}),

647

nodePostprocessors: [

648

new SimilarityPostprocessor({

649

similarityCutoff: 0.7, // Only high-quality sources

650

}),

651

],

652

});

653

```

654

655

### Monitoring and Debugging

656

657

```typescript

658

// Add response metadata logging

659

const logQueryResponse = async (query: string) => {

660

const response = await queryEngine.query(query);

661

662

console.log("Query:", query);

663

console.log("Response:", response.toString());

664

console.log("Source count:", response.sourceNodes?.length || 0);

665

console.log("Metadata:", response.metadata);

666

667

return response;

668

};

669

```