# Chat Engines

Conversational interfaces that maintain context and enable back-and-forth interactions with your data in LlamaIndex.TS.
## Import

```typescript
import { VectorStoreIndex } from "llamaindex";
// Or from specific submodules
import { ContextChatEngine, SimpleChatEngine } from "llamaindex/engines";
```
## Overview

Chat engines in LlamaIndex.TS provide conversational interfaces that maintain context across multiple turns of conversation. Unlike query engines, which handle single, stateless queries, chat engines are designed for interactive, multi-turn conversations while still leveraging your indexed data.
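
To make the distinction concrete, here is a minimal sketch (assuming an `index` built as in the usage examples below): the query engine answers each call in isolation, while the chat engine resolves follow-ups against its history.

```typescript
// Query engine: every call is independent
const queryEngine = index.asQueryEngine();
await queryEngine.query("What is LlamaIndex?");
await queryEngine.query("What can I build with it?"); // "it" has no referent

// Chat engine: turns share conversation history
const chatEngine = index.asChatEngine();
await chatEngine.chat("What is LlamaIndex?");
await chatEngine.chat("What can I build with it?"); // "it" resolves via chat history
```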
## Base Chat Engine Interface

All chat engines implement a common base interface.

```typescript { .api }
interface BaseChatEngine {
  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
}

interface ChatOptions {
  stream?: boolean;
  chatHistory?: ChatMessage[];
}

interface ChatMessage {
  role: MessageType;
  content: string;
}

type MessageType = "system" | "user" | "assistant";
```
## ContextChatEngine

A chat engine that uses retrieval to provide context-aware responses while maintaining conversation history.

```typescript { .api }
class ContextChatEngine implements BaseChatEngine {
  constructor(args: {
    retriever: BaseRetriever;
    memory?: BaseMemory;
    systemPrompt?: string;
    nodePostprocessors?: BasePostprocessor[];
    contextRole?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  retriever: BaseRetriever;
  memory: BaseMemory;
  systemPrompt?: string;
}
```
## SimpleChatEngine

A basic chat engine that maintains conversation history without retrieval.

```typescript { .api }
class SimpleChatEngine implements BaseChatEngine {
  constructor(args: {
    llm: LLM;
    memory?: BaseMemory;
    systemPrompt?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  llm: LLM;
  memory: BaseMemory;
}
```
## CondenseQuestionChatEngine

A chat engine that condenses the conversation history and the current question into a standalone question for better retrieval.

```typescript { .api }
class CondenseQuestionChatEngine implements BaseChatEngine {
  constructor(args: {
    queryEngine: BaseQueryEngine;
    memory?: BaseMemory;
    systemPrompt?: string;
    condenseQuestionPrompt?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  queryEngine: BaseQueryEngine;
  memory: BaseMemory;
}
```
## Memory System

### BaseMemory Interface

Interface for chat memory implementations.

```typescript { .api }
interface BaseMemory {
  get(initialTokenCount?: number): ChatMessage[];
  getAll(): ChatMessage[];
  put(message: ChatMessage): void;
  set(messages: ChatMessage[]): void;
  reset(): void;
}
```
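Any object satisfying this interface can back a chat engine. As a minimal sketch (the class name and window size are illustrative, not part of the library), here is a memory that keeps only the most recent N messages:

```typescript
// Illustrative custom memory: retains only the last `windowSize` messages
// (ignores the token-based budgeting that ChatMemoryBuffer provides)
class WindowMemory implements BaseMemory {
  private messages: ChatMessage[] = [];

  constructor(private windowSize: number = 10) {}

  get(_initialTokenCount?: number): ChatMessage[] {
    return this.messages.slice(-this.windowSize);
  }

  getAll(): ChatMessage[] {
    return this.messages;
  }

  put(message: ChatMessage): void {
    this.messages.push(message);
  }

  set(messages: ChatMessage[]): void {
    this.messages = [...messages];
  }

  reset(): void {
    this.messages = [];
  }
}
```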
### ChatMemoryBuffer

Simple in-memory buffer for storing chat history.

```typescript { .api }
class ChatMemoryBuffer implements BaseMemory {
  constructor(args?: {
    tokenLimit?: number;
    chatHistory?: ChatMessage[];
  });

  get(initialTokenCount?: number): ChatMessage[];
  getAll(): ChatMessage[];
  put(message: ChatMessage): void;
  set(messages: ChatMessage[]): void;
  reset(): void;

  tokenLimit?: number;
  chatHistory: ChatMessage[];
}
```
## Basic Usage

### Context-Aware Chat

```typescript
import { VectorStoreIndex, Document } from "llamaindex";

// Create knowledge base
const documents = [
  new Document({ text: "LlamaIndex is a data framework for LLM applications." }),
  new Document({ text: "It supports various document types and vector stores." }),
  new Document({ text: "You can build chatbots and Q&A systems with it." }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Create context chat engine
const chatEngine = index.asChatEngine({
  chatMode: "context", // Use context-aware chat
  systemPrompt: "You are a helpful assistant that answers questions about LlamaIndex.",
});

// Start conversation
const response1 = await chatEngine.chat("What is LlamaIndex?");
console.log("Assistant:", response1.toString());

// Continue conversation with context
const response2 = await chatEngine.chat("What can I build with it?");
console.log("Assistant:", response2.toString());

// Check conversation history
console.log("Chat history:", chatEngine.chatHistory);
```
### Simple Chat Without Retrieval

```typescript
import { SimpleChatEngine, OpenAI } from "llamaindex";

// Create simple chat engine
const simpleChatEngine = new SimpleChatEngine({
  llm: new OpenAI({ model: "gpt-3.5-turbo" }),
  systemPrompt: "You are a helpful assistant.",
});

// Have a conversation
const response = await simpleChatEngine.chat("Hello! How are you?");
console.log("Response:", response.toString());
```
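Because the engine keeps its own history, later turns can refer back to earlier ones:

```typescript
// The engine's memory carries the earlier turn into this one
const followUp = await simpleChatEngine.chat("Can you repeat what I just said?");
console.log("Follow-up:", followUp.toString());
```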
### Streaming Chat

```typescript
// chat() resolves once, with the complete response
const response = await chatEngine.chat("Explain vector databases");

// For real-time output, use achat(), which yields response chunks
for await (const chunk of chatEngine.achat("Tell me about embeddings")) {
  process.stdout.write(chunk.response);
}
```
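A common pattern is to accumulate chunks while streaming, so the complete reply is also available once the stream ends:

```typescript
let fullText = "";
for await (const chunk of chatEngine.achat("Summarize the indexed documents")) {
  process.stdout.write(chunk.response); // render incrementally
  fullText += chunk.response; // keep the complete reply
}
console.log("\nTotal characters streamed:", fullText.length);
```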
## Advanced Usage

### Custom Memory Configuration

```typescript
import { ContextChatEngine, ChatMemoryBuffer } from "llamaindex";

// Create chat engine with custom memory
const customMemory = new ChatMemoryBuffer({
  tokenLimit: 4000, // Limit context window
  chatHistory: [
    { role: "system", content: "You are an expert on AI and machine learning." },
  ],
});

const chatEngine = new ContextChatEngine({
  retriever: index.asRetriever(),
  memory: customMemory,
  systemPrompt: "Answer questions about AI using the provided context.",
});
```
### Condense Question Chat Engine

```typescript
import { CondenseQuestionChatEngine } from "llamaindex/engines";

// Create a condense-question chat engine for better multi-turn conversations
const condenseEngine = new CondenseQuestionChatEngine({
  queryEngine: index.asQueryEngine(),
  condenseQuestionPrompt: `
    Given the conversation history and a follow-up question,
    rephrase the follow-up question to be a standalone question.

    Chat History: {chat_history}
    Follow-up Input: {question}
    Standalone Question:
  `,
});

// Multi-turn conversation
await condenseEngine.chat("What is machine learning?");
await condenseEngine.chat("How does it differ from deep learning?"); // condensed into a standalone question before retrieval
```
### Custom System Prompts

```typescript
const chatEngine = index.asChatEngine({
  chatMode: "context",
  systemPrompt: `
    You are an expert technical documentation assistant.

    Guidelines:
    - Always provide accurate, technical information
    - Include code examples when relevant
    - Cite your sources when using retrieved context
    - If you don't know something, say so clearly
    - Keep responses concise but comprehensive
  `,
});
```
## Conversation Management

### Managing Chat History

```typescript
// Access full conversation history
const history = chatEngine.chatHistory;
console.log("Conversation turns:", history.length);

// Filter by role
const userMessages = history.filter((msg) => msg.role === "user");
const assistantMessages = history.filter((msg) => msg.role === "assistant");

// Reset conversation
chatEngine.reset();
console.log("History after reset:", chatEngine.chatHistory.length); // 0
```
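The `chatHistory` field on `ChatOptions` (defined above) can also seed an individual call with prior turns, e.g. to resume a stored session; a sketch:

```typescript
// Hypothetical prior turns, e.g. restored from storage
const priorTurns: ChatMessage[] = [
  { role: "user", content: "What is LlamaIndex?" },
  { role: "assistant", content: "A data framework for LLM applications." },
];

// Resume the conversation for this one call
const resumed = await chatEngine.chat("What can I build with it?", {
  chatHistory: priorTurns,
});
```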
### Conversation Persistence

```typescript
import { writeFileSync } from "node:fs";

// Save conversation to storage
const saveConversation = (chatEngine: BaseChatEngine, filename: string) => {
  const conversation = {
    history: chatEngine.chatHistory,
    timestamp: new Date().toISOString(),
  };

  // Persist to a file (or swap in a database write)
  writeFileSync(filename, JSON.stringify(conversation, null, 2));
};

// Load conversation from storage
const loadConversation = (
  chatEngine: BaseChatEngine,
  conversationData: { history: ChatMessage[] },
) => {
  chatEngine.chatHistory = conversationData.history;
};
```
### Context Window Management

```typescript
import { ChatMemoryBuffer, ContextChatEngine } from "llamaindex";

// Create memory with a token limit to manage the context window
const limitedMemory = new ChatMemoryBuffer({
  tokenLimit: 3000, // Adjust based on your model's context window
});

const chatEngine = new ContextChatEngine({
  retriever: index.asRetriever(),
  memory: limitedMemory,
});

// The memory automatically truncates old messages when the limit is reached
```
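The `initialTokenCount` parameter on `BaseMemory.get` lets callers account for tokens already spent elsewhere (system prompt, retrieved context), so the buffer returns only as much history as still fits. A sketch, assuming that semantics:

```typescript
// Reserve ~1000 tokens for the system prompt and retrieved context,
// leaving the remainder of the 3000-token limit for chat history
const fittingHistory = limitedMemory.get(1000);
console.log("History messages that fit:", fittingHistory.length);
```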
## Integration with Agents

### Chat Engine as Agent Tool

```typescript
import { QueryEngineTool, ReActAgent } from "llamaindex";

// Chat engines and query engines have different interfaces (see above),
// so adapt chat() to the query shape the tool expects
const chatTool = new QueryEngineTool({
  queryEngine: {
    query: async (query: string) => chatEngine.chat(query),
  },
  metadata: {
    name: "knowledge_chat",
    description: "Have a conversation about the knowledge base",
  },
});

// Use with agent
const agent = new ReActAgent({
  tools: [chatTool],
  llm: /* your LLM */,
});
```
## Multi-Modal Chat

### Image and Text Chat

```typescript
// For multi-modal conversations (requires a compatible LLM).
// Note: multi-modal messages use an array content form (text and image parts),
// extending the plain-string `content` shown in the base ChatMessage interface.
const multiModalResponse = await chatEngine.chat("What's in this image?", {
  chatHistory: [
    {
      role: "user",
      content: [
        { type: "text", text: "Analyze this image:" },
        { type: "image_url", image_url: { url: "data:image/jpeg;base64,..." } },
      ],
    },
  ],
});
```
## Performance Optimization

### Async Chat Processing

```typescript
// Handle multiple chat sessions concurrently
const handleMultipleChats = async (
  sessions: Array<{ chatEngine: BaseChatEngine; message: string }>,
) => {
  const responses = await Promise.all(
    sessions.map((session) => session.chatEngine.chat(session.message)),
  );

  return responses;
};
```
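Usage, assuming two independently constructed engines (`engineA` and `engineB` are illustrative):

```typescript
const responses = await handleMultipleChats([
  { chatEngine: engineA, message: "Summarize document A" },
  { chatEngine: engineB, message: "Summarize document B" },
]);
console.log("Responses received:", responses.length);
```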
### Chat Response Caching

```typescript
// Simple response caching for common questions.
// Note: the cache keys on the message alone and ignores conversation history,
// so it only suits stateless, FAQ-style traffic.
class CachedChatEngine {
  private cache = new Map<string, EngineResponse>();

  constructor(private chatEngine: BaseChatEngine) {}

  async chat(message: string): Promise<EngineResponse> {
    const cacheKey = message.toLowerCase().trim();

    if (this.cache.has(cacheKey)) {
      return this.cache.get(cacheKey)!;
    }

    const response = await this.chatEngine.chat(message);
    this.cache.set(cacheKey, response);
    return response;
  }
}
```
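Wrapping an existing engine is a one-liner; repeated questions (after normalization) skip the LLM call:

```typescript
const cachedEngine = new CachedChatEngine(chatEngine);

await cachedEngine.chat("What is LlamaIndex?"); // hits the underlying engine
await cachedEngine.chat("  what is llamaindex? "); // served from the cache
```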
## Error Handling

### Robust Chat Implementation

```typescript
const safeChat = async (
  chatEngine: BaseChatEngine,
  message: string,
): Promise<EngineResponse | null> => {
  try {
    // Validate input
    if (!message || message.trim().length === 0) {
      console.warn("Empty message provided");
      return null;
    }

    const response = await chatEngine.chat(message);

    // Validate response
    if (!response.response || response.response.trim().length === 0) {
      console.warn("Empty response received");
      return null;
    }

    return response;
  } catch (error) {
    console.error("Chat error:", error);

    // Handle specific errors (narrow the unknown error type first)
    if (error instanceof Error && error.message.includes("context window")) {
      console.error("Context window exceeded - consider resetting conversation");
      chatEngine.reset();
    }

    return null;
  }
};
```
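For transient failures (rate limits, network errors), a small retry helper can wrap `safeChat`; a sketch, with arbitrary retry counts and delays:

```typescript
const chatWithRetry = async (
  chatEngine: BaseChatEngine,
  message: string,
  maxRetries = 3,
): Promise<EngineResponse | null> => {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    const response = await safeChat(chatEngine, message);
    if (response) return response;

    // Exponential backoff between attempts
    await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * 500));
  }
  return null;
};
```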
## Best Practices

### Chat Engine Selection

```typescript
// Choose the right chat engine for your use case
const createChatEngine = (useCase: string, index: VectorStoreIndex) => {
  switch (useCase) {
    case "simple":
      // Basic conversation without knowledge base
      return new SimpleChatEngine({
        llm: /* your LLM */,
      });

    case "knowledge":
      // Conversations with knowledge base access
      return index.asChatEngine({ chatMode: "context" });

    case "complex":
      // Multi-turn conversations with better context handling
      return new CondenseQuestionChatEngine({
        queryEngine: index.asQueryEngine(),
      });

    default:
      return index.asChatEngine();
  }
};
```
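Usage:

```typescript
const engine = createChatEngine("knowledge", index);
const answer = await engine.chat("How do I get started?");
console.log(answer.toString());
```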
### Conversation Quality

```typescript
// Configure for high-quality conversations
const highQualityChatEngine = new ContextChatEngine({
  retriever: index.asRetriever({
    similarityTopK: 3, // Focused context
  }),
  memory: new ChatMemoryBuffer({
    tokenLimit: 4000, // Manage context window
  }),
  systemPrompt: `
    You are a knowledgeable assistant. Use the provided context to give accurate answers.
    If the context doesn't contain relevant information, say so clearly.
    Always be helpful and conversational while staying factual.
  `,
});
```
### Monitoring Chat Sessions

```typescript
// Add logging and monitoring
const monitoredChat = async (chatEngine: BaseChatEngine, message: string) => {
  const startTime = Date.now();

  try {
    const response = await chatEngine.chat(message);
    const duration = Date.now() - startTime;

    console.log({
      timestamp: new Date().toISOString(),
      message: message.substring(0, 100),
      responseLength: response.response.length,
      sourceCount: response.sourceNodes?.length || 0,
      duration: `${duration}ms`,
      historyLength: chatEngine.chatHistory.length,
    });

    return response;
  } catch (error) {
    console.error({
      timestamp: new Date().toISOString(),
      message: message.substring(0, 100),
      error: error instanceof Error ? error.message : String(error),
      duration: `${Date.now() - startTime}ms`,
    });
    throw error;
  }
};
```