# Chat Engines

Conversational interfaces that maintain context and enable back-and-forth interactions with your data in LlamaIndex.TS.

## Import

```typescript
import { VectorStoreIndex } from "llamaindex";
// Or from specific submodules
import { ContextChatEngine, SimpleChatEngine } from "llamaindex/engines";
```

## Overview

Chat engines in LlamaIndex.TS provide conversational interfaces that maintain context across multiple turns of conversation. Unlike query engines, which handle single, stateless queries, chat engines are designed for interactive, multi-turn conversations while leveraging your indexed data.
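
For comparison, a query engine answers each request independently, while a chat engine carries earlier turns forward. A minimal sketch, assuming an existing `index` (see the query engine documentation for the exact query call shape):

```typescript
// Query engine: each call is independent, with no memory of earlier questions.
const queryEngine = index.asQueryEngine();
await queryEngine.query({ query: "What is LlamaIndex?" });

// Chat engine: follow-ups can rely on conversation history ("it" resolves via earlier turns).
const chatEngine = index.asChatEngine();
await chatEngine.chat("What is LlamaIndex?");
await chatEngine.chat("What can I build with it?");
```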

## Base Chat Engine Interface

All chat engines implement the base interface.

```typescript { .api }
interface BaseChatEngine {
  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;
  chatHistory: ChatMessage[];
}

interface ChatOptions {
  stream?: boolean;
  chatHistory?: ChatMessage[];
}

interface ChatMessage {
  role: MessageType;
  content: string;
}

type MessageType = "system" | "user" | "assistant";
```
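
Because every engine implements this interface, utility code can be written once against `BaseChatEngine`. A small illustrative helper (not part of the library):

```typescript
// Works with any chat engine: ContextChatEngine, SimpleChatEngine, CondenseQuestionChatEngine, ...
async function askAndLog(engine: BaseChatEngine, message: string): Promise<string> {
  const response = await engine.chat(message);
  console.log(`Turns so far: ${engine.chatHistory.length}`);
  return response.toString();
}
```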

## ContextChatEngine

A chat engine that uses retrieval to provide context-aware responses while maintaining conversation history.

```typescript { .api }
class ContextChatEngine implements BaseChatEngine {
  constructor(args: {
    retriever: BaseRetriever;
    memory?: BaseMemory;
    systemPrompt?: string;
    nodePostprocessors?: BasePostprocessor[];
    contextRole?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  retriever: BaseRetriever;
  memory: BaseMemory;
  systemPrompt?: string;
}
```

## SimpleChatEngine

A basic chat engine that maintains conversation history without retrieval.

```typescript { .api }
class SimpleChatEngine implements BaseChatEngine {
  constructor(args: {
    llm: LLM;
    memory?: BaseMemory;
    systemPrompt?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  llm: LLM;
  memory: BaseMemory;
}
```

## CondenseQuestionChatEngine

A chat engine that condenses the conversation history and the current question into a standalone question for better retrieval.

```typescript { .api }
class CondenseQuestionChatEngine implements BaseChatEngine {
  constructor(args: {
    queryEngine: BaseQueryEngine;
    memory?: BaseMemory;
    systemPrompt?: string;
    condenseQuestionPrompt?: string;
  });

  chat(message: string, options?: ChatOptions): Promise<EngineResponse>;
  achat(message: string, options?: ChatOptions): AsyncIterable<EngineResponse>;
  reset(): void;

  chatHistory: ChatMessage[];
  queryEngine: BaseQueryEngine;
  memory: BaseMemory;
}
```

## Memory System

### BaseMemory Interface

Interface for chat memory implementations.

```typescript { .api }
interface BaseMemory {
  get(initialTokenCount?: number): ChatMessage[];
  getAll(): ChatMessage[];
  put(message: ChatMessage): void;
  set(messages: ChatMessage[]): void;
  reset(): void;
}
```
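
Custom memories only need to satisfy this interface. A minimal illustrative sketch that keeps the last N messages (not a library class; real implementations such as `ChatMemoryBuffer` below are token-aware):

```typescript
class LastNMemory implements BaseMemory {
  private messages: ChatMessage[] = [];

  constructor(private maxMessages = 20) {}

  // Return the most recent messages; the token count hint is ignored in this sketch.
  get(_initialTokenCount?: number): ChatMessage[] {
    return this.messages.slice(-this.maxMessages);
  }

  getAll(): ChatMessage[] {
    return [...this.messages];
  }

  put(message: ChatMessage): void {
    this.messages.push(message);
  }

  set(messages: ChatMessage[]): void {
    this.messages = [...messages];
  }

  reset(): void {
    this.messages = [];
  }
}
```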

### ChatMemoryBuffer

Simple in-memory buffer for storing chat history.

```typescript { .api }
class ChatMemoryBuffer implements BaseMemory {
  constructor(args?: {
    tokenLimit?: number;
    chatHistory?: ChatMessage[];
  });

  get(initialTokenCount?: number): ChatMessage[];
  getAll(): ChatMessage[];
  put(message: ChatMessage): void;
  set(messages: ChatMessage[]): void;
  reset(): void;

  tokenLimit?: number;
  chatHistory: ChatMessage[];
}
```
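
A quick standalone sketch of the buffer itself (wiring it into an engine is shown under Advanced Usage below):

```typescript
const memory = new ChatMemoryBuffer({ tokenLimit: 2000 });

memory.put({ role: "user", content: "Hello" });
memory.put({ role: "assistant", content: "Hi! How can I help?" });

console.log(memory.getAll().length); // 2
memory.reset();
console.log(memory.getAll().length); // 0
```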

## Basic Usage

### Context-Aware Chat

```typescript
import { VectorStoreIndex, Document } from "llamaindex";

// Create knowledge base
const documents = [
  new Document({ text: "LlamaIndex is a data framework for LLM applications." }),
  new Document({ text: "It supports various document types and vector stores." }),
  new Document({ text: "You can build chatbots and Q&A systems with it." }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Create context chat engine
const chatEngine = index.asChatEngine({
  chatMode: "context", // Use context-aware chat
  systemPrompt: "You are a helpful assistant that answers questions about LlamaIndex.",
});

// Start conversation
const response1 = await chatEngine.chat("What is LlamaIndex?");
console.log("Assistant:", response1.toString());

// Continue conversation with context
const response2 = await chatEngine.chat("What can I build with it?");
console.log("Assistant:", response2.toString());

// Check conversation history
console.log("Chat history:", chatEngine.chatHistory);
```

### Simple Chat Without Retrieval

```typescript
import { SimpleChatEngine, OpenAI } from "llamaindex";

// Create simple chat engine
const simpleChatEngine = new SimpleChatEngine({
  llm: new OpenAI({ model: "gpt-3.5-turbo" }),
  systemPrompt: "You are a helpful assistant.",
});

// Have a conversation
const response = await simpleChatEngine.chat("Hello! How are you?");
console.log("Response:", response.toString());
```

### Streaming Chat

```typescript
// The stream flag can be passed through ChatOptions
const response = await chatEngine.chat("Explain vector databases", {
  stream: true,
});

// To consume tokens as they arrive, iterate over achat
for await (const chunk of chatEngine.achat("Tell me about embeddings")) {
  process.stdout.write(chunk.response);
}
```

## Advanced Usage

### Custom Memory Configuration

```typescript
import { ContextChatEngine, ChatMemoryBuffer } from "llamaindex";

// Create chat engine with custom memory
const customMemory = new ChatMemoryBuffer({
  tokenLimit: 4000, // Limit context window
  chatHistory: [
    { role: "system", content: "You are an expert on AI and machine learning." },
  ],
});

const chatEngine = new ContextChatEngine({
  retriever: index.asRetriever(),
  memory: customMemory,
  systemPrompt: "Answer questions about AI using the provided context.",
});
```

### Condense Question Chat Engine

```typescript
import { CondenseQuestionChatEngine } from "llamaindex/engines";

// Create a condense question chat engine for better multi-turn conversations
const condenseEngine = new CondenseQuestionChatEngine({
  queryEngine: index.asQueryEngine(),
  condenseQuestionPrompt: `
Given the conversation history and a follow-up question,
rephrase the follow-up question to be a standalone question.

Chat History: {chat_history}
Follow-up Input: {question}
Standalone Question:
`,
});

// Multi-turn conversation
await condenseEngine.chat("What is machine learning?");
await condenseEngine.chat("How does it differ from deep learning?"); // Condensed into a standalone question
```

### Custom System Prompts

```typescript
const chatEngine = index.asChatEngine({
  chatMode: "context",
  systemPrompt: `
You are an expert technical documentation assistant.

Guidelines:
- Always provide accurate, technical information
- Include code examples when relevant
- Cite your sources when using retrieved context
- If you don't know something, say so clearly
- Keep responses concise but comprehensive
`,
});
```

## Conversation Management

### Managing Chat History

```typescript
// Access full conversation history
const history = chatEngine.chatHistory;
console.log("Conversation turns:", history.length);

// Filter by role
const userMessages = history.filter(msg => msg.role === "user");
const assistantMessages = history.filter(msg => msg.role === "assistant");

// Reset conversation
chatEngine.reset();
console.log("History after reset:", chatEngine.chatHistory.length); // 0
```

### Conversation Persistence

```typescript
// Save conversation to storage
const saveConversation = (chatEngine: BaseChatEngine, filename: string) => {
  const conversation = {
    history: chatEngine.chatHistory,
    timestamp: new Date().toISOString(),
  };

  // Save to file or database
  // fs.writeFileSync(filename, JSON.stringify(conversation, null, 2));
};

// Load conversation from storage
const loadConversation = (chatEngine: BaseChatEngine, conversationData: any) => {
  chatEngine.chatHistory = conversationData.history;
};
```
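
A saved history can also be handed to a fresh engine through its memory. A sketch assuming the JSON written by `saveConversation` above (the filename is illustrative):

```typescript
import { readFileSync } from "node:fs";

// Read the JSON produced by saveConversation
const saved = JSON.parse(readFileSync("conversation.json", "utf8"));

const restoredEngine = new ContextChatEngine({
  retriever: index.asRetriever(),
  memory: new ChatMemoryBuffer({ chatHistory: saved.history as ChatMessage[] }),
});
```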

### Context Window Management

```typescript
import { ChatMemoryBuffer } from "llamaindex";

// Create memory with a token limit to manage the context window
const limitedMemory = new ChatMemoryBuffer({
  tokenLimit: 3000, // Adjust based on your model's context window
});

const chatEngine = new ContextChatEngine({
  retriever: index.asRetriever(),
  memory: limitedMemory,
});

// The memory automatically truncates old messages when the limit is reached
```

## Integration with Agents

### Chat Engine as Agent Tool

```typescript
import { QueryEngineTool, ReActAgent } from "llamaindex";

// Expose the knowledge base to an agent as a tool.
// Note: QueryEngineTool expects a BaseQueryEngine; pass a chat engine here
// only if it also satisfies that interface, otherwise use index.asQueryEngine().
const chatTool = new QueryEngineTool({
  queryEngine: chatEngine,
  metadata: {
    name: "knowledge_chat",
    description: "Have a conversation about the knowledge base",
  },
});

// Use with agent
const agent = new ReActAgent({
  tools: [chatTool],
  llm: /* your LLM */,
});
```

## Multi-Modal Chat

### Image and Text Chat

```typescript
// For multi-modal conversations (requires a compatible LLM).
// Note: multi-modal messages use a content array instead of a plain string.
const multiModalResponse = await chatEngine.chat("What's in this image?", {
  chatHistory: [
    {
      role: "user",
      content: [
        { type: "text", text: "Analyze this image:" },
        { type: "image_url", image_url: { url: "data:image/jpeg;base64,..." } },
      ],
    },
  ],
});
```

## Performance Optimization

### Async Chat Processing

```typescript
// Handle multiple chat sessions concurrently
const handleMultipleChats = async (
  sessions: Array<{ chatEngine: BaseChatEngine; message: string }>,
) => {
  const responses = await Promise.all(
    sessions.map(session => session.chatEngine.chat(session.message)),
  );

  return responses;
};
```

### Chat Response Caching

```typescript
// Simple response caching for common questions
class CachedChatEngine {
  private cache = new Map<string, EngineResponse>();

  constructor(private chatEngine: BaseChatEngine) {}

  async chat(message: string): Promise<EngineResponse> {
    const cacheKey = message.toLowerCase().trim();

    if (this.cache.has(cacheKey)) {
      return this.cache.get(cacheKey)!;
    }

    const response = await this.chatEngine.chat(message);
    this.cache.set(cacheKey, response);
    return response;
  }
}
```
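
Note that cache hits never reach the underlying engine, so they add no new turns to its chat history. Usage of the wrapper defined above:

```typescript
const cachedEngine = new CachedChatEngine(chatEngine);

await cachedEngine.chat("What is LlamaIndex?"); // forwarded to the wrapped engine
await cachedEngine.chat("What is LlamaIndex?"); // served from the cache, no new chat turn
```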

## Error Handling

### Robust Chat Implementation

```typescript
const safeChat = async (chatEngine: BaseChatEngine, message: string): Promise<EngineResponse | null> => {
  try {
    // Validate input
    if (!message || message.trim().length === 0) {
      console.warn("Empty message provided");
      return null;
    }

    const response = await chatEngine.chat(message);

    // Validate response
    if (!response.response || response.response.trim().length === 0) {
      console.warn("Empty response received");
      return null;
    }

    return response;
  } catch (error) {
    console.error("Chat error:", error);

    // Handle specific errors (narrow the unknown error before reading .message)
    if (error instanceof Error && error.message.includes("context window")) {
      console.error("Context window exceeded - consider resetting the conversation");
      chatEngine.reset();
    }

    return null;
  }
};
```

## Best Practices

### Chat Engine Selection

```typescript
// Choose the right chat engine for your use case
const createChatEngine = (useCase: string, index: VectorStoreIndex) => {
  switch (useCase) {
    case "simple":
      // Basic conversation without a knowledge base
      return new SimpleChatEngine({
        llm: /* your LLM */,
      });

    case "knowledge":
      // Conversations with knowledge base access
      return index.asChatEngine({ chatMode: "context" });

    case "complex":
      // Multi-turn conversations with better context handling
      return new CondenseQuestionChatEngine({
        queryEngine: index.asQueryEngine(),
      });

    default:
      return index.asChatEngine();
  }
};
```

### Conversation Quality

```typescript
// Configure for high-quality conversations
const highQualityChatEngine = new ContextChatEngine({
  retriever: index.asRetriever({
    similarityTopK: 3, // Focused context
  }),
  memory: new ChatMemoryBuffer({
    tokenLimit: 4000, // Manage context window
  }),
  systemPrompt: `
You are a knowledgeable assistant. Use the provided context to give accurate answers.
If the context doesn't contain relevant information, say so clearly.
Always be helpful and conversational while staying factual.
`,
});
```

### Monitoring Chat Sessions

```typescript
// Add logging and monitoring
const monitoredChat = async (chatEngine: BaseChatEngine, message: string) => {
  const startTime = Date.now();

  try {
    const response = await chatEngine.chat(message);
    const duration = Date.now() - startTime;

    console.log({
      timestamp: new Date().toISOString(),
      message: message.substring(0, 100),
      responseLength: response.response.length,
      sourceCount: response.sourceNodes?.length || 0,
      duration: `${duration}ms`,
      historyLength: chatEngine.chatHistory.length,
    });

    return response;
  } catch (error) {
    console.error({
      timestamp: new Date().toISOString(),
      message: message.substring(0, 100),
      error: error instanceof Error ? error.message : String(error),
      duration: `${Date.now() - startTime}ms`,
    });
    throw error;
  }
};
```