or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents.md chains.md document-loaders.md embeddings.md experimental.md index.md memory.md output-parsers.md retrievers.md tools.md utilities.md
tile.json

docs/retrievers.md

0

# Retrievers and Vector Operations

1

2

Systems for finding and retrieving relevant documents from vector stores, databases, and other sources. Retrievers provide the bridge between questions and relevant information with support for similarity search, filtering, and ranking.

3

4

## Capabilities

5

6

### Base Retriever Interface

7

8

Foundation interface that all retrievers implement for consistent document retrieval.

9

10

```typescript { .api }

11

/**

12

* Base interface for all retrievers

13

*/

14

interface BaseRetrieverInterface extends RunnableInterface<string, DocumentInterface[]> {

15

/** Get relevant documents for a query string */

16

getRelevantDocuments(

17

query: string,

18

config?: Partial<CallbackManagerForRetrieverRun>

19

): Promise<DocumentInterface[]>;

20

21

/** Invoke the retriever (Runnable interface) */

22

invoke(

23

input: string,

24

options?: RunnableConfig

25

): Promise<DocumentInterface[]>;

26

27

/** Stream retrieval results */

28

stream(

29

input: string,

30

options?: RunnableConfig

31

): AsyncGenerator<DocumentInterface[]>;

32

33

/** Batch retrieve for multiple queries */

34

batch(

35

inputs: string[],

36

options?: RunnableConfig[]

37

): Promise<DocumentInterface[][]>;

38

}

39

40

/**

41

* Base retriever class

42

*/

43

abstract class BaseRetriever implements BaseRetrieverInterface {

44

constructor(fields?: BaseRetrieverInput);

45

46

/** Verbose logging */

47

verbose?: boolean;

48

49

/** Callback handlers */

50

callbacks?: BaseCallbackHandler[];

51

52

/** Tags for tracing */

53

tags?: string[];

54

55

/** Metadata for tracing */

56

metadata?: Record<string, unknown>;

57

58

/** Internal implementation of document retrieval */

59

abstract _getRelevantDocuments(

60

query: string,

61

runManager?: CallbackManagerForRetrieverRun

62

): Promise<DocumentInterface[]>;

63

64

getRelevantDocuments(query: string): Promise<DocumentInterface[]>;

65

invoke(input: string, options?: RunnableConfig): Promise<DocumentInterface[]>;

66

}

67

```

68

69

### Contextual Compression

70

71

Retriever that compresses retrieved documents to focus on relevant content.

72

73

```typescript { .api }

74

/**

75

* Retriever that compresses documents using a base compressor

76

*/

77

class ContextualCompressionRetriever extends BaseRetriever {

78

constructor(args: ContextualCompressionRetrieverArgs);

79

80

/** Base compressor for document compression */

81

baseCompressor: BaseDocumentCompressor;

82

83

/** Base retriever for initial document retrieval */

84

baseRetriever: BaseRetrieverInterface;

85

86

_getRelevantDocuments(

87

query: string,

88

runManager?: CallbackManagerForRetrieverRun

89

): Promise<DocumentInterface[]>;

90

}

91

92

/**

93

* Abstract base class for document compressors

94

*/

95

abstract class BaseDocumentCompressor {

96

/** Compress documents based on query */

97

abstract compressDocuments(

98

documents: DocumentInterface[],

99

query: string,

100

runManager?: CallbackManagerForChainRun

101

): Promise<DocumentInterface[]>;

102

}

103

```

104

105

**Usage Example:**

106

107

```typescript

108

import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";

109

import { LLMChainExtractor } from "langchain/retrievers/document_compressors";

110

import { OpenAI } from "@langchain/openai";

111

112

const llm = new OpenAI({ temperature: 0 });

113

const baseRetriever = vectorStore.asRetriever({ k: 10 });

114

115

// Create a compressor that extracts relevant parts

116

const compressor = LLMChainExtractor.fromLLM(llm);

117

118

// Create contextual compression retriever

119

const compressionRetriever = new ContextualCompressionRetriever({

120

baseCompressor: compressor,

121

baseRetriever: baseRetriever,

122

});

123

124

const compressedDocs = await compressionRetriever.getRelevantDocuments(

125

"What are the main benefits of renewable energy?"

126

);

127

```

128

129

### Multi-Query Retriever

130

131

Generates multiple query variations to improve retrieval coverage.

132

133

```typescript { .api }

134

/**

135

* Retriever that generates multiple queries for broader document coverage

136

*/

137

class MultiQueryRetriever extends BaseRetriever {

138

constructor(args: MultiQueryRetrieverInput);

139

140

/** Base retriever to query with generated queries */

141

retriever: BaseRetrieverInterface;

142

143

/** LLM for generating query variations */

144

llmChain: LLMChain;

145

146

/** Whether to include original query */

147

includeOriginal: boolean;

148

149

/** Parser for extracting queries from LLM output */

150

queryParser: BaseOutputParser<string[]>;

151

152

static fromLLM(args: {

153

retriever: BaseRetrieverInterface;

154

llm: BaseLanguageModelInterface;

155

prompt?: BasePromptTemplate;

156

queryParser?: BaseOutputParser<string[]>;

157

includeOriginal?: boolean;

158

}): MultiQueryRetriever;

159

160

_getRelevantDocuments(

161

query: string,

162

runManager?: CallbackManagerForRetrieverRun

163

): Promise<DocumentInterface[]>;

164

165

/** Generate multiple query variations */

166

generateQueries(

167

question: string,

168

runManager?: CallbackManagerForChainRun

169

): Promise<string[]>;

170

}

171

```

172

173

**Usage Example:**

174

175

```typescript

176

import { MultiQueryRetriever } from "langchain/retrievers/multi_query";

177

import { OpenAI } from "@langchain/openai";

178

179

const llm = new OpenAI({ temperature: 0.1 });

180

const baseRetriever = vectorStore.asRetriever();

181

182

const multiQueryRetriever = MultiQueryRetriever.fromLLM({

183

retriever: baseRetriever,

184

llm: llm,

185

includeOriginal: true,

186

});

187

188

// This will generate multiple query variations and retrieve documents for each

189

const docs = await multiQueryRetriever.getRelevantDocuments(

190

"How does machine learning work?"

191

);

192

```

193

194

### Ensemble Retriever

195

196

Combines results from multiple retrievers with weighted ranking.

197

198

```typescript { .api }

199

/**

200

* Retriever that combines results from multiple retrievers

201

*/

202

class EnsembleRetriever extends BaseRetriever {

203

constructor(args: EnsembleRetrieverArgs);

204

205

/** Array of retrievers to combine */

206

retrievers: BaseRetrieverInterface[];

207

208

/** Weights for each retriever (optional) */

209

weights?: number[];

210

211

/** Constant for rank fusion algorithm */

212

c?: number;

213

214

_getRelevantDocuments(

215

query: string,

216

runManager?: CallbackManagerForRetrieverRun

217

): Promise<DocumentInterface[]>;

218

219

/** Rank fusion algorithm for combining results */

220

rankFusion(

221

results: DocumentInterface[][],

222

weights?: number[]

223

): DocumentInterface[];

224

}

225

```

226

227

**Usage Example:**

228

229

```typescript

230

import { EnsembleRetriever } from "langchain/retrievers/ensemble";

231

232

const vectorRetriever = vectorStore.asRetriever({ k: 5 });

233

const bm25Retriever = new BM25Retriever({ documents, k: 5 });

234

235

const ensembleRetriever = new EnsembleRetriever({

236

retrievers: [vectorRetriever, bm25Retriever],

237

weights: [0.7, 0.3], // Weight vector search more heavily than BM25

238

});

239

240

const docs = await ensembleRetriever.getRelevantDocuments(

241

"What is the capital of France?"

242

);

243

```

244

245

### Multi-Vector Retriever

246

247

Supports multiple vectors per document for enhanced retrieval strategies.

248

249

```typescript { .api }

250

/**

251

* Retriever that supports multiple vectors per document

252

*/

253

class MultiVectorRetriever extends BaseRetriever {

254

constructor(args: MultiVectorRetrieverArgs);

255

256

/** Vector store for embeddings */

257

vectorstore: VectorStoreInterface;

258

259

/** Storage for full documents */

260

docstore: BaseStore<string, DocumentInterface>;

261

262

/** ID key in vector store metadata */

263

idKey: string;

264

265

/** Number of child documents to fetch from the vector store */

266

childK?: number;

267

268

/** Maximum number of parent documents to return */

269

parentK?: number;

270

271

_getRelevantDocuments(

272

query: string,

273

runManager?: CallbackManagerForRetrieverRun

274

): Promise<DocumentInterface[]>;

275

276

/** Add documents with multiple vectors */

277

addDocuments(

278

docs: DocumentInterface[],

279

options?: {

280

ids?: string[];

281

addToDocstore?: boolean;

282

}

283

): Promise<string[] | void>;

284

}

285

```

286

287

### Parent Document Retriever

288

289

Retrieves parent documents based on child document matches.

290

291

```typescript { .api }

292

/**

293

* Retriever that returns parent documents when child documents match

294

*/

295

class ParentDocumentRetriever extends BaseRetriever {

296

constructor(args: ParentDocumentRetrieverArgs);

297

298

/** Vector store containing child documents */

299

vectorstore: VectorStoreInterface;

300

301

/** Storage for parent documents */

302

docstore: BaseStore<string, DocumentInterface>;

303

304

/** Text splitter for creating child documents */

305

childSplitter: TextSplitter;

306

307

/** Optional parent text splitter */

308

parentSplitter?: TextSplitter;

309

310

/** ID key for parent documents */

311

parentIdKey: string;

312

313

/** Number of child documents to fetch from the vector store */

314

childK?: number;

315

316

_getRelevantDocuments(

317

query: string,

318

runManager?: CallbackManagerForRetrieverRun

319

): Promise<DocumentInterface[]>;

320

321

/** Add documents with parent-child relationships */

322

addDocuments(

323

docs: DocumentInterface[],

324

options?: { ids?: string[] }

325

): Promise<void>;

326

}

327

```

328

329

### Time-Weighted Retriever

330

331

Retriever that considers document recency in addition to similarity.

332

333

```typescript { .api }

334

/**

335

* Retriever that weights documents by recency and relevance

336

*/

337

class TimeWeightedVectorStoreRetriever extends BaseRetriever {

338

constructor(args: TimeWeightedVectorStoreRetrieverArgs);

339

340

/** Underlying vector store */

341

vectorStore: VectorStoreInterface;

342

343

/** Number of documents to return */

344

k: number;

345

346

/** Decay rate for time weighting */

347

decayRate: number;

348

349

/** Memory stream for tracking access times */

350

memoryStream: MemoryStream;

351

352

/** Metadata keys whose values are used as additional scores */

353

otherScoreKeys: string[];

354

355

_getRelevantDocuments(

356

query: string,

357

runManager?: CallbackManagerForRetrieverRun

358

): Promise<DocumentInterface[]>;

359

360

/** Add documents with timestamps */

361

addDocuments(docs: DocumentInterface[]): Promise<void>;

362

363

/** Get salience scores based on time and access patterns */

364

getSalienceScores(docs: DocumentInterface[]): number[];

365

}

366

```

367

368

### Self-Query Retriever

369

370

Retriever that uses an LLM to translate a natural-language question into a structured query, including metadata filters.

371

372

```typescript { .api }

373

/**

374

* Retriever that constructs structured queries from natural language

375

*/

376

class SelfQueryRetriever extends BaseRetriever {

377

constructor(args: SelfQueryRetrieverArgs);

378

379

/** Vector store to query */

380

vectorStore: VectorStoreInterface;

381

382

/** LLM chain for query construction */

383

llmChain: LLMChain;

384

385

/** Structured query translator */

386

structuredQueryTranslator: BaseTranslator;

387

388

/** Verbose logging */

389

verbose?: boolean;

390

391

/** Search type */

392

searchType?: "similarity" | "mmr";

393

394

/** Search parameters */

395

searchKwargs?: Record<string, any>;

396

397

static fromLLM(args: {

398

llm: BaseLanguageModelInterface;

399

vectorStore: VectorStoreInterface;

400

documentContents: string;

401

attributeInfo: AttributeInfo[];

402

examples?: SelfQueryRetrieverExample[];

403

structuredQueryTranslator?: BaseTranslator;

404

allowedComparators?: Comparator[];

405

allowedOperators?: Operator[];

406

enable_limit?: boolean;

407

}): SelfQueryRetriever;

408

409

_getRelevantDocuments(

410

query: string,

411

runManager?: CallbackManagerForRetrieverRun

412

): Promise<DocumentInterface[]>;

413

}

414

```

415

416

### HyDE Retriever

417

418

Hypothetical Document Embeddings (HyDE) retriever that generates a hypothetical answer document for the query and uses it to drive retrieval through the base retriever.

419

420

```typescript { .api }

421

/**

422

* Retriever using Hypothetical Document Embeddings (HyDE)

423

*/

424

class HydeRetriever extends BaseRetriever {

425

constructor(args: HydeRetrieverArgs);

426

427

/** Base retriever */

428

baseRetriever: BaseRetrieverInterface;

429

430

/** LLM for generating hypothetical documents */

431

llm: BaseLanguageModelInterface;

432

433

/** Prompt for generating hypothetical documents */

434

promptTemplate: BasePromptTemplate;

435

436

_getRelevantDocuments(

437

query: string,

438

runManager?: CallbackManagerForRetrieverRun

439

): Promise<DocumentInterface[]>;

440

441

/** Generate hypothetical document */

442

generateHypotheticalDocument(query: string): Promise<string>;

443

}

444

```

445

446

### Document Compressors

447

448

Components for compressing and filtering retrieved documents.

449

450

```typescript { .api }

451

/**

452

* LLM-based document compressor/extractor

453

*/

454

class LLMChainExtractor extends BaseDocumentCompressor {

455

constructor(args: LLMChainExtractorArgs);

456

457

/** LLM chain for extraction */

458

llmChain: LLMChain;

459

460

/** Whether to get only relevant documents */

461

getOnlyRelevant: boolean;

462

463

static fromLLM(

464

llm: BaseLanguageModelInterface,

465

prompt?: BasePromptTemplate,

466

getOnlyRelevant?: boolean

467

): LLMChainExtractor;

468

469

compressDocuments(

470

documents: DocumentInterface[],

471

query: string,

472

runManager?: CallbackManagerForChainRun

473

): Promise<DocumentInterface[]>;

474

}

475

476

/**

477

* Embeddings-based document filter

478

*/

479

class EmbeddingsFilter extends BaseDocumentCompressor {

480

constructor(args: EmbeddingsFilterArgs);

481

482

/** Embeddings model */

483

embeddings: EmbeddingsInterface;

484

485

/** Similarity threshold */

486

similarityThreshold?: number;

487

488

/** Number of documents to return */

489

k?: number;

490

491

/** Similarity function */

492

similarityFn?: (a: number[], b: number[]) => number;

493

494

compressDocuments(

495

documents: DocumentInterface[],

496

query: string,

497

runManager?: CallbackManagerForChainRun

498

): Promise<DocumentInterface[]>;

499

}

500

```

501

502

### Matryoshka Retriever

503

504

Retriever optimized for Matryoshka embedding models with adaptive dimensions.

505

506

```typescript { .api }

507

/**

508

* Retriever optimized for Matryoshka embeddings

509

*/

510

class MatryoshkaRetriever extends BaseRetriever {

511

constructor(args: MatryoshkaRetrieverArgs);

512

513

/** Base retriever with full-dimensional embeddings */

514

baseRetriever: BaseRetrieverInterface;

515

516

/** Small dimension retriever for initial filtering */

517

smallDimRetriever: BaseRetrieverInterface;

518

519

/** Large dimension for final ranking */

520

largeDimension: number;

521

522

/** Small dimension for initial filtering */

523

smallDimension: number;

524

525

/** Number of candidates from small dim retrieval */

526

numCandidates: number;

527

528

_getRelevantDocuments(

529

query: string,

530

runManager?: CallbackManagerForRetrieverRun

531

): Promise<DocumentInterface[]>;

532

}

533

```

534

535

### Score Threshold Retriever

536

537

Retriever that filters results based on similarity score thresholds.

538

539

```typescript { .api }

540

/**

541

* Retriever that filters by similarity score threshold

542

*/

543

class ScoreThresholdRetriever extends BaseRetriever {

544

constructor(args: ScoreThresholdRetrieverArgs);

545

546

/** Base vector store */

547

vectorStore: VectorStoreInterface;

548

549

/** Minimum similarity score threshold */

550

minSimilarityScore: number;

551

552

/** Maximum number of documents to return */

553

maxK?: number;

554

555

/** Minimum number of documents to return */

556

minK?: number;

557

558

_getRelevantDocuments(

559

query: string,

560

runManager?: CallbackManagerForRetrieverRun

561

): Promise<DocumentInterface[]>;

562

563

static fromVectorStore(

564

vectorStore: VectorStoreInterface,

565

options: {

566

minSimilarityScore: number;

567

maxK?: number;

568

minK?: number;

569

}

570

): ScoreThresholdRetriever;

571

}

572

```

573

574

## Types

575

576

### Base Retriever Types

577

578

```typescript { .api }

579

interface BaseRetrieverInput {

580

verbose?: boolean;

581

callbacks?: BaseCallbackHandler[];

582

tags?: string[];

583

metadata?: Record<string, unknown>;

584

}

585

586

interface CallbackManagerForRetrieverRun extends BaseCallbackManager {

587

handleRetrieverStart?(

588

retriever: { name: string },

589

query: string,

590

runId?: string,

591

parentRunId?: string,

592

tags?: string[],

593

metadata?: Record<string, unknown>

594

): Promise<void>;

595

596

handleRetrieverEnd?(

597

documents: DocumentInterface[],

598

runId?: string

599

): Promise<void>;

600

601

handleRetrieverError?(error: Error, runId?: string): Promise<void>;

602

}

603

```

604

605

### Contextual Compression Types

606

607

```typescript { .api }

608

interface ContextualCompressionRetrieverArgs {

609

baseCompressor: BaseDocumentCompressor;

610

baseRetriever: BaseRetrieverInterface;

611

}

612

613

interface LLMChainExtractorArgs {

614

llmChain: LLMChain;

615

getOnlyRelevant?: boolean;

616

}

617

618

interface EmbeddingsFilterArgs {

619

embeddings: EmbeddingsInterface;

620

similarityThreshold?: number;

621

k?: number;

622

similarityFn?: (a: number[], b: number[]) => number;

623

}

624

```

625

626

### Multi-Query Retriever Types

627

628

```typescript { .api }

629

interface MultiQueryRetrieverInput {

630

retriever: BaseRetrieverInterface;

631

llmChain: LLMChain;

632

queryParser: BaseOutputParser<string[]>;

633

includeOriginal?: boolean;

634

}

635

```

636

637

### Ensemble Retriever Types

638

639

```typescript { .api }

640

interface EnsembleRetrieverArgs {

641

retrievers: BaseRetrieverInterface[];

642

weights?: number[];

643

c?: number;

644

}

645

```

646

647

### Multi-Vector Retriever Types

648

649

```typescript { .api }

650

interface MultiVectorRetrieverArgs {

651

vectorstore: VectorStoreInterface;

652

docstore: BaseStore<string, DocumentInterface>;

653

idKey?: string;

654

childK?: number;

655

parentK?: number;

656

}

657

```

658

659

### Parent Document Retriever Types

660

661

```typescript { .api }

662

interface ParentDocumentRetrieverArgs {

663

vectorstore: VectorStoreInterface;

664

docstore: BaseStore<string, DocumentInterface>;

665

childSplitter: TextSplitter;

666

parentSplitter?: TextSplitter;

667

parentIdKey?: string;

668

childK?: number;

669

}

670

```

671

672

### Time-Weighted Retriever Types

673

674

```typescript { .api }

675

interface TimeWeightedVectorStoreRetrieverArgs {

676

vectorStore: VectorStoreInterface;

677

k?: number;

678

decayRate?: number;

679

memoryStream?: MemoryStream;

680

otherScoreKeys?: string[];

681

}

682

683

interface MemoryStream {

684

addDocuments(docs: DocumentInterface[]): void;

685

get(key: string): any;

686

set(key: string, value: any): void;

687

}

688

```

689

690

### Self-Query Retriever Types

691

692

```typescript { .api }

693

interface SelfQueryRetrieverArgs {

694

vectorStore: VectorStoreInterface;

695

llmChain: LLMChain;

696

structuredQueryTranslator: BaseTranslator;

697

verbose?: boolean;

698

searchType?: "similarity" | "mmr";

699

searchKwargs?: Record<string, any>;

700

}

701

702

interface AttributeInfo {

703

name: string;

704

description: string;

705

type: string;

706

}

707

708

interface SelfQueryRetrieverExample {

709

query: string;

710

filter: Record<string, any>;

711

}

712

713

type Comparator = "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "contain" | "like" | "in" | "nin";

714

type Operator = "and" | "or" | "not";

715

716

abstract class BaseTranslator {

717

abstract visitOperation(operation: Operation): any;

718

abstract visitComparison(comparison: Comparison): any;

719

abstract visitStructuredQuery(query: StructuredQuery): any;

720

}

721

```

722

723

### HyDE Retriever Types

724

725

```typescript { .api }

726

interface HydeRetrieverArgs {

727

baseRetriever: BaseRetrieverInterface;

728

llm: BaseLanguageModelInterface;

729

promptTemplate?: BasePromptTemplate;

730

}

731

```

732

733

### Matryoshka Retriever Types

734

735

```typescript { .api }

736

interface MatryoshkaRetrieverArgs {

737

baseRetriever: BaseRetrieverInterface;

738

smallDimRetriever: BaseRetrieverInterface;

739

largeDimension: number;

740

smallDimension: number;

741

numCandidates?: number;

742

}

743

```

744

745

### Score Threshold Types

746

747

```typescript { .api }

748

interface ScoreThresholdRetrieverArgs {

749

vectorStore: VectorStoreInterface;

750

minSimilarityScore: number;

751

maxK?: number;

752

minK?: number;

753

}

754

```