0
# Retrievers and Vector Operations
1
2
Systems for finding and retrieving relevant documents from vector stores, databases, and other sources. Retrievers provide the bridge between questions and relevant information with support for similarity search, filtering, and ranking.
3
4
## Capabilities
5
6
### Base Retriever Interface
7
8
Foundation interface that all retrievers implement for consistent document retrieval.
9
10
```typescript { .api }
11
/**
12
* Base interface for all retrievers
13
*/
14
interface BaseRetrieverInterface extends RunnableInterface<string, DocumentInterface[]> {
15
/** Get relevant documents for a query string */
16
getRelevantDocuments(
17
query: string,
18
config?: Partial<CallbackManagerForRetrieverRun>
19
): Promise<DocumentInterface[]>;
20
21
/** Invoke the retriever (Runnable interface) */
22
invoke(
23
input: string,
24
options?: RunnableConfig
25
): Promise<DocumentInterface[]>;
26
27
/** Stream retrieval results */
28
stream(
29
input: string,
30
options?: RunnableConfig
31
): AsyncGenerator<DocumentInterface[]>;
32
33
/** Batch retrieve for multiple queries */
34
batch(
35
inputs: string[],
36
options?: RunnableConfig[]
37
): Promise<DocumentInterface[][]>;
38
}
39
40
/**
41
* Base retriever class
42
*/
43
abstract class BaseRetriever implements BaseRetrieverInterface {
44
constructor(fields?: BaseRetrieverInput);
45
46
/** Verbose logging */
47
verbose?: boolean;
48
49
/** Callback handlers */
50
callbacks?: BaseCallbackHandler[];
51
52
/** Tags for tracing */
53
tags?: string[];
54
55
/** Metadata for tracing */
56
metadata?: Record<string, unknown>;
57
58
/** Internal implementation of document retrieval */
59
abstract _getRelevantDocuments(
60
query: string,
61
runManager?: CallbackManagerForRetrieverRun
62
): Promise<DocumentInterface[]>;
63
64
getRelevantDocuments(query: string): Promise<DocumentInterface[]>;
65
invoke(input: string, options?: RunnableConfig): Promise<DocumentInterface[]>;
66
}
67
```
68
69
### Contextual Compression
70
71
Retriever that compresses retrieved documents to focus on relevant content.
72
73
```typescript { .api }
74
/**
75
* Retriever that compresses documents using a base compressor
76
*/
77
class ContextualCompressionRetriever extends BaseRetriever {
78
constructor(args: ContextualCompressionRetrieverArgs);
79
80
/** Base compressor for document compression */
81
baseCompressor: BaseDocumentCompressor;
82
83
/** Base retriever for initial document retrieval */
84
baseRetriever: BaseRetrieverInterface;
85
86
_getRelevantDocuments(
87
query: string,
88
runManager?: CallbackManagerForRetrieverRun
89
): Promise<DocumentInterface[]>;
90
}
91
92
/**
93
* Base document compressor interface
94
*/
95
abstract class BaseDocumentCompressor {
96
/** Compress documents based on query */
97
abstract compressDocuments(
98
documents: DocumentInterface[],
99
query: string,
100
runManager?: CallbackManagerForChainRun
101
): Promise<DocumentInterface[]>;
102
}
103
```
104
105
**Usage Example:**
106
107
```typescript
108
import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";
109
import { LLMChainExtractor } from "langchain/retrievers/document_compressors";
110
import { OpenAI } from "@langchain/openai";
111
112
const llm = new OpenAI({ temperature: 0 });
113
const baseRetriever = vectorStore.asRetriever({ k: 10 });
114
115
// Create a compressor that extracts relevant parts
116
const compressor = LLMChainExtractor.fromLLM(llm);
117
118
// Create contextual compression retriever
119
const compressionRetriever = new ContextualCompressionRetriever({
120
baseCompressor: compressor,
121
baseRetriever: baseRetriever,
122
});
123
124
const compressedDocs = await compressionRetriever.getRelevantDocuments(
125
"What are the main benefits of renewable energy?"
126
);
127
```
128
129
### Multi-Query Retriever
130
131
Generates multiple query variations to improve retrieval coverage.
132
133
```typescript { .api }
134
/**
135
* Retriever that generates multiple queries for broader document coverage
136
*/
137
class MultiQueryRetriever extends BaseRetriever {
138
constructor(args: MultiQueryRetrieverInput);
139
140
/** Base retriever to query with generated queries */
141
retriever: BaseRetrieverInterface;
142
143
/** LLM for generating query variations */
144
llmChain: LLMChain;
145
146
/** Whether to include original query */
147
includeOriginal: boolean;
148
149
/** Parser for extracting queries from LLM output */
150
queryParser: BaseOutputParser<string[]>;
151
152
static fromLLM(args: {
153
retriever: BaseRetrieverInterface;
154
llm: BaseLanguageModelInterface;
155
prompt?: BasePromptTemplate;
156
queryParser?: BaseOutputParser<string[]>;
157
includeOriginal?: boolean;
158
}): MultiQueryRetriever;
159
160
_getRelevantDocuments(
161
query: string,
162
runManager?: CallbackManagerForRetrieverRun
163
): Promise<DocumentInterface[]>;
164
165
/** Generate multiple query variations */
166
generateQueries(
167
question: string,
168
runManager?: CallbackManagerForChainRun
169
): Promise<string[]>;
170
}
171
```
172
173
**Usage Example:**
174
175
```typescript
176
import { MultiQueryRetriever } from "langchain/retrievers/multi_query";
177
import { OpenAI } from "@langchain/openai";
178
179
const llm = new OpenAI({ temperature: 0.1 });
180
const baseRetriever = vectorStore.asRetriever();
181
182
const multiQueryRetriever = MultiQueryRetriever.fromLLM({
183
retriever: baseRetriever,
184
llm: llm,
185
includeOriginal: true,
186
});
187
188
// This will generate multiple query variations and retrieve documents for each
189
const docs = await multiQueryRetriever.getRelevantDocuments(
190
"How does machine learning work?"
191
);
192
```
193
194
### Ensemble Retriever
195
196
Combines results from multiple retrievers with weighted ranking.
197
198
```typescript { .api }
199
/**
200
* Retriever that combines results from multiple retrievers
201
*/
202
class EnsembleRetriever extends BaseRetriever {
203
constructor(args: EnsembleRetrieverArgs);
204
205
/** Array of retrievers to combine */
206
retrievers: BaseRetrieverInterface[];
207
208
/** Weights for each retriever (optional) */
209
weights?: number[];
210
211
/** Constant for rank fusion algorithm */
212
c?: number;
213
214
_getRelevantDocuments(
215
query: string,
216
runManager?: CallbackManagerForRetrieverRun
217
): Promise<DocumentInterface[]>;
218
219
/** Rank fusion algorithm for combining results */
220
rankFusion(
221
results: DocumentInterface[][],
222
weights?: number[]
223
): DocumentInterface[];
224
}
225
```
226
227
**Usage Example:**
228
229
```typescript
230
import { EnsembleRetriever } from "langchain/retrievers/ensemble";
231
232
const vectorRetriever = vectorStore.asRetriever({ k: 5 });
233
const bm25Retriever = new BM25Retriever({ documents, k: 5 });
234
235
const ensembleRetriever = new EnsembleRetriever({
236
retrievers: [vectorRetriever, bm25Retriever],
237
weights: [0.7, 0.3], // Favor vector search slightly
238
});
239
240
const docs = await ensembleRetriever.getRelevantDocuments(
241
"What is the capital of France?"
242
);
243
```
244
245
### Multi-Vector Retriever
246
247
Supports multiple vectors per document for enhanced retrieval strategies.
248
249
```typescript { .api }
250
/**
251
* Retriever that supports multiple vectors per document
252
*/
253
class MultiVectorRetriever extends BaseRetriever {
254
constructor(args: MultiVectorRetrieverArgs);
255
256
/** Vector store for embeddings */
257
vectorstore: VectorStoreInterface;
258
259
/** Storage for full documents */
260
docstore: BaseStore<string, DocumentInterface>;
261
262
/** ID key in vector store metadata */
263
idKey: string;
264
265
/** Number of child documents to fetch from the vector store */
266
childK?: number;
267
268
/** Maximum number of parent documents to return */
269
parentK?: number;
270
271
_getRelevantDocuments(
272
query: string,
273
runManager?: CallbackManagerForRetrieverRun
274
): Promise<DocumentInterface[]>;
275
276
/** Add documents with multiple vectors */
277
addDocuments(
278
docs: DocumentInterface[],
279
options?: {
280
ids?: string[];
281
addToDocstore?: boolean;
282
}
283
): Promise<string[] | void>;
284
}
285
```
286
287
### Parent Document Retriever
288
289
Retrieves parent documents based on child document matches.
290
291
```typescript { .api }
292
/**
293
* Retriever that returns parent documents when child documents match
294
*/
295
class ParentDocumentRetriever extends BaseRetriever {
296
constructor(args: ParentDocumentRetrieverArgs);
297
298
/** Vector store containing child documents */
299
vectorstore: VectorStoreInterface;
300
301
/** Storage for parent documents */
302
docstore: BaseStore<string, DocumentInterface>;
303
304
/** Text splitter for creating child documents */
305
childSplitter: TextSplitter;
306
307
/** Optional parent text splitter */
308
parentSplitter?: TextSplitter;
309
310
/** ID key for parent documents */
311
parentIdKey: string;
312
313
/** Number of child documents to fetch from the vector store */
314
childK?: number;
315
316
_getRelevantDocuments(
317
query: string,
318
runManager?: CallbackManagerForRetrieverRun
319
): Promise<DocumentInterface[]>;
320
321
/** Add documents with parent-child relationships */
322
addDocuments(
323
docs: DocumentInterface[],
324
options?: { ids?: string[] }
325
): Promise<void>;
326
}
327
```
328
329
### Time-Weighted Retriever
330
331
Retriever that considers document recency in addition to similarity.
332
333
```typescript { .api }
334
/**
335
* Retriever that weights documents by recency and relevance
336
*/
337
class TimeWeightedVectorStoreRetriever extends BaseRetriever {
338
constructor(args: TimeWeightedVectorStoreRetrieverArgs);
339
340
/** Underlying vector store */
341
vectorStore: VectorStoreInterface;
342
343
/** Number of documents to return */
344
k: number;
345
346
/** Decay rate for time weighting */
347
decayRate: number;
348
349
/** Memory stream for tracking access times */
350
memoryStream: MemoryStream;
351
352
/** Metadata keys whose values are added to a document's combined score */
353
otherScoreKeys: string[];
354
355
_getRelevantDocuments(
356
query: string,
357
runManager?: CallbackManagerForRetrieverRun
358
): Promise<DocumentInterface[]>;
359
360
/** Add documents with timestamps */
361
addDocuments(docs: DocumentInterface[]): Promise<void>;
362
363
/** Get salience scores based on time and access patterns */
364
getSalienceScores(docs: DocumentInterface[]): number[];
365
}
366
```
367
368
### Self-Query Retriever
369
370
Retriever that uses an LLM to turn a natural-language question into a structured query, including metadata filters, and runs that query against the vector store.
371
372
```typescript { .api }
373
/**
374
* Retriever that constructs structured queries from natural language
375
*/
376
class SelfQueryRetriever extends BaseRetriever {
377
constructor(args: SelfQueryRetrieverArgs);
378
379
/** Vector store to query */
380
vectorStore: VectorStoreInterface;
381
382
/** LLM chain for query construction */
383
llmChain: LLMChain;
384
385
/** Structured query translator */
386
structuredQueryTranslator: BaseTranslator;
387
388
/** Verbose logging */
389
verbose?: boolean;
390
391
/** Search type */
392
searchType?: "similarity" | "mmr";
393
394
/** Search parameters */
395
searchKwargs?: Record<string, any>;
396
397
static fromLLM(args: {
398
llm: BaseLanguageModelInterface;
399
vectorStore: VectorStoreInterface;
400
documentContents: string;
401
attributeInfo: AttributeInfo[];
402
examples?: SelfQueryRetrieverExample[];
403
structuredQueryTranslator?: BaseTranslator;
404
allowedComparators?: Comparator[];
405
allowedOperators?: Operator[];
406
enable_limit?: boolean;
407
}): SelfQueryRetriever;
408
409
_getRelevantDocuments(
410
query: string,
411
runManager?: CallbackManagerForRetrieverRun
412
): Promise<DocumentInterface[]>;
413
}
414
```
415
416
### HyDE Retriever
417
418
Hypothetical Document Embeddings (HyDE) retriever: an LLM generates a hypothetical answer to the query, and that generated text is then used to retrieve similar real documents.
419
420
```typescript { .api }
421
/**
422
* Retriever using Hypothetical Document Embeddings (HyDE)
423
*/
424
class HydeRetriever extends BaseRetriever {
425
constructor(args: HydeRetrieverArgs);
426
427
/** Base retriever */
428
baseRetriever: BaseRetrieverInterface;
429
430
/** LLM for generating hypothetical documents */
431
llm: BaseLanguageModelInterface;
432
433
/** Prompt for generating hypothetical documents */
434
promptTemplate: BasePromptTemplate;
435
436
_getRelevantDocuments(
437
query: string,
438
runManager?: CallbackManagerForRetrieverRun
439
): Promise<DocumentInterface[]>;
440
441
/** Generate hypothetical document */
442
generateHypotheticalDocument(query: string): Promise<string>;
443
}
444
```
445
446
### Document Compressors
447
448
Components for compressing and filtering retrieved documents.
449
450
```typescript { .api }
451
/**
452
* LLM-based document compressor/extractor
453
*/
454
class LLMChainExtractor extends BaseDocumentCompressor {
455
constructor(args: LLMChainExtractorArgs);
456
457
/** LLM chain for extraction */
458
llmChain: LLMChain;
459
460
/** Whether to get only relevant documents */
461
getOnlyRelevant: boolean;
462
463
static fromLLM(
464
llm: BaseLanguageModelInterface,
465
prompt?: BasePromptTemplate,
466
getOnlyRelevant?: boolean
467
): LLMChainExtractor;
468
469
compressDocuments(
470
documents: DocumentInterface[],
471
query: string,
472
runManager?: CallbackManagerForChainRun
473
): Promise<DocumentInterface[]>;
474
}
475
476
/**
477
* Embeddings-based document filter
478
*/
479
class EmbeddingsFilter extends BaseDocumentCompressor {
480
constructor(args: EmbeddingsFilterArgs);
481
482
/** Embeddings model */
483
embeddings: EmbeddingsInterface;
484
485
/** Similarity threshold */
486
similarityThreshold?: number;
487
488
/** Number of documents to return */
489
k?: number;
490
491
/** Similarity function */
492
similarityFn?: (a: number[], b: number[]) => number;
493
494
compressDocuments(
495
documents: DocumentInterface[],
496
query: string,
497
runManager?: CallbackManagerForChainRun
498
): Promise<DocumentInterface[]>;
499
}
500
```
501
502
### Matryoshka Retriever
503
504
Retriever optimized for Matryoshka embedding models with adaptive dimensions.
505
506
```typescript { .api }
507
/**
508
* Retriever optimized for Matryoshka embeddings
509
*/
510
class MatryoshkaRetriever extends BaseRetriever {
511
constructor(args: MatryoshkaRetrieverArgs);
512
513
/** Base retriever with full-dimensional embeddings */
514
baseRetriever: BaseRetrieverInterface;
515
516
/** Small dimension retriever for initial filtering */
517
smallDimRetriever: BaseRetrieverInterface;
518
519
/** Large dimension for final ranking */
520
largeDimension: number;
521
522
/** Small dimension for initial filtering */
523
smallDimension: number;
524
525
/** Number of candidates from small dim retrieval */
526
numCandidates: number;
527
528
_getRelevantDocuments(
529
query: string,
530
runManager?: CallbackManagerForRetrieverRun
531
): Promise<DocumentInterface[]>;
532
}
533
```
534
535
### Score Threshold Retriever
536
537
Retriever that filters results based on similarity score thresholds.
538
539
```typescript { .api }
540
/**
541
* Retriever that filters by similarity score threshold
542
*/
543
class ScoreThresholdRetriever extends BaseRetriever {
544
constructor(args: ScoreThresholdRetrieverArgs);
545
546
/** Base vector store */
547
vectorStore: VectorStoreInterface;
548
549
/** Minimum similarity score threshold */
550
minSimilarityScore: number;
551
552
/** Maximum number of documents to return */
553
maxK?: number;
554
555
/** Minimum number of documents to return */
556
minK?: number;
557
558
_getRelevantDocuments(
559
query: string,
560
runManager?: CallbackManagerForRetrieverRun
561
): Promise<DocumentInterface[]>;
562
563
static fromVectorStore(
564
vectorStore: VectorStoreInterface,
565
options: {
566
minSimilarityScore: number;
567
maxK?: number;
568
minK?: number;
569
}
570
): ScoreThresholdRetriever;
571
}
572
```
573
574
## Types
575
576
### Base Retriever Types
577
578
```typescript { .api }
579
interface BaseRetrieverInput {
580
verbose?: boolean;
581
callbacks?: BaseCallbackHandler[];
582
tags?: string[];
583
metadata?: Record<string, unknown>;
584
}
585
586
interface CallbackManagerForRetrieverRun extends BaseCallbackManager {
587
handleRetrieverStart?(
588
retriever: { name: string },
589
query: string,
590
runId?: string,
591
parentRunId?: string,
592
tags?: string[],
593
metadata?: Record<string, unknown>
594
): Promise<void>;
595
596
handleRetrieverEnd?(
597
documents: DocumentInterface[],
598
runId?: string
599
): Promise<void>;
600
601
handleRetrieverError?(error: Error, runId?: string): Promise<void>;
602
}
603
```
604
605
### Contextual Compression Types
606
607
```typescript { .api }
608
interface ContextualCompressionRetrieverArgs {
609
baseCompressor: BaseDocumentCompressor;
610
baseRetriever: BaseRetrieverInterface;
611
}
612
613
interface LLMChainExtractorArgs {
614
llmChain: LLMChain;
615
getOnlyRelevant?: boolean;
616
}
617
618
interface EmbeddingsFilterArgs {
619
embeddings: EmbeddingsInterface;
620
similarityThreshold?: number;
621
k?: number;
622
similarityFn?: (a: number[], b: number[]) => number;
623
}
624
```
625
626
### Multi-Query Retriever Types
627
628
```typescript { .api }
629
interface MultiQueryRetrieverInput {
630
retriever: BaseRetrieverInterface;
631
llmChain: LLMChain;
632
queryParser: BaseOutputParser<string[]>;
633
includeOriginal?: boolean;
634
}
635
```
636
637
### Ensemble Retriever Types
638
639
```typescript { .api }
640
interface EnsembleRetrieverArgs {
641
retrievers: BaseRetrieverInterface[];
642
weights?: number[];
643
c?: number;
644
}
645
```
646
647
### Multi-Vector Retriever Types
648
649
```typescript { .api }
650
interface MultiVectorRetrieverArgs {
651
vectorstore: VectorStoreInterface;
652
docstore: BaseStore<string, DocumentInterface>;
653
idKey?: string;
654
childK?: number;
655
parentK?: number;
656
}
657
```
658
659
### Parent Document Retriever Types
660
661
```typescript { .api }
662
interface ParentDocumentRetrieverArgs {
663
vectorstore: VectorStoreInterface;
664
docstore: BaseStore<string, DocumentInterface>;
665
childSplitter: TextSplitter;
666
parentSplitter?: TextSplitter;
667
parentIdKey?: string;
668
childK?: number;
669
}
670
```
671
672
### Time-Weighted Retriever Types
673
674
```typescript { .api }
675
interface TimeWeightedVectorStoreRetrieverArgs {
676
vectorStore: VectorStoreInterface;
677
k?: number;
678
decayRate?: number;
679
memoryStream?: MemoryStream;
680
otherScoreKeys?: string[];
681
}
682
683
interface MemoryStream {
684
addDocuments(docs: DocumentInterface[]): void;
685
get(key: string): any;
686
set(key: string, value: any): void;
687
}
688
```
689
690
### Self-Query Retriever Types
691
692
```typescript { .api }
693
interface SelfQueryRetrieverArgs {
694
vectorStore: VectorStoreInterface;
695
llmChain: LLMChain;
696
structuredQueryTranslator: BaseTranslator;
697
verbose?: boolean;
698
searchType?: "similarity" | "mmr";
699
searchKwargs?: Record<string, any>;
700
}
701
702
interface AttributeInfo {
703
name: string;
704
description: string;
705
type: string;
706
}
707
708
interface SelfQueryRetrieverExample {
709
query: string;
710
filter: Record<string, any>;
711
}
712
713
type Comparator = "eq" | "ne" | "gt" | "gte" | "lt" | "lte" | "contain" | "like" | "in" | "nin";
714
type Operator = "and" | "or" | "not";
715
716
abstract class BaseTranslator {
717
abstract visitOperation(operation: Operation): any;
718
abstract visitComparison(comparison: Comparison): any;
719
abstract visitStructuredQuery(query: StructuredQuery): any;
720
}
721
```
722
723
### HyDE Retriever Types
724
725
```typescript { .api }
726
interface HydeRetrieverArgs {
727
baseRetriever: BaseRetrieverInterface;
728
llm: BaseLanguageModelInterface;
729
promptTemplate?: BasePromptTemplate;
730
}
731
```
732
733
### Matryoshka Retriever Types
734
735
```typescript { .api }
736
interface MatryoshkaRetrieverArgs {
737
baseRetriever: BaseRetrieverInterface;
738
smallDimRetriever: BaseRetrieverInterface;
739
largeDimension: number;
740
smallDimension: number;
741
numCandidates?: number;
742
}
743
```
744
745
### Score Threshold Types
746
747
```typescript { .api }
748
interface ScoreThresholdRetrieverArgs {
749
vectorStore: VectorStoreInterface;
750
minSimilarityScore: number;
751
maxK?: number;
752
minK?: number;
753
}
754
```