0
# Embeddings and Vector Stores
1
2
This document covers embedding models and vector stores, including embedding caches, similarity search, and metadata filtering. Embeddings are the foundation for semantic search and retrieval operations.
3
4
## Capabilities
5
6
### Cache-Backed Embeddings
7
8
An embeddings wrapper that stores document embeddings in a key-value cache so that the same content is not embedded more than once.
9
10
```typescript { .api }
11
/**
 * Embeddings wrapper that caches document embeddings in a key-value store
 * to avoid recomputing embeddings for the same content.
 */
class CacheBackedEmbeddings extends Embeddings {
  constructor(fields: CacheBackedEmbeddingsFields);

  /** Underlying embeddings implementation */
  underlyingEmbeddings: EmbeddingsInterface;

  /** Document embedding cache store */
  documentEmbeddingStore: BaseStore<string, Uint8Array>;

  /** Optional namespace for cache keys */
  namespace?: string;

  /** Embed query text (no caching for queries) */
  embedQuery(document: string): Promise<number[]>;

  /** Embed documents with caching */
  embedDocuments(documents: string[]): Promise<number[][]>;

  /**
   * Create from bytes store.
   *
   * The embeddings and the store are passed positionally, so `options`
   * only carries the optional cache-key namespace.
   */
  static fromBytesStore(
    underlyingEmbeddings: EmbeddingsInterface,
    documentEmbeddingStore: BaseStore<string, Uint8Array>,
    options?: { namespace?: string }
  ): CacheBackedEmbeddings;
}
39
```
40
41
**Usage Example:**
42
43
```typescript
44
import { CacheBackedEmbeddings } from "langchain/embeddings/cache_backed";
45
import { OpenAIEmbeddings } from "@langchain/openai";
46
import { LocalFileStore } from "langchain/storage/file_system";
47
48
// Create underlying embeddings
49
const underlyingEmbeddings = new OpenAIEmbeddings();
50
51
// Create file-based cache store
52
const cacheStore = await LocalFileStore.fromPath("./embeddings_cache");
53
54
// Create cache-backed embeddings
55
const cachedEmbeddings = CacheBackedEmbeddings.fromBytesStore(
56
underlyingEmbeddings,
57
cacheStore,
58
{ namespace: "openai_embeddings" }
59
);
60
61
// First call will compute and cache embeddings
62
const docs = ["Hello world", "LangChain is great", "Vector databases are useful"];
63
const embeddings1 = await cachedEmbeddings.embedDocuments(docs);
64
65
// Second call will use cached embeddings (much faster)
66
const embeddings2 = await cachedEmbeddings.embedDocuments(docs);
67
68
// Query embeddings are not cached (queries change frequently)
69
const queryEmbedding = await cachedEmbeddings.embedQuery("What is LangChain?");
70
```
71
72
### Fake Embeddings for Testing
73
74
Fake embedding implementations for use in unit tests and local development.
75
76
```typescript { .api }
77
/**
78
* Fake embeddings for testing purposes
79
*/
80
class FakeEmbeddings extends Embeddings {
81
constructor(params?: { size?: number; seed?: number });
82
83
/** Embedding dimension size */
84
size: number;
85
86
/** Random seed for reproducible embeddings */
87
seed?: number;
88
89
embedQuery(document: string): Promise<number[]>;
90
embedDocuments(documents: string[]): Promise<number[][]>;
91
}
92
93
/**
94
* Deterministic fake embeddings
95
*/
96
class DeterministicFakeEmbeddings extends Embeddings {
97
constructor(params?: { size?: number });
98
99
/** Embedding dimension size */
100
size: number;
101
102
embedQuery(document: string): Promise<number[]>;
103
embedDocuments(documents: string[]): Promise<number[][]>;
104
}
105
```
106
107
**Usage Example:**
108
109
```typescript
110
import { FakeEmbeddings } from "langchain/embeddings/fake";
111
112
// Create fake embeddings for testing
113
const fakeEmbeddings = new FakeEmbeddings({
114
size: 1536, // OpenAI embedding size
115
seed: 42 // For reproducible results
116
});
117
118
const testDocs = ["test doc 1", "test doc 2"];
119
const testEmbeddings = await fakeEmbeddings.embedDocuments(testDocs);
120
121
console.log(testEmbeddings[0].length); // 1536
122
console.log(testEmbeddings.length); // 2
123
```
124
125
### Memory Vector Store
126
127
In-memory vector store implementation for development and small datasets.
128
129
```typescript { .api }
130
/**
131
* In-memory vector store for small datasets and testing
132
*/
133
class MemoryVectorStore extends VectorStore {
134
constructor(embeddings: EmbeddingsInterface, fields?: MemoryVectorStoreArgs);
135
136
/** Array of stored vectors */
137
memoryVectors: MemoryVector[];
138
139
/** Similarity function for search */
140
similarity: SimilarityFunction;
141
142
/** Add documents to the vector store */
143
addDocuments(
144
documents: DocumentInterface[],
145
options?: { ids?: string[] }
146
): Promise<string[]>;
147
148
/** Add vectors directly */
149
addVectors(
150
vectors: number[][],
151
documents: DocumentInterface[],
152
options?: { ids?: string[] }
153
): Promise<string[]>;
154
155
/** Similarity search */
156
similaritySearch(
157
query: string,
158
k?: number,
159
filter?: MemoryVectorStoreFilter
160
): Promise<DocumentInterface[]>;
161
162
/** Similarity search with scores */
163
similaritySearchWithScore(
164
query: string,
165
k?: number,
166
filter?: MemoryVectorStoreFilter
167
): Promise<[DocumentInterface, number][]>;
168
169
/** Similarity search by vector */
170
similaritySearchVectorWithScore(
171
query: number[],
172
k: number,
173
filter?: MemoryVectorStoreFilter
174
): Promise<[DocumentInterface, number][]>;
175
176
/** Delete documents by ID */
177
delete(params: { ids: string[] }): Promise<void>;
178
179
/** Create from texts */
180
static fromTexts(
181
texts: string[],
182
metadatas: Record<string, any>[] | Record<string, any>,
183
embeddings: EmbeddingsInterface,
184
dbConfig?: MemoryVectorStoreArgs
185
): Promise<MemoryVectorStore>;
186
187
/** Create from documents */
188
static fromDocuments(
189
docs: DocumentInterface[],
190
embeddings: EmbeddingsInterface,
191
dbConfig?: MemoryVectorStoreArgs
192
): Promise<MemoryVectorStore>;
193
194
/** Create from existing vectors */
195
static fromExistingVectors(
196
vectors: number[][],
197
documents: DocumentInterface[],
198
embeddings: EmbeddingsInterface,
199
dbConfig?: MemoryVectorStoreArgs
200
): Promise<MemoryVectorStore>;
201
}
202
```
203
204
**Usage Example:**
205
206
```typescript
207
import { MemoryVectorStore } from "langchain/vectorstores/memory";
208
import { OpenAIEmbeddings } from "@langchain/openai";
209
210
const embeddings = new OpenAIEmbeddings();
211
212
// Create from texts
213
const vectorStore = await MemoryVectorStore.fromTexts(
214
[
215
"LangChain is a framework for building AI applications",
216
"Vector stores enable semantic search capabilities",
217
"Embeddings convert text into numerical representations"
218
],
219
[
220
{ source: "doc1", topic: "frameworks" },
221
{ source: "doc2", topic: "search" },
222
{ source: "doc3", topic: "embeddings" }
223
],
224
embeddings
225
);
226
227
// Search for similar documents
228
const results = await vectorStore.similaritySearch(
229
"How to build AI apps?",
230
2 // Return top 2 matches
231
);
232
233
console.log(results[0].pageContent);
234
// "LangChain is a framework for building AI applications"
235
236
// Search with scores
237
const resultsWithScores = await vectorStore.similaritySearchWithScore(
238
"semantic search",
239
3
240
);
241
242
resultsWithScores.forEach(([doc, score]) => {
243
console.log(`Score: ${score}, Content: ${doc.pageContent}`);
244
});
245
246
// Add more documents; keep the returned IDs so the documents can be removed later
const addedIds = await vectorStore.addDocuments([
  {
    pageContent: "RAG combines retrieval with generation",
    metadata: { source: "doc4", topic: "rag" }
  }
]);

// Delete the documents that were just added
await vectorStore.delete({ ids: addedIds });
256
```
257
258
### Vector Store Base Classes
259
260
Foundation classes for creating custom vector store implementations.
261
262
```typescript { .api }
263
/**
264
* Base vector store class
265
*/
266
abstract class VectorStore {
267
constructor(embeddings: EmbeddingsInterface, dbConfig: Record<string, any>);
268
269
/** Embeddings instance */
270
embeddings: EmbeddingsInterface;
271
272
/** Add documents to vector store */
273
abstract addDocuments(
274
documents: DocumentInterface[],
275
options?: AddDocumentOptions
276
): Promise<string[] | void>;
277
278
/** Add vectors directly */
279
abstract addVectors(
280
vectors: number[][],
281
documents: DocumentInterface[],
282
options?: AddDocumentOptions
283
): Promise<string[] | void>;
284
285
/** Similarity search */
286
abstract similaritySearch(
287
query: string,
288
k?: number,
289
filter?: VectorStoreFilter
290
): Promise<DocumentInterface[]>;
291
292
/** Similarity search with scores */
293
abstract similaritySearchWithScore(
294
query: string,
295
k?: number,
296
filter?: VectorStoreFilter
297
): Promise<[DocumentInterface, number][]>;
298
299
/** Similarity search by vector */
300
abstract similaritySearchVectorWithScore(
301
query: number[],
302
k: number,
303
filter?: VectorStoreFilter
304
): Promise<[DocumentInterface, number][]>;
305
306
/** Delete documents */
307
delete?(params: { ids: string[] }): Promise<void>;
308
309
/** Convert to retriever */
310
asRetriever(options?: VectorStoreRetrieverInput): VectorStoreRetriever;
311
312
/** Maximum marginal relevance search */
313
maxMarginalRelevanceSearch?(
314
query: string,
315
options: MaxMarginalRelevanceSearchOptions
316
): Promise<DocumentInterface[]>;
317
}
318
319
/**
320
* Base vector store interface
321
*/
322
interface VectorStoreInterface {
323
addDocuments(documents: DocumentInterface[]): Promise<string[] | void>;
324
similaritySearch(query: string, k?: number): Promise<DocumentInterface[]>;
325
similaritySearchWithScore(query: string, k?: number): Promise<[DocumentInterface, number][]>;
326
asRetriever(options?: VectorStoreRetrieverInput): BaseRetrieverInterface;
327
}
328
```
329
330
### Vector Store Retriever
331
332
Adapter that converts vector stores into retrievers for use in chains and agents.
333
334
```typescript { .api }
335
/**
336
* Retriever wrapper for vector stores
337
*/
338
class VectorStoreRetriever extends BaseRetriever {
339
constructor(fields: VectorStoreRetrieverInput);
340
341
/** Vector store instance */
342
vectorStore: VectorStoreInterface;
343
344
/** Number of documents to retrieve */
345
k: number;
346
347
/** Search type */
348
searchType: VectorStoreRetrieverSearchType;
349
350
/** Search parameters */
351
searchKwargs?: VectorStoreRetrieverSearchKwargs;
352
353
/** Filter applied to search results (metadata object or predicate function) */
354
filter?: VectorStoreFilter;
355
356
_getRelevantDocuments(query: string): Promise<DocumentInterface[]>;
357
358
/** Add documents to underlying vector store */
359
addDocuments(documents: DocumentInterface[]): Promise<string[] | void>;
360
}
361
```
362
363
**Usage Example:**
364
365
```typescript
366
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { OpenAIEmbeddings } from "@langchain/openai";

// The embeddings instance must exist before the store can be built
const embeddings = new OpenAIEmbeddings();

const vectorStore = await MemoryVectorStore.fromTexts(
  ["Text 1", "Text 2", "Text 3"],
  [{ id: 1 }, { id: 2 }, { id: 3 }],
  embeddings
);

// Convert to retriever
// NOTE(review): scoreThreshold is paired here with the
// "similarity_score_threshold" search type listed in
// VectorStoreRetrieverSearchType; confirm against the library version in use.
const retriever = vectorStore.asRetriever({
  k: 2,
  searchType: "similarity_score_threshold",
  searchKwargs: {
    scoreThreshold: 0.8
  }
});

// Use in retrieval chain
const docs = await retriever.getRelevantDocuments("query text");

// Use with filter
const filteredRetriever = vectorStore.asRetriever({
  k: 3,
  filter: (doc) => doc.metadata.id > 1
});
391
```
392
393
### Similarity Functions
394
395
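Built-in similarity and distance functions for comparing vectors. For similarity measures (cosine, dot product) a larger value means the vectors are closer; for distance measures (Euclidean, Manhattan) a smaller value means they are closer.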
396
397
```typescript { .api }
398
/**
399
* Cosine similarity function
400
*/
401
function cosineSimilarity(a: number[], b: number[]): number;
402
403
/**
404
* Euclidean distance function
405
*/
406
function euclideanDistance(a: number[], b: number[]): number;
407
408
/**
409
* Dot product similarity
410
*/
411
function dotProduct(a: number[], b: number[]): number;
412
413
/**
414
* Manhattan distance function
415
*/
416
function manhattanDistance(a: number[], b: number[]): number;
417
418
type SimilarityFunction = (a: number[], b: number[]) => number;
419
```
420
421
**Usage Example:**
422
423
```typescript
424
import {
425
cosineSimilarity,
426
euclideanDistance
427
} from "langchain/vectorstores/utils";
428
429
const vector1 = [1, 2, 3];
430
const vector2 = [4, 5, 6];
431
432
const cosine = cosineSimilarity(vector1, vector2);
433
const euclidean = euclideanDistance(vector1, vector2);
434
435
console.log(`Cosine similarity: ${cosine}`);
436
console.log(`Euclidean distance: ${euclidean}`);
437
```
438
439
## Types
440
441
### Cache-Backed Embeddings Types
442
443
```typescript { .api }
444
interface CacheBackedEmbeddingsFields {
445
/** Underlying embeddings implementation */
446
underlyingEmbeddings: EmbeddingsInterface;
447
448
/** Document embedding cache store */
449
documentEmbeddingStore: BaseStore<string, Uint8Array>;
450
451
/** Optional namespace for cache keys */
452
namespace?: string;
453
}
454
455
interface EmbeddingsInterface {
456
/** Embed a single query */
457
embedQuery(text: string): Promise<number[]>;
458
459
/** Embed multiple documents */
460
embedDocuments(documents: string[]): Promise<number[][]>;
461
}
462
```
463
464
### Memory Vector Store Types
465
466
```typescript { .api }
467
interface MemoryVectorStoreArgs {
468
/** Similarity function to use */
469
similarity?: SimilarityFunction;
470
}
471
472
interface MemoryVector {
473
/** Document content */
474
content: string;
475
476
/** Vector embedding */
477
embedding: number[];
478
479
/** Document metadata */
480
metadata: Record<string, any>;
481
482
/** Document ID */
483
id: string;
484
}
485
486
type MemoryVectorStoreFilter = (doc: DocumentInterface) => boolean;
487
```
488
489
### Vector Store Base Types
490
491
```typescript { .api }
492
interface AddDocumentOptions {
493
/** Document IDs */
494
ids?: string[];
495
496
/** Additional options */
497
[key: string]: any;
498
}
499
500
type VectorStoreFilter =
501
| Record<string, any>
502
| ((doc: DocumentInterface) => boolean);
503
504
interface MaxMarginalRelevanceSearchOptions {
505
/** Number of documents to return */
506
k?: number;
507
508
/** Number of documents to fetch for reranking */
509
fetchK?: number;
510
511
/** Lambda parameter for MMR */
512
lambda?: number;
513
514
/** Filter applied to candidate documents (metadata object or predicate function) */
515
filter?: VectorStoreFilter;
516
}
517
```
518
519
### Vector Store Retriever Types
520
521
```typescript { .api }
522
interface VectorStoreRetrieverInput {
523
/** Vector store instance */
524
vectorStore: VectorStoreInterface;
525
526
/** Number of documents to retrieve */
527
k?: number;
528
529
/** Search type */
530
searchType?: VectorStoreRetrieverSearchType;
531
532
/** Search parameters */
533
searchKwargs?: VectorStoreRetrieverSearchKwargs;
534
535
/** Filter applied to search results (metadata object or predicate function) */
536
filter?: VectorStoreFilter;
537
538
/** Verbose logging */
539
verbose?: boolean;
540
}
541
542
type VectorStoreRetrieverSearchType =
543
| "similarity"
544
| "mmr"
545
| "similarity_score_threshold";
546
547
interface VectorStoreRetrieverSearchKwargs {
548
/** Score threshold for filtering */
549
scoreThreshold?: number;
550
551
/** Fetch K parameter for MMR */
552
fetchK?: number;
553
554
/** Lambda parameter for MMR */
555
lambda?: number;
556
557
/** Additional search parameters */
558
[key: string]: any;
559
}
560
```
561
562
### Base Store Interface
563
564
```typescript { .api }
565
/**
566
* Abstract base class for key-value stores
567
*/
568
abstract class BaseStore<K, V> {
569
/** Get multiple values by keys */
570
abstract mget(keys: K[]): Promise<(V | undefined)[]>;
571
572
/** Set multiple key-value pairs */
573
abstract mset(keyValuePairs: [K, V][]): Promise<void>;
574
575
/** Delete multiple keys */
576
abstract mdelete(keys: K[]): Promise<void>;
577
578
/** Yield all keys with optional prefix */
579
abstract yieldKeys(prefix?: string): AsyncGenerator<K>;
580
}
581
```
582
583
### Fake Embeddings Types
584
585
```typescript { .api }
586
interface FakeEmbeddingsParams {
587
/** Embedding dimension size */
588
size?: number;
589
590
/** Random seed for reproducible results */
591
seed?: number;
592
}
593
594
interface DeterministicFakeEmbeddingsParams {
595
/** Embedding dimension size */
596
size?: number;
597
}
598
```
599
600
## Vector Store Usage Patterns
601
602
### RAG Implementation
603
604
```typescript
605
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { RetrievalQAChain } from "langchain/chains";
// Single import per module instead of two separate imports from "@langchain/openai"
import { OpenAI, OpenAIEmbeddings } from "@langchain/openai";

// Create embeddings and vector store
const embeddings = new OpenAIEmbeddings();
const vectorStore = await MemoryVectorStore.fromTexts(
  [
    "The sky is blue during clear weather.",
    "Roses are red and violets are blue.",
    "The ocean appears blue due to light scattering."
  ],
  [{}, {}, {}], // one (empty) metadata object per text
  embeddings
);

// Create RAG chain: the retriever supplies the top 2 matches as context
const llm = new OpenAI({ temperature: 0 });
const retriever = vectorStore.asRetriever({ k: 2 });

const qa = RetrievalQAChain.fromLLM(llm, retriever);

const response = await qa.call({
  query: "What color is the sky?"
});
631
```
632
633
### Custom Vector Store
634
635
```typescript
636
/**
 * Minimal in-memory `VectorStore` subclass showing which methods a custom
 * store has to provide.
 *
 * Documents and their vectors are kept in two parallel arrays, and every
 * search is a brute-force cosine-similarity scan over all stored vectors.
 * `VectorStore`, `DocumentInterface` and `cosineSimilarity` must be imported
 * by the surrounding module.
 */
class CustomVectorStore extends VectorStore {
  private documents: DocumentInterface[] = [];
  private vectors: number[][] = [];

  /**
   * Identifier for this store type.
   * NOTE(review): some versions of the base class declare this as abstract;
   * confirm against the library version in use.
   */
  _vectorstoreType(): string {
    return "custom";
  }

  /**
   * Embed the documents and store them.
   *
   * @param documents Documents to add.
   * @param options Optional caller-supplied IDs, one per document.
   * @returns The ID assigned to each document, in input order.
   */
  async addDocuments(
    documents: DocumentInterface[],
    options?: { ids?: string[] }
  ): Promise<string[]> {
    if (documents.length === 0) {
      return [];
    }

    // One batched embedDocuments call instead of one embedQuery call per
    // document: fewer round-trips, and documents go through the document
    // embedding path rather than the query path.
    const vectors = await this.embeddings.embedDocuments(
      documents.map((doc) => doc.pageContent)
    );

    return this.addVectors(vectors, documents, options);
  }

  /**
   * Store pre-computed vectors alongside their documents.
   *
   * @param vectors One embedding per document.
   * @param documents Documents the vectors belong to.
   * @param options Optional caller-supplied IDs, one per document.
   * @returns The ID assigned to each document, in input order.
   * @throws Error if the number of vectors or IDs does not match the number of documents.
   */
  async addVectors(
    vectors: number[][],
    documents: DocumentInterface[],
    options?: { ids?: string[] }
  ): Promise<string[]> {
    if (vectors.length !== documents.length) {
      throw new Error(
        `Expected one vector per document, got ${vectors.length} vectors for ${documents.length} documents`
      );
    }
    const suppliedIds = options?.ids;
    if (suppliedIds !== undefined && suppliedIds.length !== documents.length) {
      throw new Error(
        `Expected one id per document, got ${suppliedIds.length} ids for ${documents.length} documents`
      );
    }

    const ids: string[] = [];
    for (const [index, doc] of documents.entries()) {
      const vector = vectors[index];
      if (vector === undefined) {
        // Unreachable after the length check; keeps indexed access type-safe.
        continue;
      }
      const id = suppliedIds?.[index] ?? `doc_${Date.now()}_${Math.random()}`;

      // The ID is also copied into the stored metadata.
      this.documents.push({ ...doc, metadata: { ...doc.metadata, id } });
      this.vectors.push(vector);
      ids.push(id);
    }

    return ids;
  }

  /**
   * Rank all stored documents against a query vector.
   *
   * @param query Query embedding.
   * @param k Maximum number of results; values below 1 yield an empty list.
   * @returns `[document, score]` pairs sorted by descending cosine similarity.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number
  ): Promise<[DocumentInterface, number][]> {
    const scored = this.documents.map((doc, index) => {
      const score = cosineSimilarity(query, this.vectors[index]);
      return [doc, score] as [DocumentInterface, number];
    });

    // `scored` is a fresh array, so sorting it in place is safe.
    // Clamp k so that a negative value cannot slice from the end.
    return scored.sort((a, b) => b[1] - a[1]).slice(0, Math.max(0, k));
  }

  /**
   * Embed the query text and return the `k` most similar documents with scores.
   *
   * @param query Query text.
   * @param k Maximum number of results (default 4).
   */
  async similaritySearchWithScore(
    query: string,
    k: number = 4
  ): Promise<[DocumentInterface, number][]> {
    const queryVector = await this.embeddings.embedQuery(query);
    return this.similaritySearchVectorWithScore(queryVector, k);
  }

  /**
   * Same as `similaritySearchWithScore`, but returns only the documents.
   *
   * @param query Query text.
   * @param k Maximum number of results; defaults to 4 when omitted.
   */
  async similaritySearch(query: string, k?: number): Promise<DocumentInterface[]> {
    const results = await this.similaritySearchWithScore(query, k);
    return results.map(([doc]) => doc);
  }
}
678
```