# Embeddings

The Models module provides text embedding generation capabilities for semantic search, similarity comparison, and clustering tasks.

## Capabilities

### embedContent

Generate embeddings for text content.

```typescript { .api }
/**
 * Generate embeddings for text
 * @param params - Embedding parameters
 * @returns Promise resolving to embeddings response
 */
function embedContent(
  params: EmbedContentParameters
): Promise<EmbedContentResponse>;

interface EmbedContentParameters {
  /** Model name (e.g., 'text-embedding-004') */
  model: string;
  /** Content to embed */
  contents: ContentListUnion;
  /** Embedding configuration */
  config?: EmbedContentConfig;
}

interface EmbedContentResponse {
  /** Generated embeddings */
  embeddings?: ContentEmbedding[];
}

interface ContentEmbedding {
  /** Embedding values (vector) */
  values?: number[];
}
```

**Usage Examples:**

```typescript
import { GoogleGenAI } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Generate embedding for single text
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'What is machine learning?'
});

const embedding = response.embeddings?.[0]?.values;
console.log('Embedding dimensions:', embedding?.length);
console.log('Embedding vector:', embedding);

// Generate embeddings for multiple texts
const multiResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: [
    { role: 'user', parts: [{ text: 'Document 1 content' }] },
    { role: 'user', parts: [{ text: 'Document 2 content' }] },
    { role: 'user', parts: [{ text: 'Document 3 content' }] }
  ]
});

multiResponse.embeddings?.forEach((emb, i) => {
  console.log(`Embedding ${i}:`, emb.values?.slice(0, 5));
});
```

## Types

### EmbedContentConfig

Configuration for embedding generation.

```typescript { .api }
interface EmbedContentConfig {
  /** Task type for embedding */
  taskType?: TaskType;
  /** Title for retrieval document */
  title?: string;
  /** Output dimensionality */
  outputDimensionality?: number;
}

enum TaskType {
  TASK_TYPE_UNSPECIFIED = 'TASK_TYPE_UNSPECIFIED',
  /** Retrieval query embedding */
  RETRIEVAL_QUERY = 'RETRIEVAL_QUERY',
  /** Retrieval document embedding */
  RETRIEVAL_DOCUMENT = 'RETRIEVAL_DOCUMENT',
  /** Semantic similarity */
  SEMANTIC_SIMILARITY = 'SEMANTIC_SIMILARITY',
  /** Classification */
  CLASSIFICATION = 'CLASSIFICATION',
  /** Clustering */
  CLUSTERING = 'CLUSTERING',
  /** Question answering */
  QUESTION_ANSWERING = 'QUESTION_ANSWERING',
  /** Fact verification */
  FACT_VERIFICATION = 'FACT_VERIFICATION'
}
```

### ContentEmbedding

Embedding result for a single piece of content.

```typescript { .api }
interface ContentEmbedding {
  /** Embedding values (vector of floats) */
  values?: number[];
}
```

## Complete Examples

### Semantic Search

```typescript
import { GoogleGenAI, TaskType } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Document corpus
const documents = [
  'The quick brown fox jumps over the lazy dog',
  'Machine learning is a subset of artificial intelligence',
  'Python is a popular programming language',
  'Climate change affects global weather patterns',
  'Neural networks are inspired by biological neurons'
];

// Generate document embeddings
const docEmbeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    })
  )
);

const docVectors = docEmbeddings.map(r => r.embeddings![0].values!);

// Query
const query = 'Tell me about AI and machine learning';

const queryResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: query,
  config: {
    taskType: TaskType.RETRIEVAL_QUERY
  }
});

const queryVector = queryResponse.embeddings![0].values!;

// Calculate cosine similarity
function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

// Find most similar documents
const similarities = docVectors.map((docVec, i) => ({
  document: documents[i],
  similarity: cosineSimilarity(queryVector, docVec)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

console.log('Query:', query);
console.log('\nMost similar documents:');
similarities.forEach((result, i) => {
  console.log(`${i + 1}. ${result.document}`);
  console.log(`   Similarity: ${result.similarity.toFixed(4)}\n`);
});
```

### Clustering Documents

```typescript
import { TaskType } from '@google/genai';

const documents = [
  'Dogs are loyal pets',
  'Cats are independent animals',
  'Python is used for data science',
  'JavaScript runs in browsers',
  'Birds can fly in the sky',
  'Fish live in water',
  'Java is object-oriented',
  'Ruby is great for web development'
];

// Generate embeddings for clustering
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.CLUSTERING
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Simple K-means clustering (k=3)
function kMeansClustering(vectors: number[][], k: number): number[] {
  // Simplified k-means implementation
  // In production, use a proper ML library
  const assignments = new Array(vectors.length).fill(0);

  // Initialize centroids randomly
  const centroids = vectors.slice(0, k);

  for (let iter = 0; iter < 10; iter++) {
    // Assign to nearest centroid
    vectors.forEach((vec, i) => {
      let minDist = Infinity;
      let assignment = 0;

      centroids.forEach((centroid, j) => {
        const dist = euclideanDistance(vec, centroid);
        if (dist < minDist) {
          minDist = dist;
          assignment = j;
        }
      });

      assignments[i] = assignment;
    });

    // Update centroids
    for (let j = 0; j < k; j++) {
      const clusterVectors = vectors.filter((_, i) => assignments[i] === j);
      if (clusterVectors.length > 0) {
        centroids[j] = clusterVectors[0].map((_, dim) =>
          clusterVectors.reduce((sum, vec) => sum + vec[dim], 0) / clusterVectors.length
        );
      }
    }
  }

  return assignments;
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

const clusters = kMeansClustering(vectors, 3);

// Group documents by cluster
const clusterGroups: { [key: number]: string[] } = {};
documents.forEach((doc, i) => {
  const cluster = clusters[i];
  if (!clusterGroups[cluster]) {
    clusterGroups[cluster] = [];
  }
  clusterGroups[cluster].push(doc);
});

console.log('Document Clusters:');
Object.entries(clusterGroups).forEach(([cluster, docs]) => {
  console.log(`\nCluster ${cluster}:`);
  docs.forEach(doc => console.log(`  - ${doc}`));
});
```

### Text Classification

```typescript
import { TaskType } from '@google/genai';

// Training data (categories and examples)
const trainingData = [
  { category: 'sports', text: 'The team won the championship' },
  { category: 'sports', text: 'The player scored a goal' },
  { category: 'technology', text: 'New AI model released' },
  { category: 'technology', text: 'Software update available' },
  { category: 'food', text: 'The recipe uses fresh ingredients' },
  { category: 'food', text: 'The restaurant serves Italian cuisine' }
];

// Generate embeddings for training data
const trainingEmbeddings = await Promise.all(
  trainingData.map(item =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: item.text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// New texts to classify
const testTexts = [
  'The basketball game was exciting',
  'The new smartphone has great features',
  'This pasta dish is delicious'
];

// Generate embeddings for test data
const testEmbeddings = await Promise.all(
  testTexts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// Classify using k-NN (k=3)
testTexts.forEach((text, i) => {
  const testVec = testEmbeddings[i].embeddings![0].values!;

  // Calculate distances to all training examples
  const distances = trainingEmbeddings.map((emb, j) => ({
    category: trainingData[j].category,
    distance: euclideanDistance(testVec, emb.embeddings![0].values!)
  }));

  // Sort by distance and get top k
  distances.sort((a, b) => a.distance - b.distance);
  const topK = distances.slice(0, 3);

  // Vote for category
  const votes: { [key: string]: number } = {};
  topK.forEach(item => {
    votes[item.category] = (votes[item.category] || 0) + 1;
  });

  const prediction = Object.entries(votes).sort((a, b) => b[1] - a[1])[0][0];

  console.log(`Text: "${text}"`);
  console.log(`Predicted category: ${prediction}\n`);
});
```

### Question Answering with Embeddings

```typescript
import { TaskType } from '@google/genai';

// FAQ database
const faqs = [
  {
    question: 'How do I reset my password?',
    answer: 'Click on "Forgot Password" on the login page and follow the instructions.'
  },
  {
    question: 'What are your business hours?',
    answer: 'We are open Monday to Friday, 9 AM to 5 PM.'
  },
  {
    question: 'How can I contact support?',
    answer: 'You can reach our support team at support@example.com or call 1-800-123-4567.'
  },
  {
    question: 'What is your return policy?',
    answer: 'Items can be returned within 30 days of purchase with a receipt.'
  }
];

// Generate embeddings for FAQ questions
const faqEmbeddings = await Promise.all(
  faqs.map(faq =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: faq.question,
      config: {
        taskType: TaskType.QUESTION_ANSWERING
      }
    })
  )
);

// User question
const userQuestion = 'I forgot my password, what should I do?';

const questionResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: userQuestion,
  config: {
    taskType: TaskType.QUESTION_ANSWERING
  }
});

const questionVec = questionResponse.embeddings![0].values!;

// Find most similar FAQ
const similarities = faqEmbeddings.map((emb, i) => ({
  faq: faqs[i],
  similarity: cosineSimilarity(questionVec, emb.embeddings![0].values!)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

const bestMatch = similarities[0];

console.log('User Question:', userQuestion);
console.log('\nMost Similar FAQ:');
console.log('Q:', bestMatch.faq.question);
console.log('A:', bestMatch.faq.answer);
console.log('Similarity:', bestMatch.similarity.toFixed(4));
```

### Semantic Deduplication

```typescript
import { TaskType } from '@google/genai';

// Documents with potential duplicates
const documents = [
  'The cat sat on the mat',
  'A feline was sitting on the rug',
  'Machine learning is amazing',
  'Dogs are great companions',
  'ML is an incredible technology',
  'Canines make wonderful friends'
];

// Generate embeddings
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Find duplicates (similarity > threshold)
const threshold = 0.85;
const duplicates: Array<[number, number, number]> = [];

for (let i = 0; i < vectors.length; i++) {
  for (let j = i + 1; j < vectors.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    if (similarity > threshold) {
      duplicates.push([i, j, similarity]);
    }
  }
}

console.log('Potential Duplicates:');
duplicates.forEach(([i, j, sim]) => {
  console.log(`\nSimilarity: ${sim.toFixed(4)}`);
  console.log(`  1. "${documents[i]}"`);
  console.log(`  2. "${documents[j]}"`);
});

// Remove duplicates
const toRemove = new Set<number>();
duplicates.forEach(([i, j]) => {
  toRemove.add(j); // Keep first, remove second
});

const uniqueDocs = documents.filter((_, i) => !toRemove.has(i));

console.log('\nUnique Documents:');
uniqueDocs.forEach(doc => console.log(`  - ${doc}`));
```

### Batch Embedding Generation

```typescript
// Generate embeddings for large dataset efficiently
const largeDataset = Array.from({ length: 1000 }, (_, i) =>
  `Document ${i} with unique content`
);

// Process in batches
const batchSize = 50;
const allEmbeddings: number[][] = [];

for (let i = 0; i < largeDataset.length; i += batchSize) {
  const batch = largeDataset.slice(i, i + batchSize);

  const batchEmbeddings = await Promise.all(
    batch.map(doc =>
      client.models.embedContent({
        model: 'text-embedding-004',
        contents: doc
      })
    )
  );

  const vectors = batchEmbeddings.map(r => r.embeddings![0].values!);
  allEmbeddings.push(...vectors);

  console.log(`Processed ${Math.min(i + batchSize, largeDataset.length)}/${largeDataset.length}`);
}

console.log(`Generated ${allEmbeddings.length} embeddings`);
```

### Custom Dimensionality

```typescript
// Generate lower-dimensional embeddings for faster computation
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'Sample text for embedding',
  config: {
    outputDimensionality: 256 // Reduce from default (e.g., 768)
  }
});

const embedding = response.embeddings![0].values!;
console.log('Embedding dimensions:', embedding.length); // 256
```

### Store Embeddings for Vector Database

```typescript
import { TaskType } from '@google/genai';

interface DocumentWithEmbedding {
  id: string;
  text: string;
  embedding: number[];
  metadata: Record<string, any>;
}

const documents = [
  { id: '1', text: 'Document 1', category: 'tech' },
  { id: '2', text: 'Document 2', category: 'sports' },
  { id: '3', text: 'Document 3', category: 'food' }
];

// Generate and structure for storage
const documentsWithEmbeddings: DocumentWithEmbedding[] = await Promise.all(
  documents.map(async doc => {
    const response = await client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc.text,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    });

    return {
      id: doc.id,
      text: doc.text,
      embedding: response.embeddings![0].values!,
      metadata: { category: doc.category }
    };
  })
);

// Store in vector database (pseudo-code)
// await vectorDB.insert(documentsWithEmbeddings);

console.log('Documents indexed with embeddings');
```

### Multi-language Similarity

```typescript
// Compare texts in different languages
const texts = [
  'Hello, how are you?',
  'Bonjour, comment allez-vous?', // French
  'Hola, como estas?', // Spanish
  'The weather is nice today',
  'Il fait beau aujourd\'hui' // French
];

const embeddings = await Promise.all(
  texts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Calculate similarity matrix
console.log('Similarity Matrix:');
for (let i = 0; i < texts.length; i++) {
  for (let j = 0; j < texts.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    console.log(`"${texts[i]}" <-> "${texts[j]}": ${similarity.toFixed(4)}`);
  }
  console.log('');
}
```

### Helper Functions

```typescript
// Utility functions for working with embeddings

function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

function normalizeVector(vec: number[]): number[] {
  const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
  return vec.map(val => val / magnitude);
}

function dotProduct(a: number[], b: number[]): number {
  return a.reduce((sum, val, i) => sum + val * b[i], 0);
}
```