# Embeddings

The Models module provides text embedding generation capabilities for semantic search, similarity comparison, and clustering tasks.

## Capabilities

### embedContent

Generate embeddings for text content.

```typescript { .api }
/**
 * Generate embeddings for text
 * @param params - Embedding parameters
 * @returns Promise resolving to embeddings response
 */
function embedContent(
  params: EmbedContentParameters
): Promise<EmbedContentResponse>;

interface EmbedContentParameters {
  /** Model name (e.g., 'text-embedding-004') */
  model: string;
  /** Content to embed */
  contents: ContentListUnion;
  /** Embedding configuration */
  config?: EmbedContentConfig;
}

interface EmbedContentResponse {
  /** Generated embeddings */
  embeddings?: ContentEmbedding[];
}

interface ContentEmbedding {
  /** Embedding values (vector) */
  values?: number[];
}
```

**Usage Examples:**

```typescript
import { GoogleGenAI } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Generate embedding for single text
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'What is machine learning?'
});

const embedding = response.embeddings?.[0]?.values;
console.log('Embedding dimensions:', embedding?.length);
console.log('Embedding vector:', embedding);

// Generate embeddings for multiple texts
const multiResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: [
    { role: 'user', parts: [{ text: 'Document 1 content' }] },
    { role: 'user', parts: [{ text: 'Document 2 content' }] },
    { role: 'user', parts: [{ text: 'Document 3 content' }] }
  ]
});

multiResponse.embeddings?.forEach((emb, i) => {
  console.log(`Embedding ${i}:`, emb.values?.slice(0, 5));
});
```

## Types

### EmbedContentConfig

Configuration for embedding generation.

```typescript { .api }
interface EmbedContentConfig {
  /** Task type for embedding */
  taskType?: TaskType;
  /** Title for retrieval document */
  title?: string;
  /** Output dimensionality */
  outputDimensionality?: number;
}

enum TaskType {
  TASK_TYPE_UNSPECIFIED = 'TASK_TYPE_UNSPECIFIED',
  /** Retrieval query embedding */
  RETRIEVAL_QUERY = 'RETRIEVAL_QUERY',
  /** Retrieval document embedding */
  RETRIEVAL_DOCUMENT = 'RETRIEVAL_DOCUMENT',
  /** Semantic similarity */
  SEMANTIC_SIMILARITY = 'SEMANTIC_SIMILARITY',
  /** Classification */
  CLASSIFICATION = 'CLASSIFICATION',
  /** Clustering */
  CLUSTERING = 'CLUSTERING',
  /** Question answering */
  QUESTION_ANSWERING = 'QUESTION_ANSWERING',
  /** Fact verification */
  FACT_VERIFICATION = 'FACT_VERIFICATION'
}
```

### ContentEmbedding

Embedding result for a single piece of content.

```typescript { .api }
interface ContentEmbedding {
  /** Embedding values (vector of floats) */
  values?: number[];
}
```

## Complete Examples

### Semantic Search

```typescript
import { GoogleGenAI, TaskType } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Document corpus
const documents = [
  'The quick brown fox jumps over the lazy dog',
  'Machine learning is a subset of artificial intelligence',
  'Python is a popular programming language',
  'Climate change affects global weather patterns',
  'Neural networks are inspired by biological neurons'
];

// Generate document embeddings
const docEmbeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    })
  )
);

const docVectors = docEmbeddings.map(r => r.embeddings![0].values!);

// Query
const query = 'Tell me about AI and machine learning';

const queryResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: query,
  config: {
    taskType: TaskType.RETRIEVAL_QUERY
  }
});

const queryVector = queryResponse.embeddings![0].values!;

// Calculate cosine similarity
function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

// Find most similar documents
const similarities = docVectors.map((docVec, i) => ({
  document: documents[i],
  similarity: cosineSimilarity(queryVector, docVec)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

console.log('Query:', query);
console.log('\nMost similar documents:');
similarities.forEach((result, i) => {
  console.log(`${i + 1}. ${result.document}`);
  console.log(`   Similarity: ${result.similarity.toFixed(4)}\n`);
});
```

### Clustering Documents

```typescript
import { TaskType } from '@google/genai';

const documents = [
  'Dogs are loyal pets',
  'Cats are independent animals',
  'Python is used for data science',
  'JavaScript runs in browsers',
  'Birds can fly in the sky',
  'Fish live in water',
  'Java is object-oriented',
  'Ruby is great for web development'
];

// Generate embeddings for clustering
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.CLUSTERING
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Simple K-means clustering (k=3)
function kMeansClustering(vectors: number[][], k: number): number[] {
  // Simplified k-means implementation
  // In production, use a proper ML library
  const assignments = new Array(vectors.length).fill(0);

  // Initialize centroids randomly
  const centroids = vectors.slice(0, k);

  for (let iter = 0; iter < 10; iter++) {
    // Assign to nearest centroid
    vectors.forEach((vec, i) => {
      let minDist = Infinity;
      let assignment = 0;

      centroids.forEach((centroid, j) => {
        const dist = euclideanDistance(vec, centroid);
        if (dist < minDist) {
          minDist = dist;
          assignment = j;
        }
      });

      assignments[i] = assignment;
    });

    // Update centroids
    for (let j = 0; j < k; j++) {
      const clusterVectors = vectors.filter((_, i) => assignments[i] === j);
      if (clusterVectors.length > 0) {
        centroids[j] = clusterVectors[0].map((_, dim) =>
          clusterVectors.reduce((sum, vec) => sum + vec[dim], 0) / clusterVectors.length
        );
      }
    }
  }

  return assignments;
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

const clusters = kMeansClustering(vectors, 3);

// Group documents by cluster
const clusterGroups: { [key: number]: string[] } = {};
documents.forEach((doc, i) => {
  const cluster = clusters[i];
  if (!clusterGroups[cluster]) {
    clusterGroups[cluster] = [];
  }
  clusterGroups[cluster].push(doc);
});

console.log('Document Clusters:');
Object.entries(clusterGroups).forEach(([cluster, docs]) => {
  console.log(`\nCluster ${cluster}:`);
  docs.forEach(doc => console.log(`  - ${doc}`));
});
```

### Text Classification

```typescript
import { TaskType } from '@google/genai';

// Training data (categories and examples)
const trainingData = [
  { category: 'sports', text: 'The team won the championship' },
  { category: 'sports', text: 'The player scored a goal' },
  { category: 'technology', text: 'New AI model released' },
  { category: 'technology', text: 'Software update available' },
  { category: 'food', text: 'The recipe uses fresh ingredients' },
  { category: 'food', text: 'The restaurant serves Italian cuisine' }
];

// Generate embeddings for training data
const trainingEmbeddings = await Promise.all(
  trainingData.map(item =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: item.text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// New texts to classify
const testTexts = [
  'The basketball game was exciting',
  'The new smartphone has great features',
  'This pasta dish is delicious'
];

// Generate embeddings for test data
const testEmbeddings = await Promise.all(
  testTexts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// Classify using k-NN (k=3)
testTexts.forEach((text, i) => {
  const testVec = testEmbeddings[i].embeddings![0].values!;

  // Calculate distances to all training examples
  const distances = trainingEmbeddings.map((emb, j) => ({
    category: trainingData[j].category,
    distance: euclideanDistance(testVec, emb.embeddings![0].values!)
  }));

  // Sort by distance and get top k
  distances.sort((a, b) => a.distance - b.distance);
  const topK = distances.slice(0, 3);

  // Vote for category
  const votes: { [key: string]: number } = {};
  topK.forEach(item => {
    votes[item.category] = (votes[item.category] || 0) + 1;
  });

  const prediction = Object.entries(votes).sort((a, b) => b[1] - a[1])[0][0];

  console.log(`Text: "${text}"`);
  console.log(`Predicted category: ${prediction}\n`);
});
```

### Question Answering with Embeddings

```typescript
import { TaskType } from '@google/genai';

// FAQ database
const faqs = [
  {
    question: 'How do I reset my password?',
    answer: 'Click on "Forgot Password" on the login page and follow the instructions.'
  },
  {
    question: 'What are your business hours?',
    answer: 'We are open Monday to Friday, 9 AM to 5 PM.'
  },
  {
    question: 'How can I contact support?',
    answer: 'You can reach our support team at support@example.com or call 1-800-123-4567.'
  },
  {
    question: 'What is your return policy?',
    answer: 'Items can be returned within 30 days of purchase with a receipt.'
  }
];

// Generate embeddings for FAQ questions
const faqEmbeddings = await Promise.all(
  faqs.map(faq =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: faq.question,
      config: {
        taskType: TaskType.QUESTION_ANSWERING
      }
    })
  )
);

// User question
const userQuestion = 'I forgot my password, what should I do?';

const questionResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: userQuestion,
  config: {
    taskType: TaskType.QUESTION_ANSWERING
  }
});

const questionVec = questionResponse.embeddings![0].values!;

// Find most similar FAQ
const similarities = faqEmbeddings.map((emb, i) => ({
  faq: faqs[i],
  similarity: cosineSimilarity(questionVec, emb.embeddings![0].values!)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

const bestMatch = similarities[0];

console.log('User Question:', userQuestion);
console.log('\nMost Similar FAQ:');
console.log('Q:', bestMatch.faq.question);
console.log('A:', bestMatch.faq.answer);
console.log('Similarity:', bestMatch.similarity.toFixed(4));
```

### Semantic Deduplication

```typescript
import { TaskType } from '@google/genai';

// Documents with potential duplicates
const documents = [
  'The cat sat on the mat',
  'A feline was sitting on the rug',
  'Machine learning is amazing',
  'Dogs are great companions',
  'ML is an incredible technology',
  'Canines make wonderful friends'
];

// Generate embeddings
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Find duplicates (similarity > threshold)
const threshold = 0.85;
const duplicates: Array<[number, number, number]> = [];

for (let i = 0; i < vectors.length; i++) {
  for (let j = i + 1; j < vectors.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    if (similarity > threshold) {
      duplicates.push([i, j, similarity]);
    }
  }
}

console.log('Potential Duplicates:');
duplicates.forEach(([i, j, sim]) => {
  console.log(`\nSimilarity: ${sim.toFixed(4)}`);
  console.log(`  1. "${documents[i]}"`);
  console.log(`  2. "${documents[j]}"`);
});

// Remove duplicates
const toRemove = new Set<number>();
duplicates.forEach(([i, j]) => {
  toRemove.add(j); // Keep first, remove second
});

const uniqueDocs = documents.filter((_, i) => !toRemove.has(i));

console.log('\nUnique Documents:');
uniqueDocs.forEach(doc => console.log(`  - ${doc}`));
```

### Batch Embedding Generation

```typescript
// Generate embeddings for large dataset efficiently
const largeDataset = Array.from({ length: 1000 }, (_, i) =>
  `Document ${i} with unique content`
);

// Process in batches
const batchSize = 50;
const allEmbeddings: number[][] = [];

for (let i = 0; i < largeDataset.length; i += batchSize) {
  const batch = largeDataset.slice(i, i + batchSize);

  const batchEmbeddings = await Promise.all(
    batch.map(doc =>
      client.models.embedContent({
        model: 'text-embedding-004',
        contents: doc
      })
    )
  );

  const vectors = batchEmbeddings.map(r => r.embeddings![0].values!);
  allEmbeddings.push(...vectors);

  console.log(`Processed ${Math.min(i + batchSize, largeDataset.length)}/${largeDataset.length}`);
}

console.log(`Generated ${allEmbeddings.length} embeddings`);
```

### Custom Dimensionality

```typescript
// Generate lower-dimensional embeddings for faster computation
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'Sample text for embedding',
  config: {
    outputDimensionality: 256 // Reduce from default (e.g., 768)
  }
});

const embedding = response.embeddings![0].values!;
console.log('Embedding dimensions:', embedding.length); // 256
```

### Store Embeddings for Vector Database

```typescript
import { TaskType } from '@google/genai';

interface DocumentWithEmbedding {
  id: string;
  text: string;
  embedding: number[];
  metadata: Record<string, any>;
}

const documents = [
  { id: '1', text: 'Document 1', category: 'tech' },
  { id: '2', text: 'Document 2', category: 'sports' },
  { id: '3', text: 'Document 3', category: 'food' }
];

// Generate and structure for storage
const documentsWithEmbeddings: DocumentWithEmbedding[] = await Promise.all(
  documents.map(async doc => {
    const response = await client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc.text,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    });

    return {
      id: doc.id,
      text: doc.text,
      embedding: response.embeddings![0].values!,
      metadata: { category: doc.category }
    };
  })
);

// Store in vector database (pseudo-code)
// await vectorDB.insert(documentsWithEmbeddings);

console.log('Documents indexed with embeddings');
```

### Multi-language Similarity

```typescript
// Compare texts in different languages
const texts = [
  'Hello, how are you?',
  'Bonjour, comment allez-vous?', // French
  'Hola, como estas?', // Spanish
  'The weather is nice today',
  'Il fait beau aujourd\'hui' // French
];

const embeddings = await Promise.all(
  texts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Calculate similarity matrix
console.log('Similarity Matrix:');
for (let i = 0; i < texts.length; i++) {
  for (let j = 0; j < texts.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    console.log(`"${texts[i]}" <-> "${texts[j]}": ${similarity.toFixed(4)}`);
  }
  console.log('');
}
```

### Helper Functions

```typescript
// Utility functions for working with embeddings

function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

function normalizeVector(vec: number[]): number[] {
  const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
  return vec.map(val => val / magnitude);
}

function dotProduct(a: number[], b: number[]): number {
  return a.reduce((sum, val, i) => sum + val * b[i], 0);
}
```