or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

auth-tokens.md, batch.md, caching.md, chat.md, client.md, content-generation.md, embeddings.md, file-search-stores.md, files.md, function-calling.md, image-generation.md, index.md, live.md, mcp.md, models.md, operations.md, tuning.md, video-generation.md

docs/embeddings.md

# Embeddings

The Models module provides text embedding generation capabilities for semantic search, similarity comparison, and clustering tasks.

## Capabilities

### embedContent

Generate embeddings for text content.

```typescript { .api }
/**
 * Generate embeddings for text
 * @param params - Embedding parameters
 * @returns Promise resolving to embeddings response
 */
function embedContent(
  params: EmbedContentParameters
): Promise<EmbedContentResponse>;

interface EmbedContentParameters {
  /** Model name (e.g., 'text-embedding-004') */
  model: string;
  /** Content to embed */
  contents: ContentListUnion;
  /** Embedding configuration */
  config?: EmbedContentConfig;
}

interface EmbedContentResponse {
  /** Generated embeddings */
  embeddings?: ContentEmbedding[];
}

interface ContentEmbedding {
  /** Embedding values (vector) */
  values?: number[];
}
```

**Usage Examples:**

```typescript
import { GoogleGenAI } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Generate embedding for single text
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'What is machine learning?'
});

const embedding = response.embeddings?.[0]?.values;
console.log('Embedding dimensions:', embedding?.length);
console.log('Embedding vector:', embedding);

// Generate embeddings for multiple texts
const multiResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: [
    { role: 'user', parts: [{ text: 'Document 1 content' }] },
    { role: 'user', parts: [{ text: 'Document 2 content' }] },
    { role: 'user', parts: [{ text: 'Document 3 content' }] }
  ]
});

multiResponse.embeddings?.forEach((emb, i) => {
  console.log(`Embedding ${i}:`, emb.values?.slice(0, 5));
});
```

## Types

### EmbedContentConfig

Configuration for embedding generation.

```typescript { .api }
interface EmbedContentConfig {
  /** Task type for embedding */
  taskType?: TaskType;
  /** Title for retrieval document */
  title?: string;
  /** Output dimensionality */
  outputDimensionality?: number;
}

enum TaskType {
  TASK_TYPE_UNSPECIFIED = 'TASK_TYPE_UNSPECIFIED',
  /** Retrieval query embedding */
  RETRIEVAL_QUERY = 'RETRIEVAL_QUERY',
  /** Retrieval document embedding */
  RETRIEVAL_DOCUMENT = 'RETRIEVAL_DOCUMENT',
  /** Semantic similarity */
  SEMANTIC_SIMILARITY = 'SEMANTIC_SIMILARITY',
  /** Classification */
  CLASSIFICATION = 'CLASSIFICATION',
  /** Clustering */
  CLUSTERING = 'CLUSTERING',
  /** Question answering */
  QUESTION_ANSWERING = 'QUESTION_ANSWERING',
  /** Fact verification */
  FACT_VERIFICATION = 'FACT_VERIFICATION'
}
```

### ContentEmbedding

Embedding result for a single piece of content.

```typescript { .api }
interface ContentEmbedding {
  /** Embedding values (vector of floats) */
  values?: number[];
}
```

## Complete Examples

### Semantic Search

```typescript
import { GoogleGenAI, TaskType } from '@google/genai';

const client = new GoogleGenAI({ apiKey: 'YOUR_API_KEY' });

// Document corpus
const documents = [
  'The quick brown fox jumps over the lazy dog',
  'Machine learning is a subset of artificial intelligence',
  'Python is a popular programming language',
  'Climate change affects global weather patterns',
  'Neural networks are inspired by biological neurons'
];

// Generate document embeddings
const docEmbeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    })
  )
);

const docVectors = docEmbeddings.map(r => r.embeddings![0].values!);

// Query
const query = 'Tell me about AI and machine learning';

const queryResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: query,
  config: {
    taskType: TaskType.RETRIEVAL_QUERY
  }
});

const queryVector = queryResponse.embeddings![0].values!;

// Calculate cosine similarity
function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

// Find most similar documents
const similarities = docVectors.map((docVec, i) => ({
  document: documents[i],
  similarity: cosineSimilarity(queryVector, docVec)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

console.log('Query:', query);
console.log('\nMost similar documents:');
similarities.forEach((result, i) => {
  console.log(`${i + 1}. ${result.document}`);
  console.log(`   Similarity: ${result.similarity.toFixed(4)}\n`);
});
```

### Clustering Documents

```typescript
import { TaskType } from '@google/genai';

const documents = [
  'Dogs are loyal pets',
  'Cats are independent animals',
  'Python is used for data science',
  'JavaScript runs in browsers',
  'Birds can fly in the sky',
  'Fish live in water',
  'Java is object-oriented',
  'Ruby is great for web development'
];

// Generate embeddings for clustering
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.CLUSTERING
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Simple K-means clustering (k=3)
function kMeansClustering(vectors: number[][], k: number): number[] {
  // Simplified k-means implementation
  // In production, use a proper ML library
  const assignments = new Array(vectors.length).fill(0);

  // Initialize centroids randomly
  const centroids = vectors.slice(0, k);

  for (let iter = 0; iter < 10; iter++) {
    // Assign to nearest centroid
    vectors.forEach((vec, i) => {
      let minDist = Infinity;
      let assignment = 0;

      centroids.forEach((centroid, j) => {
        const dist = euclideanDistance(vec, centroid);
        if (dist < minDist) {
          minDist = dist;
          assignment = j;
        }
      });

      assignments[i] = assignment;
    });

    // Update centroids
    for (let j = 0; j < k; j++) {
      const clusterVectors = vectors.filter((_, i) => assignments[i] === j);
      if (clusterVectors.length > 0) {
        centroids[j] = clusterVectors[0].map((_, dim) =>
          clusterVectors.reduce((sum, vec) => sum + vec[dim], 0) / clusterVectors.length
        );
      }
    }
  }

  return assignments;
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

const clusters = kMeansClustering(vectors, 3);

// Group documents by cluster
const clusterGroups: { [key: number]: string[] } = {};
documents.forEach((doc, i) => {
  const cluster = clusters[i];
  if (!clusterGroups[cluster]) {
    clusterGroups[cluster] = [];
  }
  clusterGroups[cluster].push(doc);
});

console.log('Document Clusters:');
Object.entries(clusterGroups).forEach(([cluster, docs]) => {
  console.log(`\nCluster ${cluster}:`);
  docs.forEach(doc => console.log(`  - ${doc}`));
});
```

### Text Classification

```typescript
import { TaskType } from '@google/genai';

// Training data (categories and examples)
const trainingData = [
  { category: 'sports', text: 'The team won the championship' },
  { category: 'sports', text: 'The player scored a goal' },
  { category: 'technology', text: 'New AI model released' },
  { category: 'technology', text: 'Software update available' },
  { category: 'food', text: 'The recipe uses fresh ingredients' },
  { category: 'food', text: 'The restaurant serves Italian cuisine' }
];

// Generate embeddings for training data
const trainingEmbeddings = await Promise.all(
  trainingData.map(item =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: item.text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// New texts to classify
const testTexts = [
  'The basketball game was exciting',
  'The new smartphone has great features',
  'This pasta dish is delicious'
];

// Generate embeddings for test data
const testEmbeddings = await Promise.all(
  testTexts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.CLASSIFICATION
      }
    })
  )
);

// Classify using k-NN (k=3)
testTexts.forEach((text, i) => {
  const testVec = testEmbeddings[i].embeddings![0].values!;

  // Calculate distances to all training examples
  const distances = trainingEmbeddings.map((emb, j) => ({
    category: trainingData[j].category,
    distance: euclideanDistance(testVec, emb.embeddings![0].values!)
  }));

  // Sort by distance and get top k
  distances.sort((a, b) => a.distance - b.distance);
  const topK = distances.slice(0, 3);

  // Vote for category
  const votes: { [key: string]: number } = {};
  topK.forEach(item => {
    votes[item.category] = (votes[item.category] || 0) + 1;
  });

  const prediction = Object.entries(votes).sort((a, b) => b[1] - a[1])[0][0];

  console.log(`Text: "${text}"`);
  console.log(`Predicted category: ${prediction}\n`);
});
```

### Question Answering with Embeddings

```typescript
import { TaskType } from '@google/genai';

// FAQ database
const faqs = [
  {
    question: 'How do I reset my password?',
    answer: 'Click on "Forgot Password" on the login page and follow the instructions.'
  },
  {
    question: 'What are your business hours?',
    answer: 'We are open Monday to Friday, 9 AM to 5 PM.'
  },
  {
    question: 'How can I contact support?',
    answer: 'You can reach our support team at support@example.com or call 1-800-123-4567.'
  },
  {
    question: 'What is your return policy?',
    answer: 'Items can be returned within 30 days of purchase with a receipt.'
  }
];

// Generate embeddings for FAQ questions
const faqEmbeddings = await Promise.all(
  faqs.map(faq =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: faq.question,
      config: {
        taskType: TaskType.QUESTION_ANSWERING
      }
    })
  )
);

// User question
const userQuestion = 'I forgot my password, what should I do?';

const questionResponse = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: userQuestion,
  config: {
    taskType: TaskType.QUESTION_ANSWERING
  }
});

const questionVec = questionResponse.embeddings![0].values!;

// Find most similar FAQ
const similarities = faqEmbeddings.map((emb, i) => ({
  faq: faqs[i],
  similarity: cosineSimilarity(questionVec, emb.embeddings![0].values!)
}));

similarities.sort((a, b) => b.similarity - a.similarity);

const bestMatch = similarities[0];

console.log('User Question:', userQuestion);
console.log('\nMost Similar FAQ:');
console.log('Q:', bestMatch.faq.question);
console.log('A:', bestMatch.faq.answer);
console.log('Similarity:', bestMatch.similarity.toFixed(4));
```

### Semantic Deduplication

```typescript
import { TaskType } from '@google/genai';

// Documents with potential duplicates
const documents = [
  'The cat sat on the mat',
  'A feline was sitting on the rug',
  'Machine learning is amazing',
  'Dogs are great companions',
  'ML is an incredible technology',
  'Canines make wonderful friends'
];

// Generate embeddings
const embeddings = await Promise.all(
  documents.map(doc =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Find duplicates (similarity > threshold)
const threshold = 0.85;
const duplicates: Array<[number, number, number]> = [];

for (let i = 0; i < vectors.length; i++) {
  for (let j = i + 1; j < vectors.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    if (similarity > threshold) {
      duplicates.push([i, j, similarity]);
    }
  }
}

console.log('Potential Duplicates:');
duplicates.forEach(([i, j, sim]) => {
  console.log(`\nSimilarity: ${sim.toFixed(4)}`);
  console.log(`  1. "${documents[i]}"`);
  console.log(`  2. "${documents[j]}"`);
});

// Remove duplicates
const toRemove = new Set<number>();
duplicates.forEach(([i, j]) => {
  toRemove.add(j); // Keep first, remove second
});

const uniqueDocs = documents.filter((_, i) => !toRemove.has(i));

console.log('\nUnique Documents:');
uniqueDocs.forEach(doc => console.log(`  - ${doc}`));
```

### Batch Embedding Generation

```typescript
// Generate embeddings for large dataset efficiently
const largeDataset = Array.from({ length: 1000 }, (_, i) =>
  `Document ${i} with unique content`
);

// Process in batches
const batchSize = 50;
const allEmbeddings: number[][] = [];

for (let i = 0; i < largeDataset.length; i += batchSize) {
  const batch = largeDataset.slice(i, i + batchSize);

  const batchEmbeddings = await Promise.all(
    batch.map(doc =>
      client.models.embedContent({
        model: 'text-embedding-004',
        contents: doc
      })
    )
  );

  const vectors = batchEmbeddings.map(r => r.embeddings![0].values!);
  allEmbeddings.push(...vectors);

  console.log(`Processed ${Math.min(i + batchSize, largeDataset.length)}/${largeDataset.length}`);
}

console.log(`Generated ${allEmbeddings.length} embeddings`);
```

### Custom Dimensionality

```typescript
// Generate lower-dimensional embeddings for faster computation
const response = await client.models.embedContent({
  model: 'text-embedding-004',
  contents: 'Sample text for embedding',
  config: {
    outputDimensionality: 256 // Reduce from default (e.g., 768)
  }
});

const embedding = response.embeddings![0].values!;
console.log('Embedding dimensions:', embedding.length); // 256
```

### Store Embeddings for Vector Database

```typescript
import { TaskType } from '@google/genai';

interface DocumentWithEmbedding {
  id: string;
  text: string;
  embedding: number[];
  metadata: Record<string, any>;
}

const documents = [
  { id: '1', text: 'Document 1', category: 'tech' },
  { id: '2', text: 'Document 2', category: 'sports' },
  { id: '3', text: 'Document 3', category: 'food' }
];

// Generate and structure for storage
const documentsWithEmbeddings: DocumentWithEmbedding[] = await Promise.all(
  documents.map(async doc => {
    const response = await client.models.embedContent({
      model: 'text-embedding-004',
      contents: doc.text,
      config: {
        taskType: TaskType.RETRIEVAL_DOCUMENT
      }
    });

    return {
      id: doc.id,
      text: doc.text,
      embedding: response.embeddings![0].values!,
      metadata: { category: doc.category }
    };
  })
);

// Store in vector database (pseudo-code)
// await vectorDB.insert(documentsWithEmbeddings);

console.log('Documents indexed with embeddings');
```

### Multi-language Similarity

```typescript
// Compare texts in different languages
const texts = [
  'Hello, how are you?',
  'Bonjour, comment allez-vous?', // French
  'Hola, como estas?', // Spanish
  'The weather is nice today',
  'Il fait beau aujourd\'hui' // French
];

const embeddings = await Promise.all(
  texts.map(text =>
    client.models.embedContent({
      model: 'text-embedding-004',
      contents: text,
      config: {
        taskType: TaskType.SEMANTIC_SIMILARITY
      }
    })
  )
);

const vectors = embeddings.map(r => r.embeddings![0].values!);

// Calculate similarity matrix
console.log('Similarity Matrix:');
for (let i = 0; i < texts.length; i++) {
  for (let j = 0; j < texts.length; j++) {
    const similarity = cosineSimilarity(vectors[i], vectors[j]);
    console.log(`"${texts[i]}" <-> "${texts[j]}": ${similarity.toFixed(4)}`);
  }
  console.log('');
}
```

### Helper Functions

```typescript
// Utility functions for working with embeddings

function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dotProduct / (magnitudeA * magnitudeB);
}

function euclideanDistance(a: number[], b: number[]): number {
  return Math.sqrt(a.reduce((sum, val, i) => sum + Math.pow(val - b[i], 2), 0));
}

function normalizeVector(vec: number[]): number[] {
  const magnitude = Math.sqrt(vec.reduce((sum, val) => sum + val * val, 0));
  return vec.map(val => val / magnitude);
}

function dotProduct(a: number[], b: number[]): number {
  return a.reduce((sum, val, i) => sum + val * b[i], 0);
}
```