or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

built-in-tools.md, image-generation.md, index.md, language-models.md, provider-setup.md, text-embeddings.md
tile.json

docs/text-embeddings.md

Text Embeddings

Text embedding models for semantic search, similarity analysis, vector operations, and retrieval systems using Google's advanced embedding models.

Capabilities

Text Embedding Model Access

Get text embedding model instances for converting text into vector representations.

/**
 * Get a text embedding model instance.
 *
 * This is the accessor used by the usage examples in this document; the two
 * declarations that follow share its parameter and return type.
 *
 * @param modelId - Google embedding model identifier
 * @returns EmbeddingModelV2 instance typed with GoogleGenerativeAIEmbeddingProviderOptions
 */
textEmbedding(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV2<
  GoogleGenerativeAIEmbeddingProviderOptions
>;

/**
 * Get a text embedding model instance (alias for textEmbedding).
 *
 * @param modelId - Google embedding model identifier
 * @returns EmbeddingModelV2 instance typed with GoogleGenerativeAIEmbeddingProviderOptions
 */
textEmbeddingModel(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV2<
  GoogleGenerativeAIEmbeddingProviderOptions
>;

/**
 * Legacy accessor with the same signature as textEmbedding().
 *
 * @deprecated Use textEmbedding() instead
 * @param modelId - Google embedding model identifier
 * @returns EmbeddingModelV2 instance typed with GoogleGenerativeAIEmbeddingProviderOptions
 */
embedding(modelId: GoogleGenerativeAIEmbeddingModelId): EmbeddingModelV2<
  GoogleGenerativeAIEmbeddingProviderOptions
>;

Usage Examples:

import { google } from "@ai-sdk/google";
import { embed, embedMany } from "ai";

// Embed one sentence and inspect the resulting vector
const result = await embed({
  value: "The cat sat on the mat",
  model: google.textEmbedding("text-embedding-004"),
});

const singleVector = result.embedding;
console.log("Embedding vector:", singleVector);
console.log("Vector dimensions:", singleVector.length);

// Embed several sentences with a single call
const sentences = [
  "The cat sat on the mat",
  "A feline rested on the rug",
  "Dogs like to play fetch",
];
const results = await embedMany({
  values: sentences,
  model: google.textEmbedding("text-embedding-004"),
});

// Print only the first five components of each vector to keep the output short
for (const [position, vector] of results.embeddings.entries()) {
  console.log(`Text ${position + 1} embedding:`, vector.slice(0, 5), "...");
}

// The same model can be obtained through the alias and the deprecated accessor
const embeddingModel1 = google.textEmbeddingModel("gemini-embedding-001");
const embeddingModel2 = google.embedding("gemini-embedding-001"); // deprecated

Embedding Model IDs

Supported Google text embedding model identifiers.

/**
 * Identifier of a Google text embedding model.
 *
 * The two known model IDs are listed explicitly; the trailing `(string & {})`
 * member means any other string is accepted by the type as well.
 */
type GoogleGenerativeAIEmbeddingModelId =
  | "gemini-embedding-001"
  | "text-embedding-004"
  | (string & {});

Model Information:

  • text-embedding-004: Google's latest high-performance embedding model with excellent semantic understanding
  • gemini-embedding-001: Gemini-based embedding model optimized for various text understanding tasks

Embedding Provider Options

Configuration options for embedding generation including task type optimization and dimensionality control.

/**
 * Options that tune how a Google embedding model produces its vectors.
 * Both fields are optional.
 */
interface GoogleGenerativeAIEmbeddingProviderOptions {
  /** Optional reduced dimension for output embeddings (truncates from end) */
  outputDimensionality?: number;
  
  /** Task type optimization for embeddings */
  taskType?: EmbeddingTaskType;
}

/**
 * The task an embedding should be optimized for.
 * Each member is described by the comment next to it.
 */
type EmbeddingTaskType =
  | "SEMANTIC_SIMILARITY"      // Optimized for text similarity
  | "CLASSIFICATION"           // Optimized for text classification
  | "CLUSTERING"              // Optimized for clustering texts
  | "RETRIEVAL_DOCUMENT"      // Optimized for document retrieval
  | "RETRIEVAL_QUERY"         // Optimized for query-based retrieval
  | "QUESTION_ANSWERING"      // Optimized for answering questions
  | "FACT_VERIFICATION"       // Optimized for verifying facts
  | "CODE_RETRIEVAL_QUERY";   // Optimized for code retrieval

Usage Examples:

import { google } from "@ai-sdk/google";
import { embed, embedMany } from "ai";

// NOTE: the AI SDK keys `providerOptions` by provider name, so Google-specific
// settings must be nested under `google`. Options passed at the top level are
// not forwarded to the Google provider.

// Semantic similarity embeddings
const similarityResult = await embed({
  model: google.textEmbedding("text-embedding-004"),
  value: "Machine learning is a subset of artificial intelligence",
  providerOptions: {
    google: {
      taskType: "SEMANTIC_SIMILARITY",
    },
  },
});

// Document retrieval embeddings with reduced dimensions
const documents = [
  "Introduction to Python programming",
  "Advanced JavaScript concepts",
  "Machine learning fundamentals",
  "Database design principles",
];

const documentEmbeddings = await embedMany({
  model: google.textEmbedding("text-embedding-004"),
  values: documents,
  providerOptions: {
    google: {
      taskType: "RETRIEVAL_DOCUMENT",
      outputDimensionality: 512, // Reduce from default dimensions
    },
  },
});

// Classification embeddings
const classificationResult = await embed({
  model: google.textEmbedding("text-embedding-004"),
  value: "This product is amazing and I highly recommend it!",
  providerOptions: {
    google: {
      taskType: "CLASSIFICATION",
    },
  },
});

// Code retrieval embeddings
const codeResult = await embed({
  model: google.textEmbedding("text-embedding-004"),
  value: "function to sort an array of numbers in ascending order",
  providerOptions: {
    google: {
      taskType: "CODE_RETRIEVAL_QUERY",
    },
  },
});

Use Cases and Examples

Semantic Search

Build semantic search systems that understand meaning rather than just keywords.

import { google } from "@ai-sdk/google";
import { embed, embedMany } from "ai";

/**
 * Minimal in-memory semantic search index.
 *
 * Documents are embedded with the RETRIEVAL_DOCUMENT task type and queries with
 * RETRIEVAL_QUERY; results are ranked by cosine similarity between the two.
 */
class SemanticSearch {
  /** Indexed documents together with their embedding vectors, in insertion order. */
  private documents: Array<{ text: string; embedding: number[] }> = [];

  /**
   * Embed the given texts and append them to the index.
   *
   * @param texts - Document texts to index; an empty array is a no-op
   */
  async addDocuments(texts: string[]) {
    // Nothing to embed: skip the request entirely.
    if (texts.length === 0) {
      return;
    }

    const result = await embedMany({
      model: google.textEmbedding("text-embedding-004"),
      values: texts,
      // Provider options are keyed by provider name in the AI SDK.
      providerOptions: {
        google: {
          taskType: "RETRIEVAL_DOCUMENT",
        },
      },
    });

    // Embeddings are stored at the same position as their source text.
    for (let i = 0; i < texts.length; i++) {
      this.documents.push({
        text: texts[i],
        embedding: result.embeddings[i],
      });
    }
  }

  /**
   * Find the indexed documents most similar to a query.
   *
   * @param query - Free-text search query
   * @param topK - Maximum number of results to return (negative values yield no results)
   * @returns Up to `topK` entries sorted by descending similarity; `index` is
   *          the document's position in the index
   */
  async search(query: string, topK: number = 5) {
    const queryResult = await embed({
      model: google.textEmbedding("text-embedding-004"),
      value: query,
      providerOptions: {
        google: {
          taskType: "RETRIEVAL_QUERY",
        },
      },
    });

    const queryEmbedding = queryResult.embedding;

    // Score every document against the query.
    const similarities = this.documents.map((doc, index) => ({
      index,
      text: doc.text,
      similarity: this.cosineSimilarity(queryEmbedding, doc.embedding),
    }));

    // `similarities` is a fresh array, so sorting in place does not touch the index.
    // Clamp topK so a negative value cannot silently drop trailing results.
    return similarities
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, Math.max(0, topK));
  }

  /**
   * Cosine similarity of two equal-length vectors.
   *
   * @returns A value in [-1, 1], or 0 when either vector has zero magnitude
   * @throws Error when the vectors have different lengths
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error(
        `Embedding dimension mismatch: ${a.length} vs ${b.length}`
      );
    }

    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      squaredA += a[i] * a[i];
      squaredB += b[i] * b[i];
    }

    const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    // A zero vector has no direction; report "no similarity" instead of NaN.
    return denominator === 0 ? 0 : dotProduct / denominator;
  }
}

// Usage: index a small corpus, then run a single query against it
const corpus = [
  "Python is a high-level programming language",
  "Machine learning algorithms can classify data",
  "Databases store and retrieve information efficiently",
  "Web development involves HTML, CSS, and JavaScript",
];

const search = new SemanticSearch();
await search.addDocuments(corpus);

// Ask for the three closest documents
const results = await search.search("programming languages", 3);
console.log("Search results:", results);

Text Classification

Use embeddings for text classification and sentiment analysis.

import { google } from "@ai-sdk/google";
import { embed } from "ai";

/**
 * Embed a text with the CLASSIFICATION task type.
 *
 * This does not assign a label itself: it returns the embedding so it can be
 * fed to a separately trained classifier.
 *
 * @param text - Text to embed
 * @returns The input text, its embedding vector, and the vector's length
 */
async function classifyText(text: string) {
  const result = await embed({
    model: google.textEmbedding("text-embedding-004"),
    value: text,
    // Provider options are keyed by provider name in the AI SDK.
    providerOptions: {
      google: {
        taskType: "CLASSIFICATION",
      },
    },
  });

  // Use the embedding with your classification model
  // This is a simplified example - in practice, you'd use a trained classifier
  return {
    text,
    embedding: result.embedding,
    vector_length: result.embedding.length,
  };
}

// Example usage: embed a handful of sample sentences one after another
const texts = [
  "I love this product! It's amazing!",
  "This is terrible quality, very disappointed.",
  "The weather is sunny today.",
  "The code runs efficiently and handles edge cases well.",
];

for (const text of texts) {
  // Only the vector length is reported here
  const { vector_length } = await classifyText(text);
  console.log(`Text: "${text}"`);
  console.log(`Embedding dimensions: ${vector_length}`);
  console.log("---");
}

Clustering and Similarity Analysis

Group similar texts together using embedding-based clustering.

import { google } from "@ai-sdk/google";
import { embedMany } from "ai";

/**
 * Embed the given texts with the CLUSTERING task type and score every
 * unordered pair by cosine similarity.
 *
 * @param texts - Texts to compare
 * @returns One entry per pair (i < j), sorted by descending similarity
 */
async function clusterTexts(texts: string[]) {
  const result = await embedMany({
    model: google.textEmbedding("text-embedding-004"),
    values: texts,
    // Provider options are keyed by provider name in the AI SDK.
    providerOptions: {
      google: {
        taskType: "CLUSTERING",
      },
    },
  });

  const similarities: Array<{
    text1: string;
    text2: string;
    similarity: number;
  }> = [];

  // Calculate pairwise similarities; starting j at i + 1 visits each pair once
  for (let i = 0; i < texts.length; i++) {
    for (let j = i + 1; j < texts.length; j++) {
      const similarity = cosineSimilarity(
        result.embeddings[i],
        result.embeddings[j]
      );

      similarities.push({
        text1: texts[i],
        text2: texts[j],
        similarity,
      });
    }
  }

  return similarities.sort((a, b) => b.similarity - a.similarity);
}

/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param a - First vector
 * @param b - Second vector
 * @returns A value in [-1, 1], or 0 when either vector has zero magnitude
 *          (including two empty vectors)
 * @throws Error when the vectors have different lengths
 */
function cosineSimilarity(a: number[], b: number[]): number {
  // Mismatched lengths would otherwise multiply by `undefined` and yield NaN.
  if (a.length !== b.length) {
    throw new Error(
      `Embedding dimension mismatch: ${a.length} vs ${b.length}`
    );
  }

  // Accumulate the dot product and both squared norms in a single pass.
  let dotProduct = 0;
  let squaredA = 0;
  let squaredB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    squaredA += a[i] * a[i];
    squaredB += b[i] * b[i];
  }

  const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  // A zero vector has no direction; report "no similarity" instead of NaN.
  return denominator === 0 ? 0 : dotProduct / denominator;
}

// Example usage: rank all pairs of a few short sentences by similarity
const texts = [
  "Cats are adorable pets",
  "Dogs make great companions",
  "Programming requires logical thinking",
  "Coding involves problem-solving skills",
  "Felines are independent animals",
];

const similarities = await clusterTexts(texts);
console.log("Most similar text pairs:");

// Show only the three highest-scoring pairs
for (const pair of similarities.slice(0, 3)) {
  console.log(`"${pair.text1}" <-> "${pair.text2}": ${pair.similarity.toFixed(3)}`);
}

Question Answering Systems

Create embeddings optimized for question-answering applications.

import { google } from "@ai-sdk/google";
import { embed, embedMany } from "ai";

/**
 * Minimal in-memory question-answering lookup.
 *
 * Stored questions and incoming user questions are both embedded with the
 * QUESTION_ANSWERING task type; the answer attached to the most similar stored
 * question is returned.
 */
class QASystem {
  /** Stored question/answer pairs with the embedding of each question. */
  private knowledgeBase: Array<{
    question: string;
    answer: string;
    embedding: number[];
  }> = [];

  /**
   * Embed the questions of the given pairs and add the pairs to the knowledge base.
   *
   * @param pairs - Question/answer pairs to store; an empty array is a no-op
   */
  async addQAPairs(pairs: Array<{ question: string; answer: string }>) {
    // Nothing to embed: skip the request entirely.
    if (pairs.length === 0) {
      return;
    }

    const questions = pairs.map(pair => pair.question);
    const result = await embedMany({
      model: google.textEmbedding("text-embedding-004"),
      values: questions,
      // Provider options are keyed by provider name in the AI SDK.
      providerOptions: {
        google: {
          taskType: "QUESTION_ANSWERING",
        },
      },
    });

    // Embeddings are stored at the same position as their source pair.
    for (let i = 0; i < pairs.length; i++) {
      this.knowledgeBase.push({
        question: pairs[i].question,
        answer: pairs[i].answer,
        embedding: result.embeddings[i],
      });
    }
  }

  /**
   * Look up the stored pair whose question is closest to the user's question.
   *
   * @param userQuestion - Question to answer
   * @returns The best match and its similarity score. When the knowledge base
   *          is empty the result is `{ similarity: -1, answer: "", question: "" }`.
   */
  async findAnswer(userQuestion: string) {
    const queryResult = await embed({
      model: google.textEmbedding("text-embedding-004"),
      value: userQuestion,
      providerOptions: {
        google: {
          taskType: "QUESTION_ANSWERING",
        },
      },
    });

    const queryEmbedding = queryResult.embedding;

    // Find most similar question; -1 is the lowest possible cosine similarity,
    // so any real match replaces this sentinel.
    let bestMatch = { similarity: -1, answer: "", question: "" };

    for (const item of this.knowledgeBase) {
      const similarity = this.cosineSimilarity(queryEmbedding, item.embedding);
      if (similarity > bestMatch.similarity) {
        bestMatch = {
          similarity,
          answer: item.answer,
          question: item.question,
        };
      }
    }

    return bestMatch;
  }

  /**
   * Cosine similarity of two equal-length vectors.
   *
   * @returns A value in [-1, 1], or 0 when either vector has zero magnitude
   * @throws Error when the vectors have different lengths
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error(
        `Embedding dimension mismatch: ${a.length} vs ${b.length}`
      );
    }

    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      squaredA += a[i] * a[i];
      squaredB += b[i] * b[i];
    }

    const denominator = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    // A zero vector has no direction; report "no similarity" instead of NaN.
    return denominator === 0 ? 0 : dotProduct / denominator;
  }
}

// Example usage: load three question/answer pairs, then ask a related question
const seedPairs = [
  {
    question: "What is machine learning?",
    answer: "Machine learning is a subset of AI that enables computers to learn from data.",
  },
  {
    question: "How does neural network work?",
    answer: "Neural networks use interconnected nodes to process information like the human brain.",
  },
  {
    question: "What is deep learning?",
    answer: "Deep learning uses neural networks with multiple layers to learn complex patterns.",
  },
];

const qaSystem = new QASystem();
await qaSystem.addQAPairs(seedPairs);

const result = await qaSystem.findAnswer("Can you explain neural networks?");

// Report the matched question, its stored answer, and how close the match was
console.log("Best matching question:", result.question);
console.log("Answer:", result.answer);
console.log("Similarity score:", result.similarity);

Performance and Optimization

Dimensionality Reduction

Reduce embedding dimensions for storage and computational efficiency.

import { google } from "@ai-sdk/google";
import { embed } from "ai";

// Standard embedding (full dimensions)
const fullResult = await embed({
  model: google.textEmbedding("text-embedding-004"),
  value: "Sample text for embedding",
});

console.log("Full dimensions:", fullResult.embedding.length);

// Reduced dimensions for efficiency.
// Google-specific settings must be nested under the `google` key of
// `providerOptions`; a top-level `outputDimensionality` is not forwarded to
// the provider and the vector would keep its full size.
const reducedResult = await embed({
  model: google.textEmbedding("text-embedding-004"),
  value: "Sample text for embedding",
  providerOptions: {
    google: {
      outputDimensionality: 256, // Reduce to 256 dimensions
    },
  },
});

console.log("Reduced dimensions:", reducedResult.embedding.length);

Batch Processing

Process multiple texts efficiently in batches.

import { google } from "@ai-sdk/google";
import { embedMany } from "ai";

/**
 * Embed a list of texts in sequential batches.
 *
 * Each batch is sent as one `embedMany` call using the RETRIEVAL_DOCUMENT task
 * type and 512 output dimensions; the next batch starts only after the
 * previous one has completed.
 *
 * @param texts - Texts to embed; an empty array returns an empty result
 * @param batchSize - Maximum number of texts per request. Must be at least 1;
 *                    fractional values are rounded down.
 * @returns Embedding vectors in the same order as `texts`
 * @throws RangeError when `batchSize` is NaN or smaller than 1
 */
async function processBatch(texts: string[], batchSize: number = 100) {
  // A step of zero, a negative step or NaN would never advance the loop below.
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be at least 1, got ${batchSize}`);
  }
  // A fractional step would make consecutive slices overlap and duplicate items.
  const step = Math.floor(batchSize);

  const results: number[][] = [];

  for (let i = 0; i < texts.length; i += step) {
    const batch = texts.slice(i, i + step);

    const result = await embedMany({
      model: google.textEmbedding("text-embedding-004"),
      values: batch,
      // Provider options are keyed by provider name in the AI SDK.
      providerOptions: {
        google: {
          taskType: "RETRIEVAL_DOCUMENT",
          outputDimensionality: 512,
        },
      },
    });

    results.push(...result.embeddings);
  }

  return results;
}

// Build 1000 synthetic documents and embed them 50 at a time
const largeDataset: string[] = [];
for (let docNumber = 0; docNumber < 1000; docNumber++) {
  largeDataset.push(`Document ${docNumber}`);
}

const embeddings = await processBatch(largeDataset, 50);
console.log(`Processed ${embeddings.length} embeddings`);