CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-com-google-genai--google-genai

Java idiomatic SDK for the Gemini Developer APIs and Vertex AI APIs

Overview
Eval results
Files

docs/embeddings-tokens.md

Embeddings and Token Operations

Generate embeddings for text and manage token counting and computation for prompt optimization.

Core Imports

import com.google.genai.Models;
import com.google.genai.AsyncModels;
import com.google.genai.LocalTokenizer;
import com.google.genai.types.EmbedContentResponse;
import com.google.genai.types.EmbedContentConfig;
import com.google.genai.types.ContentEmbedding;
import com.google.genai.types.CountTokensResponse;
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.ComputeTokensResponse;
import com.google.genai.types.ComputeTokensConfig;
import java.util.concurrent.CompletableFuture;

Embeddings

Models Service - Embeddings

package com.google.genai;

public final class Models {
  // Single text embedding
  public EmbedContentResponse embedContent(
      String model,
      String text,
      EmbedContentConfig config);

  // Multiple texts embedding
  public EmbedContentResponse embedContent(
      String model,
      List<String> texts,
      EmbedContentConfig config);
}

Async Models Service - Embeddings

package com.google.genai;

public final class AsyncModels {
  public CompletableFuture<EmbedContentResponse> embedContent(
      String model,
      String text,
      EmbedContentConfig config);

  public CompletableFuture<EmbedContentResponse> embedContent(
      String model,
      List<String> texts,
      EmbedContentConfig config);
}

Embed Content Config

package com.google.genai.types;

public final class EmbedContentConfig {
  public static Builder builder();

  public Optional<String> taskType();
  public Optional<String> title();
  public Optional<Integer> outputDimensionality();
  public Optional<String> mimeType();
  public Optional<Boolean> autoTruncate();
  public Optional<HttpOptions> httpOptions();
}

Task Types:

  • RETRIEVAL_QUERY - For search queries
  • RETRIEVAL_DOCUMENT - For documents to be searched
  • SEMANTIC_SIMILARITY - For similarity comparison
  • CLASSIFICATION - For text classification
  • CLUSTERING - For text clustering

Embed Content Response

package com.google.genai.types;

public final class EmbedContentResponse {
  public Optional<ContentEmbedding> embedding();
  public Optional<List<ContentEmbedding>> embeddings();
  public Optional<HttpResponse> sdkHttpResponse();
}

Content Embedding

package com.google.genai.types;

public final class ContentEmbedding {
  public Optional<List<Float>> values();
  public Optional<ContentEmbeddingStatistics> statistics();
}

Content Embedding Statistics

package com.google.genai.types;

public final class ContentEmbeddingStatistics {
  public Optional<Integer> tokenCount();
  public Optional<Boolean> truncated();
}

Basic Embedding Usage

import com.google.genai.Client;
import com.google.genai.types.EmbedContentResponse;

Client client = new Client();

// Single text embedding
EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    "Why is the sky blue?",
    null
);

// Access embedding values
response.embedding().ifPresent(embedding -> {
    embedding.values().ifPresent(values -> {
        System.out.println("Embedding dimension: " + values.size());
        System.out.println("First few values: " + values.subList(0, 5));
    });
});

Multiple Text Embeddings

import com.google.common.collect.ImmutableList;
import com.google.genai.types.ContentEmbedding;

List<String> texts = ImmutableList.of(
    "What is machine learning?",
    "How does AI work?",
    "Explain neural networks"
);

EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    texts,
    null
);

// Access multiple embeddings
response.embeddings().ifPresent(embeddings -> {
    System.out.println("Generated " + embeddings.size() + " embeddings");
    for (int i = 0; i < embeddings.size(); i++) {
        ContentEmbedding emb = embeddings.get(i);
        System.out.println("Text " + (i + 1) + " embedding dimension: " +
            emb.values().map(List::size).orElse(0));
    }
});

Embedding with Configuration

import com.google.genai.types.EmbedContentConfig;

EmbedContentConfig config = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_DOCUMENT")
    .title("Document about AI")
    .outputDimensionality(256) // Reduce dimensionality
    .autoTruncate(true)
    .build();

EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    "This is a long document about artificial intelligence...",
    config
);

// Check if truncated
response.embedding().ifPresent(embedding -> {
    embedding.statistics().ifPresent(stats -> {
        if (stats.truncated().orElse(false)) {
            System.out.println("Input was truncated");
        }
        System.out.println("Token count: " + stats.tokenCount().orElse(0));
    });
});

Embeddings for Search

import com.google.genai.types.EmbedContentConfig;

// Embed query
EmbedContentConfig queryConfig = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_QUERY")
    .build();

EmbedContentResponse queryResponse = client.models.embedContent(
    "text-embedding-004",
    "What is the capital of France?",
    queryConfig
);

// Embed documents
EmbedContentConfig docConfig = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_DOCUMENT")
    .build();

List<String> documents = ImmutableList.of(
    "Paris is the capital and largest city of France.",
    "London is the capital city of England.",
    "Berlin is the capital and largest city of Germany."
);

EmbedContentResponse docsResponse = client.models.embedContent(
    "text-embedding-004",
    documents,
    docConfig
);

// Now compute similarity between query and documents
List<Float> queryEmbedding = queryResponse.embedding()
    .flatMap(ContentEmbedding::values)
    .orElse(ImmutableList.of());

docsResponse.embeddings().ifPresent(docEmbeddings -> {
    for (int i = 0; i < docEmbeddings.size(); i++) {
        List<Float> docEmbedding = docEmbeddings.get(i).values().orElse(ImmutableList.of());
        double similarity = cosineSimilarity(queryEmbedding, docEmbedding);
        System.out.println("Document " + (i + 1) + " similarity: " + similarity);
    }
});

// Helper method for cosine similarity
/**
 * Computes the cosine similarity between two embedding vectors.
 *
 * <p>Returns a value in [-1, 1], where 1 means the vectors point in the same
 * direction. The vectors are compared over their shorter common length, so a
 * length mismatch cannot throw {@code IndexOutOfBoundsException}. Returns 0.0
 * when either vector is empty or has zero magnitude, instead of the NaN a
 * naive 0/0 division would produce (the surrounding example can pass empty
 * lists via the {@code orElse(ImmutableList.of())} fallbacks).
 *
 * @param a first embedding vector
 * @param b second embedding vector
 * @return cosine similarity, or 0.0 for degenerate (empty or zero-norm) input
 */
private static double cosineSimilarity(List<Float> a, List<Float> b) {
    int length = Math.min(a.size(), b.size());
    double dotProduct = 0.0;
    double normA = 0.0;
    double normB = 0.0;
    for (int i = 0; i < length; i++) {
        dotProduct += a.get(i) * b.get(i);
        normA += a.get(i) * a.get(i);
        normB += b.get(i) * b.get(i);
    }
    double denominator = Math.sqrt(normA) * Math.sqrt(normB);
    // Guard against 0/0 -> NaN when either vector is empty or all zeros.
    return denominator == 0.0 ? 0.0 : dotProduct / denominator;
}

Async Embeddings

import java.util.concurrent.CompletableFuture;

CompletableFuture<EmbedContentResponse> future = client.async.models.embedContent(
    "text-embedding-004",
    "Async embedding text",
    null
);

future.thenAccept(response -> {
    response.embedding().ifPresent(embedding -> {
        System.out.println("Embedding size: " +
            embedding.values().map(List::size).orElse(0));
    });
});

Token Operations

Models Service - Token Operations

package com.google.genai;

public final class Models {
  // Count tokens
  public CountTokensResponse countTokens(
      String model,
      String text,
      CountTokensConfig config);

  public CountTokensResponse countTokens(
      String model,
      List<Content> contents,
      CountTokensConfig config);

  // Compute tokens (Vertex AI only)
  public ComputeTokensResponse computeTokens(
      String model,
      String text,
      ComputeTokensConfig config);

  public ComputeTokensResponse computeTokens(
      String model,
      List<Content> contents,
      ComputeTokensConfig config);
}

Async Models Service - Token Operations

package com.google.genai;

public final class AsyncModels {
  public CompletableFuture<CountTokensResponse> countTokens(
      String model,
      String text,
      CountTokensConfig config);

  public CompletableFuture<CountTokensResponse> countTokens(
      String model,
      List<Content> contents,
      CountTokensConfig config);

  public CompletableFuture<ComputeTokensResponse> computeTokens(
      String model,
      String text,
      ComputeTokensConfig config);

  public CompletableFuture<ComputeTokensResponse> computeTokens(
      String model,
      List<Content> contents,
      ComputeTokensConfig config);
}

Count Tokens Config

package com.google.genai.types;

public final class CountTokensConfig {
  public static Builder builder();

  public Optional<GenerateContentConfig> generateContentConfig();
  public Optional<HttpOptions> httpOptions();
}

Count Tokens Response

package com.google.genai.types;

public final class CountTokensResponse {
  public Optional<Integer> totalTokens();
  public Optional<Integer> cachedContentTokenCount();
  public Optional<HttpResponse> sdkHttpResponse();
}

Compute Tokens Config

package com.google.genai.types;

public final class ComputeTokensConfig {
  public static Builder builder();

  public Optional<GenerateContentConfig> generateContentConfig();
  public Optional<HttpOptions> httpOptions();
}

Compute Tokens Response

package com.google.genai.types;

public final class ComputeTokensResponse {
  // NOTE(review): the element type named here conflicts with the differently-shaped
  // ComputeTokensResult documented in the Local Tokenizer section of this page; in
  // the published SDK the per-role entry type of tokensInfo() is TokensInfo — confirm
  // against the google-genai Javadoc.
  public Optional<List<ComputeTokensResult>> tokensInfo();
  public Optional<HttpResponse> sdkHttpResponse();
}

Compute Tokens Result

package com.google.genai.types;

public final class ComputeTokensResult {
  // NOTE(review): this per-role token detail type (token IDs, token strings, role)
  // appears to correspond to TokensInfo in the published SDK; the class name
  // ComputeTokensResult is reused later on this page (Local Tokenizer section) with
  // entirely different accessors — verify which name is correct against the Javadoc.
  public Optional<List<Integer>> tokenIds();
  public Optional<List<String>> tokens();
  public Optional<String> role();
}

Basic Token Counting

import com.google.genai.types.CountTokensResponse;

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "What is your name?",
    null
);

System.out.println("Total tokens: " + response.totalTokens().orElse(0));

Count Tokens with Content

import com.google.genai.types.Content;
import com.google.genai.types.Part;
import com.google.common.collect.ImmutableList;

List<Content> contents = ImmutableList.of(
    Content.builder()
        .role("user")
        .parts(ImmutableList.of(Part.fromText("Hello, how are you?")))
        .build(),
    Content.builder()
        .role("model")
        .parts(ImmutableList.of(Part.fromText("I'm doing well, thank you!")))
        .build(),
    Content.builder()
        .role("user")
        .parts(ImmutableList.of(Part.fromText("Tell me about AI")))
        .build()
);

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    contents,
    null
);

System.out.println("Total tokens in conversation: " + response.totalTokens().orElse(0));

Count Tokens with Generation Config

Use this to count tokens including system instructions and other config:

import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;

GenerateContentConfig genConfig = GenerateContentConfig.builder()
    .systemInstruction(Content.fromParts(
        Part.fromText("You are a helpful assistant.")
    ))
    .build();

CountTokensConfig config = CountTokensConfig.builder()
    .generateContentConfig(genConfig)
    .build();

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "Tell me about AI",
    config
);

System.out.println("Total tokens (including system instruction): " +
    response.totalTokens().orElse(0));

Compute Tokens (Vertex AI Only)

Compute tokens returns detailed token IDs and strings:

import com.google.genai.types.ComputeTokensResponse;

Client client = Client.builder()
    .vertexAI(true)
    .project("your-project")
    .location("us-central1")
    .build();

ComputeTokensResponse response = client.models.computeTokens(
    "gemini-2.0-flash",
    "What is your name?",
    null
);

response.tokensInfo().ifPresent(tokensInfo -> {
    for (ComputeTokensResult result : tokensInfo) {
        System.out.println("Role: " + result.role().orElse("N/A"));

        result.tokenIds().ifPresent(ids -> {
            System.out.println("Token IDs: " + ids);
        });

        result.tokens().ifPresent(tokens -> {
            System.out.println("Tokens: " + tokens);
        });
    }
});

Async Token Operations

import java.util.concurrent.CompletableFuture;

CompletableFuture<CountTokensResponse> future = client.async.models.countTokens(
    "gemini-2.0-flash",
    "Count tokens for this text",
    null
);

future.thenAccept(response -> {
    System.out.println("Token count: " + response.totalTokens().orElse(0));
});

Local Tokenizer (Experimental)

NOTE: Local tokenizer is experimental and only supports text-based tokenization (no multimodal).

Count tokens locally without making API calls, useful for quota management and cost estimation. LocalTokenizer provides free, offline token counting that doesn't consume API quota.

LocalTokenizer Class

package com.google.genai;

public final class LocalTokenizer {
  // Constructor
  public LocalTokenizer(String modelName);

  // Count tokens
  public CountTokensResult countTokens(List<Content> contents, CountTokensConfig config);
  public CountTokensResult countTokens(List<Content> contents);
  public CountTokensResult countTokens(Content content, CountTokensConfig config);
  public CountTokensResult countTokens(Content content);
  public CountTokensResult countTokens(String content, CountTokensConfig config);
  public CountTokensResult countTokens(String content);

  // Compute tokens (detailed)
  public ComputeTokensResult computeTokens(List<Content> contents);
  public ComputeTokensResult computeTokens(Content content);
  public ComputeTokensResult computeTokens(String content);
}

Count Tokens Result

package com.google.genai.types;

public final class CountTokensResult {
  public Optional<Integer> totalTokens();
}

Compute Tokens Result

package com.google.genai.types;

public final class ComputeTokensResult {
  // NOTE(review): local-tokenizer result type (aggregate count plus per-role
  // TokensInfo entries); it shares its class name with the differently-shaped
  // ComputeTokensResult documented in the Vertex AI computeTokens section above —
  // confirm both shapes against the SDK Javadoc.
  public Optional<Integer> totalTokens();
  public Optional<List<TokensInfo>> tokensInfo();
}

Tokens Info

package com.google.genai.types;

public final class TokensInfo {
  public Optional<String> role();
  public Optional<List<Integer>> tokenIds();
}

Basic Local Token Counting

import com.google.genai.LocalTokenizer;
import com.google.genai.types.CountTokensResult;

// Create local tokenizer
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

// Count tokens for simple text
String text = "This is a sample text to count tokens for.";
CountTokensResult result = tokenizer.countTokens(text);

int tokenCount = result.totalTokens().orElse(0);
System.out.println("Token count: " + tokenCount);

// No API call was made - completely free!

Count Tokens for Content

import com.google.genai.types.Content;
import com.google.genai.types.Part;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

Content content = Content.fromParts(
    Part.fromText("User message here")
);

CountTokensResult result = tokenizer.countTokens(content);
System.out.println("Tokens: " + result.totalTokens().orElse(0));

Count Tokens for Conversation

import java.util.List;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

List<Content> conversation = List.of(
    Content.builder()
        .role("user")
        .parts(List.of(Part.fromText("Hello!")))
        .build(),
    Content.builder()
        .role("model")
        .parts(List.of(Part.fromText("Hi there! How can I help you today?")))
        .build(),
    Content.builder()
        .role("user")
        .parts(List.of(Part.fromText("Tell me about AI.")))
        .build()
);

CountTokensResult result = tokenizer.countTokens(conversation);
System.out.println("Conversation tokens: " + result.totalTokens().orElse(0));

Count Tokens with Config

import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;
import com.google.genai.types.Tool;

// Create config with tools
GenerateContentConfig genConfig = GenerateContentConfig.builder()
    .tools(tools)
    .build();

CountTokensConfig config = CountTokensConfig.builder()
    .generateContentConfig(genConfig)
    .build();

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
CountTokensResult result = tokenizer.countTokens("Text with tools", config);

Compute Tokens (Detailed)

import com.google.genai.types.ComputeTokensResult;
import com.google.genai.types.TokensInfo;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

ComputeTokensResult result = tokenizer.computeTokens("Sample text");

result.totalTokens().ifPresent(total -> {
    System.out.println("Total tokens: " + total);
});

result.tokensInfo().ifPresent(tokensInfoList -> {
    for (TokensInfo info : tokensInfoList) {
        info.role().ifPresent(role -> {
            System.out.println("Role: " + role);
        });

        info.tokenIds().ifPresent(ids -> {
            System.out.println("Token IDs: " + ids);
        });
    }
});

Compute Tokens for Conversation

List<Content> conversation = List.of(
    Content.builder().role("user").parts(List.of(Part.fromText("Hi"))).build(),
    Content.builder().role("model").parts(List.of(Part.fromText("Hello!"))).build()
);

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
ComputeTokensResult result = tokenizer.computeTokens(conversation);

result.tokensInfo().ifPresent(tokensInfoList -> {
    for (TokensInfo info : tokensInfoList) {
        String role = info.role().orElse("unknown");
        int tokenCount = info.tokenIds().map(List::size).orElse(0);
        System.out.println(role + ": " + tokenCount + " tokens");
    }
});

Compare Local vs API Token Counting

LocalTokenizer localTokenizer = new LocalTokenizer("gemini-2.0-flash");
String text = "Sample text for comparison";

// Local counting (free, instant)
CountTokensResult localResult = localTokenizer.countTokens(text);
int localCount = localResult.totalTokens().orElse(0);

// API counting (uses quota, requires network)
CountTokensResponse apiResult = client.models.countTokens(
    "gemini-2.0-flash",
    text,
    null
);
int apiCount = apiResult.totalTokens().orElse(0);

System.out.println("Local count: " + localCount);
System.out.println("API count: " + apiCount);
// Counts should be very close or identical

Pre-check Before API Call

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
String longPrompt = /* very long text */;

// Check locally first (free)
CountTokensResult result = tokenizer.countTokens(longPrompt);
int tokenCount = result.totalTokens().orElse(0);

if (tokenCount > 30000) {
    System.out.println("Prompt exceeds context window, truncating...");
    // Truncate before making API call
} else {
    // Safe to proceed with API call
    GenerateContentResponse response = client.models.generateContent(
        "gemini-2.0-flash",
        longPrompt,
        null
    );
}

Batch Token Estimation

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

List<String> prompts = List.of(
    "Prompt 1",
    "Prompt 2",
    "Prompt 3"
);

int totalTokens = 0;
for (String prompt : prompts) {
    CountTokensResult result = tokenizer.countTokens(prompt);
    int tokens = result.totalTokens().orElse(0);
    totalTokens += tokens;
    System.out.println("Prompt: " + tokens + " tokens");
}

System.out.println("Total tokens for batch: " + totalTokens);
// Estimate cost before making batch API calls

Limitations

Text Only: LocalTokenizer only supports text-based tokenization. It does not handle:

  • Images
  • Audio
  • Video
  • Other multimodal content

Model Support: Limited to models with available tokenizer models. Check documentation for supported models.

Accuracy: Token counts may differ slightly from API counts for edge cases, but should be very close for typical use.

// This works
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
tokenizer.countTokens("Text content"); // ✓

// This is not supported
Content multimodal = Content.fromParts(
    Part.fromText("Text"),
    Part.fromImage(image) // LocalTokenizer cannot count image tokens
);
// tokenizer.countTokens(multimodal); // Will only count text part

Token Management Best Practices

Check Before Sending

// Count tokens before making expensive API call
CountTokensResponse countResponse = client.models.countTokens(
    "gemini-2.0-flash",
    longPrompt,
    null
);

int tokenCount = countResponse.totalTokens().orElse(0);

if (tokenCount > 30000) {
    System.out.println("Prompt too long, truncating...");
    // Truncate or split prompt
} else {
    // Proceed with generation
    GenerateContentResponse response = client.models.generateContent(
        "gemini-2.0-flash",
        longPrompt,
        null
    );
}

Budget Management

import com.google.genai.types.GenerateContentConfig;

// Set max output tokens to control costs
GenerateContentConfig config = GenerateContentConfig.builder()
    .maxOutputTokens(500)
    .build();

GenerateContentResponse response = client.models.generateContent(
    "gemini-2.0-flash",
    "Write a summary",
    config
);

// Check actual usage
response.usageMetadata().ifPresent(usage -> {
    System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));
    System.out.println("Response tokens: " + usage.candidatesTokenCount().orElse(0));
    System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));
});

Cached Content Token Savings

// When using cached content, check token savings
CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "Query against cached content",
    null
);

response.totalTokens().ifPresent(total -> {
    response.cachedContentTokenCount().ifPresent(cached -> {
        int uncachedTokens = total - cached;
        System.out.println("Total tokens: " + total);
        System.out.println("Cached tokens: " + cached);
        System.out.println("Uncached tokens: " + uncachedTokens);
        System.out.println("Token savings: " + (cached * 100.0 / total) + "%");
    });
});

Install with Tessl CLI

npx tessl i tessl/maven-com-google-genai--google-genai

docs

batch-operations.md

caching.md

chat-sessions.md

client-configuration.md

content-generation.md

embeddings-tokens.md

error-handling.md

file-search-stores.md

files-management.md

image-operations.md

index.md

live-sessions.md

model-tuning.md

operations.md

tools-functions.md

types-reference.md

video-generation.md

tile.json