Java idiomatic SDK for the Gemini Developer APIs and Vertex AI APIs
Generate embeddings for text and manage token counting and computation for prompt optimization.
import com.google.genai.Models;
import com.google.genai.AsyncModels;
import com.google.genai.LocalTokenizer;
import com.google.genai.types.EmbedContentResponse;
import com.google.genai.types.EmbedContentConfig;
import com.google.genai.types.ContentEmbedding;
import com.google.genai.types.CountTokensResponse;
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.ComputeTokensResponse;
import com.google.genai.types.ComputeTokensConfig;
import java.util.concurrent.CompletableFuture;

package com.google.genai;
public final class Models {
// Single text embedding
public EmbedContentResponse embedContent(
String model,
String text,
EmbedContentConfig config);
// Multiple texts embedding
public EmbedContentResponse embedContent(
String model,
List<String> texts,
EmbedContentConfig config);
}

package com.google.genai;
public final class AsyncModels {
public CompletableFuture<EmbedContentResponse> embedContent(
String model,
String text,
EmbedContentConfig config);
public CompletableFuture<EmbedContentResponse> embedContent(
String model,
List<String> texts,
EmbedContentConfig config);
}

package com.google.genai.types;
public final class EmbedContentConfig {
public static Builder builder();
public Optional<String> taskType();
public Optional<String> title();
public Optional<Integer> outputDimensionality();
public Optional<String> mimeType();
public Optional<Boolean> autoTruncate();
public Optional<HttpOptions> httpOptions();
}

Task Types:
- RETRIEVAL_QUERY - For search queries
- RETRIEVAL_DOCUMENT - For documents to be searched
- SEMANTIC_SIMILARITY - For similarity comparison
- CLASSIFICATION - For text classification
- CLUSTERING - For text clustering

package com.google.genai.types;
public final class EmbedContentResponse {
public Optional<ContentEmbedding> embedding();
public Optional<List<ContentEmbedding>> embeddings();
public Optional<HttpResponse> sdkHttpResponse();
}

package com.google.genai.types;
public final class ContentEmbedding {
public Optional<List<Float>> values();
public Optional<ContentEmbeddingStatistics> statistics();
}

package com.google.genai.types;
public final class ContentEmbeddingStatistics {
public Optional<Integer> tokenCount();
public Optional<Boolean> truncated();
}

import com.google.genai.Client;
import com.google.genai.types.EmbedContentResponse;
Client client = new Client();
// Single text embedding
EmbedContentResponse response = client.models.embedContent(
"text-embedding-004",
"Why is the sky blue?",
null
);
// Access embedding values
response.embedding().ifPresent(embedding -> {
embedding.values().ifPresent(values -> {
System.out.println("Embedding dimension: " + values.size());
System.out.println("First few values: " + values.subList(0, 5));
});
});

import com.google.common.collect.ImmutableList;
import com.google.genai.types.ContentEmbedding;
List<String> texts = ImmutableList.of(
"What is machine learning?",
"How does AI work?",
"Explain neural networks"
);
EmbedContentResponse response = client.models.embedContent(
"text-embedding-004",
texts,
null
);
// Access multiple embeddings
response.embeddings().ifPresent(embeddings -> {
System.out.println("Generated " + embeddings.size() + " embeddings");
for (int i = 0; i < embeddings.size(); i++) {
ContentEmbedding emb = embeddings.get(i);
System.out.println("Text " + (i + 1) + " embedding dimension: " +
emb.values().map(List::size).orElse(0));
}
});

import com.google.genai.types.EmbedContentConfig;
EmbedContentConfig config = EmbedContentConfig.builder()
.taskType("RETRIEVAL_DOCUMENT")
.title("Document about AI")
.outputDimensionality(256) // Reduce dimensionality
.autoTruncate(true)
.build();
EmbedContentResponse response = client.models.embedContent(
"text-embedding-004",
"This is a long document about artificial intelligence...",
config
);
// Check if truncated
response.embedding().ifPresent(embedding -> {
embedding.statistics().ifPresent(stats -> {
if (stats.truncated().orElse(false)) {
System.out.println("Input was truncated");
}
System.out.println("Token count: " + stats.tokenCount().orElse(0));
});
});

import com.google.genai.types.EmbedContentConfig;
// Embed query
EmbedContentConfig queryConfig = EmbedContentConfig.builder()
.taskType("RETRIEVAL_QUERY")
.build();
EmbedContentResponse queryResponse = client.models.embedContent(
"text-embedding-004",
"What is the capital of France?",
queryConfig
);
// Embed documents
EmbedContentConfig docConfig = EmbedContentConfig.builder()
.taskType("RETRIEVAL_DOCUMENT")
.build();
List<String> documents = ImmutableList.of(
"Paris is the capital and largest city of France.",
"London is the capital city of England.",
"Berlin is the capital and largest city of Germany."
);
EmbedContentResponse docsResponse = client.models.embedContent(
"text-embedding-004",
documents,
docConfig
);
// Now compute similarity between query and documents
List<Float> queryEmbedding = queryResponse.embedding()
.flatMap(ContentEmbedding::values)
.orElse(ImmutableList.of());
docsResponse.embeddings().ifPresent(docEmbeddings -> {
for (int i = 0; i < docEmbeddings.size(); i++) {
List<Float> docEmbedding = docEmbeddings.get(i).values().orElse(ImmutableList.of());
double similarity = cosineSimilarity(queryEmbedding, docEmbedding);
System.out.println("Document " + (i + 1) + " similarity: " + similarity);
}
});
// Helper method for cosine similarity
private static double cosineSimilarity(List<Float> a, List<Float> b) {
double dotProduct = 0.0;
double normA = 0.0;
double normB = 0.0;
for (int i = 0; i < a.size(); i++) {
dotProduct += a.get(i) * b.get(i);
normA += a.get(i) * a.get(i);
normB += b.get(i) * b.get(i);
}
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}

import java.util.concurrent.CompletableFuture;
CompletableFuture<EmbedContentResponse> future = client.async.models.embedContent(
"text-embedding-004",
"Async embedding text",
null
);
future.thenAccept(response -> {
response.embedding().ifPresent(embedding -> {
System.out.println("Embedding size: " +
embedding.values().map(List::size).orElse(0));
});
});

package com.google.genai;
public final class Models {
// Count tokens
public CountTokensResponse countTokens(
String model,
String text,
CountTokensConfig config);
public CountTokensResponse countTokens(
String model,
List<Content> contents,
CountTokensConfig config);
// Compute tokens (Vertex AI only)
public ComputeTokensResponse computeTokens(
String model,
String text,
ComputeTokensConfig config);
public ComputeTokensResponse computeTokens(
String model,
List<Content> contents,
ComputeTokensConfig config);
}

package com.google.genai;
public final class AsyncModels {
public CompletableFuture<CountTokensResponse> countTokens(
String model,
String text,
CountTokensConfig config);
public CompletableFuture<CountTokensResponse> countTokens(
String model,
List<Content> contents,
CountTokensConfig config);
public CompletableFuture<ComputeTokensResponse> computeTokens(
String model,
String text,
ComputeTokensConfig config);
public CompletableFuture<ComputeTokensResponse> computeTokens(
String model,
List<Content> contents,
ComputeTokensConfig config);
}

package com.google.genai.types;
public final class CountTokensConfig {
public static Builder builder();
public Optional<GenerateContentConfig> generateContentConfig();
public Optional<HttpOptions> httpOptions();
}

package com.google.genai.types;
public final class CountTokensResponse {
public Optional<Integer> totalTokens();
public Optional<Integer> cachedContentTokenCount();
public Optional<HttpResponse> sdkHttpResponse();
}

package com.google.genai.types;
public final class ComputeTokensConfig {
public static Builder builder();
public Optional<GenerateContentConfig> generateContentConfig();
public Optional<HttpOptions> httpOptions();
}

package com.google.genai.types;
public final class ComputeTokensResponse {
public Optional<List<ComputeTokensResult>> tokensInfo();
public Optional<HttpResponse> sdkHttpResponse();
}

package com.google.genai.types;
public final class ComputeTokensResult {
public Optional<List<Integer>> tokenIds();
public Optional<List<String>> tokens();
public Optional<String> role();
}

import com.google.genai.types.CountTokensResponse;
CountTokensResponse response = client.models.countTokens(
"gemini-2.0-flash",
"What is your name?",
null
);
System.out.println("Total tokens: " + response.totalTokens().orElse(0));

import com.google.genai.types.Content;
import com.google.genai.types.Part;
import com.google.common.collect.ImmutableList;
List<Content> contents = ImmutableList.of(
Content.builder()
.role("user")
.parts(ImmutableList.of(Part.fromText("Hello, how are you?")))
.build(),
Content.builder()
.role("model")
.parts(ImmutableList.of(Part.fromText("I'm doing well, thank you!")))
.build(),
Content.builder()
.role("user")
.parts(ImmutableList.of(Part.fromText("Tell me about AI")))
.build()
);
CountTokensResponse response = client.models.countTokens(
"gemini-2.0-flash",
contents,
null
);
System.out.println("Total tokens in conversation: " + response.totalTokens().orElse(0));

Use this to count tokens including system instructions and other config:
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;
GenerateContentConfig genConfig = GenerateContentConfig.builder()
.systemInstruction(Content.fromParts(
Part.fromText("You are a helpful assistant.")
))
.build();
CountTokensConfig config = CountTokensConfig.builder()
.generateContentConfig(genConfig)
.build();
CountTokensResponse response = client.models.countTokens(
"gemini-2.0-flash",
"Tell me about AI",
config
);
System.out.println("Total tokens (including system instruction): " +
    response.totalTokens().orElse(0));

Compute tokens returns detailed token IDs and strings:
import com.google.genai.types.ComputeTokensResponse;
Client client = Client.builder()
.vertexAI(true)
.project("your-project")
.location("us-central1")
.build();
ComputeTokensResponse response = client.models.computeTokens(
"gemini-2.0-flash",
"What is your name?",
null
);
response.tokensInfo().ifPresent(tokensInfo -> {
for (ComputeTokensResult result : tokensInfo) {
System.out.println("Role: " + result.role().orElse("N/A"));
result.tokenIds().ifPresent(ids -> {
System.out.println("Token IDs: " + ids);
});
result.tokens().ifPresent(tokens -> {
System.out.println("Tokens: " + tokens);
});
}
});

import java.util.concurrent.CompletableFuture;
CompletableFuture<CountTokensResponse> future = client.async.models.countTokens(
"gemini-2.0-flash",
"Count tokens for this text",
null
);
future.thenAccept(response -> {
System.out.println("Token count: " + response.totalTokens().orElse(0));
});

NOTE: Local tokenizer is experimental and only supports text-based tokenization (no multimodal).
Count tokens locally without making API calls, useful for quota management and cost estimation. LocalTokenizer provides free, offline token counting that doesn't consume API quota.
package com.google.genai;
public final class LocalTokenizer {
// Constructor
public LocalTokenizer(String modelName);
// Count tokens
public CountTokensResult countTokens(List<Content> contents, CountTokensConfig config);
public CountTokensResult countTokens(List<Content> contents);
public CountTokensResult countTokens(Content content, CountTokensConfig config);
public CountTokensResult countTokens(Content content);
public CountTokensResult countTokens(String content, CountTokensConfig config);
public CountTokensResult countTokens(String content);
// Compute tokens (detailed)
public ComputeTokensResult computeTokens(List<Content> contents);
public ComputeTokensResult computeTokens(Content content);
public ComputeTokensResult computeTokens(String content);
}

package com.google.genai.types;
public final class CountTokensResult {
public Optional<Integer> totalTokens();
}

package com.google.genai.types;
public final class ComputeTokensResult {
public Optional<Integer> totalTokens();
public Optional<List<TokensInfo>> tokensInfo();
}

package com.google.genai.types;
public final class TokensInfo {
public Optional<String> role();
public Optional<List<Integer>> tokenIds();
}

import com.google.genai.LocalTokenizer;
import com.google.genai.types.CountTokensResult;
// Create local tokenizer
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
// Count tokens for simple text
String text = "This is a sample text to count tokens for.";
CountTokensResult result = tokenizer.countTokens(text);
int tokenCount = result.totalTokens().orElse(0);
System.out.println("Token count: " + tokenCount);
// No API call was made - completely free!

import com.google.genai.types.Content;
import com.google.genai.types.Part;
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
Content content = Content.fromParts(
Part.fromText("User message here")
);
CountTokensResult result = tokenizer.countTokens(content);
System.out.println("Tokens: " + result.totalTokens().orElse(0));

import java.util.List;
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
List<Content> conversation = List.of(
Content.builder()
.role("user")
.parts(List.of(Part.fromText("Hello!")))
.build(),
Content.builder()
.role("model")
.parts(List.of(Part.fromText("Hi there! How can I help you today?")))
.build(),
Content.builder()
.role("user")
.parts(List.of(Part.fromText("Tell me about AI.")))
.build()
);
CountTokensResult result = tokenizer.countTokens(conversation);
System.out.println("Conversation tokens: " + result.totalTokens().orElse(0));

import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;
import com.google.genai.types.Tool;
// Create config with tools
GenerateContentConfig genConfig = GenerateContentConfig.builder()
.tools(tools)
.build();
CountTokensConfig config = CountTokensConfig.builder()
.generateContentConfig(genConfig)
.build();
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
CountTokensResult result = tokenizer.countTokens("Text with tools", config);

import com.google.genai.types.ComputeTokensResult;
import com.google.genai.types.TokensInfo;
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
ComputeTokensResult result = tokenizer.computeTokens("Sample text");
result.totalTokens().ifPresent(total -> {
System.out.println("Total tokens: " + total);
});
result.tokensInfo().ifPresent(tokensInfoList -> {
for (TokensInfo info : tokensInfoList) {
info.role().ifPresent(role -> {
System.out.println("Role: " + role);
});
info.tokenIds().ifPresent(ids -> {
System.out.println("Token IDs: " + ids);
});
}
});

List<Content> conversation = List.of(
Content.builder().role("user").parts(List.of(Part.fromText("Hi"))).build(),
Content.builder().role("model").parts(List.of(Part.fromText("Hello!"))).build()
);
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
ComputeTokensResult result = tokenizer.computeTokens(conversation);
result.tokensInfo().ifPresent(tokensInfoList -> {
for (TokensInfo info : tokensInfoList) {
String role = info.role().orElse("unknown");
int tokenCount = info.tokenIds().map(List::size).orElse(0);
System.out.println(role + ": " + tokenCount + " tokens");
}
});

LocalTokenizer localTokenizer = new LocalTokenizer("gemini-2.0-flash");
String text = "Sample text for comparison";
// Local counting (free, instant)
CountTokensResult localResult = localTokenizer.countTokens(text);
int localCount = localResult.totalTokens().orElse(0);
// API counting (uses quota, requires network)
CountTokensResponse apiResult = client.models.countTokens(
"gemini-2.0-flash",
text,
null
);
int apiCount = apiResult.totalTokens().orElse(0);
System.out.println("Local count: " + localCount);
System.out.println("API count: " + apiCount);
// Counts should be very close or identical

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
String longPrompt = /* very long text */;
// Check locally first (free)
CountTokensResult result = tokenizer.countTokens(longPrompt);
int tokenCount = result.totalTokens().orElse(0);
if (tokenCount > 30000) {
System.out.println("Prompt exceeds context window, truncating...");
// Truncate before making API call
} else {
// Safe to proceed with API call
GenerateContentResponse response = client.models.generateContent(
"gemini-2.0-flash",
longPrompt,
null
);
}

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
List<String> prompts = List.of(
"Prompt 1",
"Prompt 2",
"Prompt 3"
);
int totalTokens = 0;
for (String prompt : prompts) {
CountTokensResult result = tokenizer.countTokens(prompt);
int tokens = result.totalTokens().orElse(0);
totalTokens += tokens;
System.out.println("Prompt: " + tokens + " tokens");
}
System.out.println("Total tokens for batch: " + totalTokens);
// Estimate cost before making batch API calls

Text Only: LocalTokenizer only supports text-based tokenization. It does not handle non-text parts such as images (see the example below).
Model Support: Limited to models with available tokenizer models. Check documentation for supported models.
Accuracy: Token counts may differ slightly from API counts for edge cases, but should be very close for typical use.
// This works
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
tokenizer.countTokens("Text content"); // ✓
// This is not supported
Content multimodal = Content.fromParts(
Part.fromText("Text"),
Part.fromImage(image) // LocalTokenizer cannot count image tokens
);
// tokenizer.countTokens(multimodal); // Will only count text part

// Count tokens before making expensive API call
CountTokensResponse countResponse = client.models.countTokens(
"gemini-2.0-flash",
longPrompt,
null
);
int tokenCount = countResponse.totalTokens().orElse(0);
if (tokenCount > 30000) {
System.out.println("Prompt too long, truncating...");
// Truncate or split prompt
} else {
// Proceed with generation
GenerateContentResponse response = client.models.generateContent(
"gemini-2.0-flash",
longPrompt,
null
);
}

import com.google.genai.types.GenerateContentConfig;
// Set max output tokens to control costs
GenerateContentConfig config = GenerateContentConfig.builder()
.maxOutputTokens(500)
.build();
GenerateContentResponse response = client.models.generateContent(
"gemini-2.0-flash",
"Write a summary",
config
);
// Check actual usage
response.usageMetadata().ifPresent(usage -> {
System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));
System.out.println("Response tokens: " + usage.candidatesTokenCount().orElse(0));
System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));
});

// When using cached content, check token savings
CountTokensResponse response = client.models.countTokens(
"gemini-2.0-flash",
"Query against cached content",
null
);
response.totalTokens().ifPresent(total -> {
response.cachedContentTokenCount().ifPresent(cached -> {
int uncachedTokens = total - cached;
System.out.println("Total tokens: " + total);
System.out.println("Cached tokens: " + cached);
System.out.println("Uncached tokens: " + uncachedTokens);
System.out.println("Token savings: " + (cached * 100.0 / total) + "%");
});
});

Install with Tessl CLI
npx tessl i tessl/maven-com-google-genai--google-genaidocs