LangChain4j integration for Google AI Gemini models providing chat, streaming, embeddings, image generation, and batch processing capabilities
Token count estimator for Google AI Gemini that provides accurate token counts for text, messages, tool specifications, and tool execution requests before sending to the API. Essential for managing costs and staying within model token limits.
Main token counting class providing estimation methods for various input types.
/**
* Token count estimator for Google AI Gemini.
* Provides accurate token counting for various input types.
*/
public class GoogleAiGeminiTokenCountEstimator {
/**
* Creates a new builder for configuring the token count estimator.
* @return Builder instance
*/
public static Builder builder();
/**
* Estimates the number of tokens in a plain text string.
* @param text The text to analyze
* @return Estimated token count
*/
public int estimateTokenCountInText(String text);
/**
* Estimates the number of tokens in a single chat message.
* Includes message content, role, and any multimodal content.
* @param message The chat message to analyze
* @return Estimated token count
*/
public int estimateTokenCountInMessage(ChatMessage message);
/**
* Estimates the total number of tokens in multiple chat messages.
* Useful for checking conversation history size.
* @param messages Iterable of chat messages
* @return Estimated total token count
*/
public int estimateTokenCountInMessages(Iterable<ChatMessage> messages);
/**
* Estimates tokens used by tool execution requests.
* Includes function names, arguments, and execution results.
* @param toolExecutionRequests Iterable of tool execution requests
* @return Estimated token count
*/
public int estimateTokenCountInToolExecutionRequests(Iterable<ToolExecutionRequest> toolExecutionRequests);
/**
* Estimates tokens used by tool specifications.
* Includes function names, descriptions, and parameter schemas.
* @param toolSpecifications Iterable of tool specifications
* @return Estimated token count
*/
public int estimateTokenCountInToolSpecifications(Iterable<ToolSpecification> toolSpecifications);
}

Builder class for constructing GoogleAiGeminiTokenCountEstimator instances.
/**
* Builder for GoogleAiGeminiTokenCountEstimator.
*/
public static class Builder {
/**
* Sets the HTTP client builder for customizing requests.
* @param httpClientBuilder HTTP client builder instance
* @return Builder instance for chaining
*/
public Builder httpClientBuilder(HttpClientBuilder httpClientBuilder);
/**
* Sets the model name for token counting (required).
* Different models may have different tokenization schemes.
* @param modelName Model identifier (e.g., "gemini-2.5-pro")
* @return Builder instance for chaining
*/
public Builder modelName(String modelName);
/**
* Sets the API key for authentication (required).
* @param apiKey Google AI API key
* @return Builder instance for chaining
*/
public Builder apiKey(String apiKey);
/**
* Sets the base URL for the API endpoint.
* @param baseUrl Custom base URL (optional)
* @return Builder instance for chaining
*/
public Builder baseUrl(String baseUrl);
/**
* Enables logging of both requests and responses.
* @param logRequestsAndResponses True to enable full logging
* @return Builder instance for chaining
*/
public Builder logRequestsAndResponses(Boolean logRequestsAndResponses);
/**
* Enables logging of requests only.
* @param logRequests True to enable request logging
* @return Builder instance for chaining
*/
public Builder logRequests(Boolean logRequests);
/**
* Enables logging of responses only.
* @param logResponses True to enable response logging
* @return Builder instance for chaining
*/
public Builder logResponses(Boolean logResponses);
/**
* Sets a custom logger for the estimator.
* @param logger Logger instance
* @return Builder instance for chaining
*/
public Builder logger(Logger logger);
/**
* Sets the request timeout duration.
* @param timeout Timeout duration
* @return Builder instance for chaining
*/
public Builder timeout(Duration timeout);
/**
* Sets the maximum number of retry attempts for failed requests.
* @param maxRetries Number of retries (default: 3)
* @return Builder instance for chaining
*/
public Builder maxRetries(Integer maxRetries);
/**
* Builds the GoogleAiGeminiTokenCountEstimator instance.
* @return Configured GoogleAiGeminiTokenCountEstimator
* @throws IllegalArgumentException if required fields are missing
*/
public GoogleAiGeminiTokenCountEstimator build();
}

import dev.langchain4j.model.googleai.GoogleAiGeminiTokenCountEstimator;
// Create token counter
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Count tokens in text
String text = "Artificial intelligence is transforming the world of technology.";
int tokenCount = tokenCounter.estimateTokenCountInText(text);
System.out.println("Token count: " + tokenCount);

import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.SystemMessage;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Count tokens in single message
UserMessage userMsg = UserMessage.from("What is quantum computing?");
int userTokens = tokenCounter.estimateTokenCountInMessage(userMsg);
System.out.println("User message tokens: " + userTokens);
// Count tokens in system message
SystemMessage systemMsg = SystemMessage.from("You are a helpful AI assistant.");
int systemTokens = tokenCounter.estimateTokenCountInMessage(systemMsg);
System.out.println("System message tokens: " + systemTokens);
// Count tokens in AI message
AiMessage aiMsg = AiMessage.from("Quantum computing uses quantum mechanics...");
int aiTokens = tokenCounter.estimateTokenCountInMessage(aiMsg);
System.out.println("AI message tokens: " + aiTokens);

import dev.langchain4j.data.message.ChatMessage;
import java.util.List;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
// Build conversation history
List<ChatMessage> conversation = List.of(
SystemMessage.from("You are a helpful assistant specialized in technology."),
UserMessage.from("What is machine learning?"),
AiMessage.from("Machine learning is a subset of AI that enables computers to learn from data..."),
UserMessage.from("Can you give me an example?"),
AiMessage.from("Sure! A common example is email spam filtering...")
);
// Count total tokens in conversation
int totalTokens = tokenCounter.estimateTokenCountInMessages(conversation);
System.out.println("Total conversation tokens: " + totalTokens);
// Check against model limit
int maxTokens = 32000; // Example limit for gemini-2.5-flash
if (totalTokens > maxTokens * 0.8) {
System.out.println("Warning: Approaching token limit!");
}

import dev.langchain4j.data.message.ImageContent;
import dev.langchain4j.data.message.TextContent;
import dev.langchain4j.data.image.Image;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Create multimodal message with image
Image image = Image.fromUrl("https://example.com/chart.png");
UserMessage multimodalMsg = UserMessage.from(
TextContent.from("What does this chart show?"),
ImageContent.from(image)
);
// Count tokens (includes image token cost)
int tokens = tokenCounter.estimateTokenCountInMessage(multimodalMsg);
System.out.println("Multimodal message tokens: " + tokens);

import dev.langchain4j.agent.tool.ToolSpecification;
import dev.langchain4j.agent.tool.ToolParameters;
import java.util.List;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Define tool specifications
ToolSpecification weatherTool = ToolSpecification.builder()
.name("get_weather")
.description("Get the current weather for a location")
.parameters(ToolParameters.builder()
.addParameter("location", "string", "The city and country")
.build())
.build();
ToolSpecification calculatorTool = ToolSpecification.builder()
.name("calculate")
.description("Perform mathematical calculations")
.parameters(ToolParameters.builder()
.addParameter("expression", "string", "The mathematical expression")
.build())
.build();
List<ToolSpecification> tools = List.of(weatherTool, calculatorTool);
// Count tokens used by tool specifications
int toolTokens = tokenCounter.estimateTokenCountInToolSpecifications(tools);
System.out.println("Tool specification tokens: " + toolTokens);

import dev.langchain4j.agent.tool.ToolExecutionRequest;
import java.util.List;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Create tool execution requests
ToolExecutionRequest request1 = ToolExecutionRequest.builder()
.name("get_weather")
.arguments("{\"location\": \"Paris, France\"}")
.build();
ToolExecutionRequest request2 = ToolExecutionRequest.builder()
.name("calculate")
.arguments("{\"expression\": \"2 + 2\"}")
.build();
List<ToolExecutionRequest> requests = List.of(request1, request2);
// Count tokens in execution requests
int execTokens = tokenCounter.estimateTokenCountInToolExecutionRequests(requests);
System.out.println("Tool execution request tokens: " + execTokens);

import dev.langchain4j.agent.tool.Tool;
import dev.langchain4j.service.AiServices;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Conversation history
List<ChatMessage> messages = List.of(
SystemMessage.from("You are a helpful assistant."),
UserMessage.from("What's the weather like?")
);
// Tool specifications
List<ToolSpecification> tools = List.of(
ToolSpecification.builder()
.name("get_weather")
.description("Get weather information")
.build()
);
// Calculate total tokens for request
int messageTokens = tokenCounter.estimateTokenCountInMessages(messages);
int toolTokens = tokenCounter.estimateTokenCountInToolSpecifications(tools);
int totalTokens = messageTokens + toolTokens;
System.out.println("Message tokens: " + messageTokens);
System.out.println("Tool tokens: " + toolTokens);
System.out.println("Total request tokens: " + totalTokens);

GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
String prompt = "Write a detailed essay about climate change, its causes, effects, and solutions.";
int inputTokens = tokenCounter.estimateTokenCountInText(prompt);
// Estimate output tokens (assume response is ~3x input)
int estimatedOutputTokens = inputTokens * 3;
// Calculate cost (example rates per million tokens)
double inputCostPerMillion = 0.075; // $0.075 per million input tokens
double outputCostPerMillion = 0.30; // $0.30 per million output tokens
double inputCost = (inputTokens / 1_000_000.0) * inputCostPerMillion;
double outputCost = (estimatedOutputTokens / 1_000_000.0) * outputCostPerMillion;
double totalCost = inputCost + outputCost;
System.out.printf("Estimated cost: $%.6f%n", totalCost);

import java.util.ArrayList;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
List<ChatMessage> fullHistory = List.of(
SystemMessage.from("You are a helpful assistant."),
UserMessage.from("Tell me about AI"),
AiMessage.from("AI stands for artificial intelligence..."),
UserMessage.from("What about machine learning?"),
AiMessage.from("Machine learning is a subset..."),
UserMessage.from("Can you explain neural networks?"),
AiMessage.from("Neural networks are computing systems...")
);
int maxTokens = 1000;
List<ChatMessage> truncatedHistory = new ArrayList<>();
int currentTokens = 0;
// Always keep system message
truncatedHistory.add(fullHistory.get(0));
currentTokens += tokenCounter.estimateTokenCountInMessage(fullHistory.get(0));
// Add messages from most recent backwards
for (int i = fullHistory.size() - 1; i > 0; i--) {
ChatMessage msg = fullHistory.get(i);
int msgTokens = tokenCounter.estimateTokenCountInMessage(msg);
if (currentTokens + msgTokens <= maxTokens) {
truncatedHistory.add(1, msg); // Insert after system message
currentTokens += msgTokens;
} else {
break;
}
}
System.out.println("Kept " + truncatedHistory.size() + " of " + fullHistory.size() + " messages");
System.out.println("Total tokens: " + currentTokens);

GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
List<String> texts = List.of(
"First document about machine learning...",
"Second document about data science...",
"Third document about artificial intelligence...",
"Fourth document about neural networks...",
"Fifth document about deep learning..."
);
int maxBatchTokens = 5000;
List<List<String>> batches = new ArrayList<>();
List<String> currentBatch = new ArrayList<>();
int currentBatchTokens = 0;
for (String text : texts) {
int textTokens = tokenCounter.estimateTokenCountInText(text);
if (currentBatchTokens + textTokens > maxBatchTokens && !currentBatch.isEmpty()) {
// Start new batch
batches.add(new ArrayList<>(currentBatch));
currentBatch.clear();
currentBatchTokens = 0;
}
currentBatch.add(text);
currentBatchTokens += textTokens;
}
if (!currentBatch.isEmpty()) {
batches.add(currentBatch);
}
System.out.println("Split into " + batches.size() + " batches");

import java.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Logger logger = LoggerFactory.getLogger("TokenCounter");
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.timeout(Duration.ofSeconds(10))
.maxRetries(5)
.logRequestsAndResponses(true)
.logger(logger)
.build();
int tokens = tokenCounter.estimateTokenCountInText("Sample text");
System.out.println("Tokens with advanced config: " + tokens);

// Compare token counts across different models
String text = "This is a sample text for token counting across different models.";
GoogleAiGeminiTokenCountEstimator proCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
GoogleAiGeminiTokenCountEstimator flashCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
int proTokens = proCounter.estimateTokenCountInText(text);
int flashTokens = flashCounter.estimateTokenCountInText(text);
System.out.println("Gemini Pro tokens: " + proTokens);
System.out.println("Gemini Flash tokens: " + flashTokens);

import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.memory.ChatMemory;
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
ChatMemory memory = MessageWindowChatMemory.withMaxMessages(10);
// Add messages to memory
memory.add(UserMessage.from("Hello!"));
memory.add(AiMessage.from("Hi! How can I help?"));
memory.add(UserMessage.from("Tell me about AI"));
memory.add(AiMessage.from("AI is artificial intelligence..."));
// Check token count of memory
List<ChatMessage> messages = memory.messages();
int totalTokens = tokenCounter.estimateTokenCountInMessages(messages);
System.out.println("Chat memory contains " + totalTokens + " tokens");

GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
List<ChatMessage> messages = List.of(
SystemMessage.from("You are an expert programmer."),
UserMessage.from("Write a complete implementation of a binary search tree in Java")
);
int inputTokens = tokenCounter.estimateTokenCountInMessages(messages);
int modelMaxInput = 32000;
int estimatedOutputTokens = 4000;
if (inputTokens + estimatedOutputTokens > modelMaxInput) {
System.err.println("Error: Request would exceed model's context window");
System.err.println("Input: " + inputTokens + " + Estimated output: " + estimatedOutputTokens);
} else {
System.out.println("Request is within limits, proceeding...");
// Send request to model
}

// Track API costs by counting tokens
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
int totalInputTokens = 0;
List<String> prompts = List.of(
"Prompt 1...",
"Prompt 2...",
"Prompt 3..."
);
for (String prompt : prompts) {
totalInputTokens += tokenCounter.estimateTokenCountInText(prompt);
}
System.out.println("Total input tokens for batch: " + totalInputTokens);

// Ensure requests fit within model's context window
GoogleAiGeminiTokenCountEstimator tokenCounter = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
int contextWindowSize = 1_000_000; // Example: 1M tokens for gemini-2.5-pro
int reservedForOutput = 4000;
int maxInputTokens = contextWindowSize - reservedForOutput;
List<ChatMessage> messages = buildConversation();
int messageTokens = tokenCounter.estimateTokenCountInMessages(messages);
if (messageTokens > maxInputTokens) {
System.out.println("Truncating conversation to fit context window");
// Implement truncation logic
}

Supported models:
- gemini-2.5-pro - Pro model with large context window
- gemini-2.5-flash - Flash model for faster processing
- gemini-2.5-flash-8b - Lightweight model
- gemini-2.0-flash-exp - Experimental flash model

Tip: prefer estimateTokenCountInMessages() for multiple messages instead of calling the single-message method individually.

GoogleAiGeminiTokenCountEstimator implements the TokenCountEstimator interface:
import dev.langchain4j.model.tokenizer.TokenCountEstimator;
TokenCountEstimator estimator = GoogleAiGeminiTokenCountEstimator.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.build();
// Use anywhere a TokenCountEstimator is expected
int tokens = estimator.estimateTokenCountInText("Sample text");

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-google-ai-gemini