LangChain4j integration for Google AI Gemini models providing chat, streaming, embeddings, image generation, and batch processing capabilities
Streaming chat model for Google AI Gemini enabling real-time token-by-token responses with multimodal support, function calling, and all advanced features available in the synchronous model. Ideal for interactive applications requiring immediate feedback.
Main streaming chat model class providing non-blocking chat interactions with real-time token delivery.
/**
 * Streaming chat model for Google AI Gemini.
 * Delivers responses token-by-token in real-time through a handler callback.
 */
public class GoogleAiGeminiStreamingChatModel {
/**
 * Creates a new builder for configuring the streaming chat model.
 * @return GoogleAiGeminiStreamingChatModelBuilder instance
 */
public static GoogleAiGeminiStreamingChatModelBuilder builder();
/**
 * Gets the default request parameters configured for this model.
 * @return ChatRequestParameters containing default configuration
 */
public ChatRequestParameters defaultRequestParameters();
/**
 * Sends a chat request and streams the response through the handler.
 * This method returns immediately and delivers tokens asynchronously.
 * @param request The chat request containing messages and configuration
 * @param handler StreamingResponseHandler to receive tokens and completion
 */
// NOTE(review): the usage examples later in this page call generate(...) with a
// StreamingResponseHandler, which is not declared here — confirm which entry point
// the installed artifact (1.11.0) actually exposes before copying examples.
public void doChat(ChatRequest request, StreamingChatResponseHandler handler);
/**
 * Returns the list of registered chat model listeners.
 * @return List of ChatModelListener instances
 */
public List<ChatModelListener> listeners();
/**
 * Returns the model provider (GOOGLE).
 * @return ModelProvider enum value
 */
public ModelProvider provider();
}

Builder class for constructing GoogleAiGeminiStreamingChatModel.
/**
 * Builder for GoogleAiGeminiStreamingChatModel.
 * Extends base builder with all configuration options.
 */
public static class GoogleAiGeminiStreamingChatModelBuilder
extends GoogleAiGeminiChatModelBaseBuilder<GoogleAiGeminiStreamingChatModelBuilder> {
/**
 * Builds the GoogleAiGeminiStreamingChatModel instance.
 * @return Configured GoogleAiGeminiStreamingChatModel
 * @throws IllegalArgumentException if required fields are missing
 */
// All configuration setters (apiKey, modelName, temperature, ...) are inherited
// from GoogleAiGeminiChatModelBaseBuilder; only build() is declared here.
public GoogleAiGeminiStreamingChatModel build();
}

All builder configuration methods are inherited from GoogleAiGeminiChatModelBaseBuilder (documented in Chat Models - Synchronous).
import dev.langchain4j.model.googleai.GoogleAiGeminiStreamingChatModel;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.data.message.AiMessage;
// Basic streaming: each token is delivered to onNext as it arrives.
// NOTE(review): generate(...)/StreamingResponseHandler is the legacy pre-1.0 API;
// the class section above documents doChat(ChatRequest, StreamingChatResponseHandler).
// Confirm against the installed version before copying.
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.build();
model.generate("Tell me a story about a robot", new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token); // Print each token as it arrives
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nComplete! Token usage: " + response.tokenUsage());
}
@Override
public void onError(Throwable error) {
System.err.println("Error: " + error.getMessage());
}
});

StringBuilder responseBuilder = new StringBuilder();
// Accumulate the full response (in responseBuilder, declared above) while echoing tokens.
// NOTE(review): handler callbacks may run on a background thread — StringBuilder is not
// thread-safe; confirm the delivery thread before sharing mutable state like this.
model.generate("Explain photosynthesis", new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
responseBuilder.append(token);
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
String fullResponse = responseBuilder.toString();
System.out.println("\n\nFull response length: " + fullResponse.length());
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

import dev.langchain4j.agent.tool.Tool;
import dev.langchain4j.service.AiServices;
import dev.langchain4j.model.googleai.GeminiMode;
// Tools exposed to the model for function calling. The @Tool description text is
// what the model sees when deciding which tool to invoke — keep it accurate.
class Calculator {
@Tool("Add two numbers")
double add(double a, double b) {
return a + b;
}
@Tool("Multiply two numbers")
double multiply(double a, double b) {
return a * b;
}
}
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
// AUTO lets the model decide when to call tools.
.toolConfig(GeminiMode.AUTO)
.build();
// Returning TokenStream from an AI Service method enables streaming.
// NOTE(review): TokenStream is not imported in this snippet (it is imported in a
// later example) — verify imports when copying.
interface StreamingAssistant {
TokenStream chat(String message);
}
StreamingAssistant assistant = AiServices.builder(StreamingAssistant.class)
.streamingChatLanguageModel(model)
.tools(new Calculator())
.build();
// Nothing is sent until start() is called on the TokenStream.
assistant.chat("What is 25 times 4, plus 10?")
.onNext(System.out::print)
.onComplete(response -> System.out.println("\nDone!"))
.onError(Throwable::printStackTrace)
.start();

import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.ImageContent;
import dev.langchain4j.data.message.TextContent;
import dev.langchain4j.data.image.Image;
// Multimodal streaming: one user message combining text and an image.
// NOTE(review): GeminiMediaResolutionLevel is used but not imported in this snippet —
// verify the import when copying.
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.mediaResolution(GeminiMediaResolutionLevel.MEDIA_RESOLUTION_HIGH)
.build();
Image image = Image.fromUrl("https://example.com/diagram.png");
UserMessage message = UserMessage.from(
TextContent.from("Describe this diagram in detail"),
ImageContent.from(image)
);
model.generate(message, new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nStreaming complete!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

GoogleAiGeminiStreamingChatModel creativeModel = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
// Sampling knobs: higher temperature/topP widen the token distribution.
.temperature(1.5) // High temperature for creative responses
.topP(0.95)
.topK(40)
.build();
creativeModel.generate("Write a creative story about time travel",
new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nCreative story complete!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
// Generation halts as soon as any of these sequences is produced.
.stopSequences(List.of("END", "CONCLUSION"))
.build();
model.generate("Write a report and end with CONCLUSION",
new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nStopped at sequence!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

import dev.langchain4j.model.googleai.GeminiThinkingConfig;
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.0-flash-thinking-exp")
// Expose intermediate reasoning; MEDIUM presumably bounds the thinking budget.
// NOTE(review): confirm thinkingLevel/GeminiThinkingLevel exist in the installed version.
.thinkingConfig(GeminiThinkingConfig.builder()
.includeThoughts(true)
.thinkingLevel(GeminiThinkingConfig.GeminiThinkingLevel.MEDIUM)
.build())
.build();
model.generate("Solve this logic puzzle: If all bloops are razzies...",
new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nReasoning complete!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

import dev.langchain4j.model.googleai.GeminiSafetySetting;
import dev.langchain4j.model.googleai.GeminiHarmCategory;
import dev.langchain4j.model.googleai.GeminiHarmBlockThreshold;
// Tighten safety filtering: block dangerous content at medium probability and above.
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.safetySettings(List.of(
new GeminiSafetySetting(
GeminiHarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
GeminiHarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
)
))
.build();
model.generate("Your prompt here", new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\n\nComplete!");
}
@Override
public void onError(Throwable error) {
// May occur if content is blocked by safety settings
error.printStackTrace();
}
});

import java.time.Duration;
// Full-featured configuration: grounding, code execution, long timeout, logging.
// NOTE(review): logRequestsAndResponses(true) may log prompts and outputs — avoid in
// production if requests can contain sensitive data.
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-pro")
.temperature(0.7)
.maxOutputTokens(4096)
.timeout(Duration.ofSeconds(120))
.allowGoogleSearch(true) // Enable web grounding
.allowCodeExecution(true) // Enable code execution
.includeCodeExecutionOutput(true)
.mediaResolution(GeminiMediaResolutionLevel.MEDIA_RESOLUTION_HIGH)
.logRequestsAndResponses(true)
.build();
model.generate("Complex query requiring search and code execution",
new StreamingResponseHandler<AiMessage>() {
// Captured when the anonymous handler is constructed, i.e. before the request is sent.
private final long startTime = System.currentTimeMillis();
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
long duration = System.currentTimeMillis() - startTime;
System.out.println("\n\nCompleted in " + duration + "ms");
System.out.println("Tokens: " + response.tokenUsage());
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.TokenStream;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
// Returning TokenStream makes the AI Service stream its responses.
interface StreamingAssistant {
TokenStream chat(String message);
}
GoogleAiGeminiStreamingChatModel model = GoogleAiGeminiStreamingChatModel.builder()
.apiKey(System.getenv("GOOGLE_AI_API_KEY"))
.modelName("gemini-2.5-flash")
.temperature(0.7)
.build();
// NOTE(review): in some 1.x releases this builder method is named streamingChatModel —
// confirm against the installed version.
StreamingAssistant assistant = AiServices.builder(StreamingAssistant.class)
.streamingChatLanguageModel(model)
// Memory keeps only the 10 most recent messages in context.
.chatMemory(MessageWindowChatMemory.withMaxMessages(10))
.build();
assistant.chat("Hello, how are you?")
.onNext(System.out::print)
.onComplete(response -> System.out.println("\n\nDone!"))
.onError(Throwable::printStackTrace)
.start();
// Continue conversation with memory
// NOTE(review): this second call registers no onError handler — failures would be silent.
assistant.chat("What did I just ask you?")
.onNext(System.out::print)
.onComplete(response -> System.out.println("\n\nDone!"))
.start();

Supported models for streaming:
- gemini-2.5-flash - Fast model ideal for streaming (recommended)
- gemini-2.5-pro - Flagship model with advanced capabilities
- gemini-2.5-flash-8b - Lightweight model for simple streaming tasks
- gemini-2.0-flash-exp - Experimental flash model
- gemini-2.0-flash-thinking-exp-01-21 - Experimental thinking model

Errors during streaming are delivered to the onError callback:
// Errors are delivered to onError rather than thrown; branch on the exception type
// to distinguish causes.
model.generate("Your prompt", new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\nSuccess!");
}
@Override
public void onError(Throwable error) {
// NOTE(review): TimeoutException is not imported in this snippet (presumably
// java.util.concurrent.TimeoutException) — verify when copying.
if (error instanceof TimeoutException) {
System.err.println("Request timed out");
} else if (error instanceof IllegalStateException) {
System.err.println("Content blocked by safety filters");
} else {
System.err.println("Error: " + error.getMessage());
}
}
});

| Feature | Streaming | Synchronous |
|---|---|---|
| Response delivery | Token-by-token | Complete response |
| First-token latency | ~100-1000ms | N/A |
| User experience | Real-time feedback | Wait for completion |
| Use case | Interactive UIs | Batch processing |
| Error handling | Callback-based | Exception-based |
| Resource usage | Open connection | Request-response |
GoogleAiGeminiStreamingChatModel implements the StreamingChatLanguageModel interface and can be used anywhere a LangChain4j streaming chat model is expected, including with AI Services, memory management, and tool integration.
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-google-ai-gemini@1.11.0