Quarkus extension for integrating local Ollama language models with LangChain4j
Quarkus extension for integrating local Ollama language models with LangChain4j, enabling AI-powered applications with chat models, streaming capabilities, embeddings, and function calling support.
Add the dependency to your pom.xml:

<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-ollama</artifactId>
<version>1.7.4</version>
</dependency>

Core imports:

import io.quarkiverse.langchain4j.ollama.*;
import io.quarkiverse.langchain4j.ollama.runtime.config.*;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.embedding.EmbeddingModel;

For declarative AI services:
import dev.langchain4j.service.SystemMessage;
import dev.langchain4j.service.UserMessage;
import io.quarkiverse.langchain4j.RegisterAiService;

Basic usage with CDI injection:

import jakarta.inject.Inject;
import dev.langchain4j.model.chat.ChatModel;
public class MyService {
@Inject
ChatModel chatModel;
public String chat(String message) {
return chatModel.chat(message);
}
}

Configuration in application.properties:
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.chat-model.temperature=0.7

Declarative AI service example:

import io.quarkiverse.langchain4j.RegisterAiService;
import dev.langchain4j.service.SystemMessage;
import dev.langchain4j.service.UserMessage;
@RegisterAiService
public interface ChatAssistant {
@SystemMessage("You are a helpful assistant.")
@UserMessage("Answer this question: {question}")
String chat(String question);
}
// Usage
@Inject
ChatAssistant assistant;
String answer = assistant.chat("What is Quarkus?");

Embedding example:

import jakarta.inject.Inject;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
@Inject
EmbeddingModel embeddingModel;
Embedding embedding = embeddingModel.embed("Some text to embed").content();
float[] vector = embedding.vector();

This extension integrates Ollama with Quarkus using the LangChain4j framework:
Chat models for conversational AI with support for both synchronous and streaming responses. Includes built-in function calling capabilities for tool execution.
// Via CDI injection
@Inject
ChatModel chatModel;
@Inject
StreamingChatModel streamingChatModel;

Embedding models for generating vector representations of text, useful for semantic search, RAG (Retrieval-Augmented Generation), and similarity analysis.
// Via CDI injection
@Inject
EmbeddingModel embeddingModel;
// Programmatic API
class OllamaEmbeddingModel {
static Builder builder();
Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
}

Comprehensive configuration system supporting default and named configurations, with extensive options for model behavior, timeouts, logging, and TLS.
Configuration prefix: quarkus.langchain4j.ollama
# Default configuration
quarkus.langchain4j.ollama.base-url=http://localhost:11434
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.chat-model.temperature=0.8
# Named configuration
quarkus.langchain4j.ollama.my-model.chat-model.model-id=llama3.1
quarkus.langchain4j.ollama.my-model.chat-model.temperature=0.7

Type-safe data models for requests, responses, messages, and options. Includes support for tool calling, image inputs, and extensible message formats.
record ChatRequest(String model, List<Message> messages, List<Tool> tools, Options options, String format, Boolean stream) { }
record ChatResponse(String model, String createdAt, Message message, Boolean done, Integer promptEvalCount, Integer evalCount) { }
record Message(Role role, String content, List<ToolCall> toolCalls, List<String> images, Map<String, Object> additionalFields) { }
record Options(Double temperature, Integer topK, Double topP, Double repeatPenalty, Integer seed, Integer numPredict, Integer numCtx, List<String> stop) { }

Function calling support allowing models to invoke external tools and business logic. Enables agentic workflows and dynamic capability extension.
record Tool(Type type, Function function) { }
record ToolCall(FunctionCall function) { }
// Nested types
record Tool.Function(String name, String description, Parameters parameters) { }
record Tool.Function.Parameters(String type, Map<String, Map<String, Object>> properties, List<String> required) { }
record ToolCall.FunctionCall(String name, Map<String, Object> arguments) { }

Low-level HTTP client for direct Ollama API access when CDI injection is not available or when fine-grained control is needed.
class OllamaClient {
OllamaClient(String baseUrl, Duration timeout, boolean logRequests, boolean logResponses, boolean logCurl, String configName, String tlsConfigurationName);
ChatResponse chat(ChatRequest request);
Multi<ChatResponse> streamingChat(ChatRequest request);
EmbeddingResponse embedding(EmbeddingRequest request);
}

Use multiple model configurations within the same application:
# Default model
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2
# Fast model for simple tasks
quarkus.langchain4j.ollama.fast.chat-model.model-id=llama3.2:1b
quarkus.langchain4j.ollama.fast.chat-model.temperature=0.5
# Creative model for content generation
quarkus.langchain4j.ollama.creative.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.creative.chat-model.temperature=1.2

Inject named models with the @Named qualifier:

@Inject
@Named("fast")
ChatModel fastModel;
@Inject
@Named("creative")
ChatModel creativeModel;

Stream responses for real-time user feedback:
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;
@Inject
StreamingChatModel streamingModel;
ChatRequest request = ChatRequest.builder()
.messages(List.of(UserMessage.from("Tell me a story")))
.build();
streamingModel.doChat(request, new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String token) {
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
System.out.println("\n[Complete]");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});

Function calling (tools) example:

import dev.langchain4j.agent.tool.Tool;
public class WeatherTools {
@Tool("Get current weather for a location")
public String getWeather(String location) {
return "Sunny, 72°F in " + location;
}
}
@RegisterAiService(tools = WeatherTools.class)
public interface WeatherAssistant {
String chat(String message);
}
// The model can automatically call getWeather() when needed
@Inject
WeatherAssistant assistant;
String response = assistant.chat("What's the weather in San Francisco?");

Embeddings for semantic search:

@Inject
EmbeddingModel embeddingModel;
// Embed documents
List<TextSegment> documents = List.of(
TextSegment.from("Quarkus is a Java framework."),
TextSegment.from("Ollama runs models locally.")
);
Response<List<Embedding>> embeddings = embeddingModel.embedAll(documents);
// Later, embed query and find similar documents
Embedding queryEmbedding = embeddingModel.embed("What is Quarkus?").content();
// ... compute similarity with document embeddings

All model operations may throw runtime exceptions:
Handle appropriately:
try {
String response = chatModel.chat("Hello");
} catch (Exception e) {
logger.error("Failed to generate response", e);
// Fallback logic
}

Prerequisites: a running Ollama instance with the target model available (e.g. ollama pull llama3.2). [NOTE(review): this sentence was truncated in the original source — confirm the full prerequisite wording.]

Install with Tessl CLI:

npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-ollama@1.7.0