Quarkus extension deployment module for integrating Ollama LLM models with Quarkus applications through the LangChain4j framework
The Quarkus LangChain4j Ollama Deployment module creates synthetic CDI beans programmatically during the build process. These beans are created at runtime initialization and can be injected into application code for interacting with Ollama models.
Synthetic beans are CDI beans that are created programmatically by Quarkus extensions rather than being discovered through classpath scanning. They allow deployment modules to register beans whose creation logic is supplied at build time and executed at runtime initialization.
package io.quarkiverse.langchain4j.ollama.deployment;
import java.util.List;
import jakarta.enterprise.context.ApplicationScoped;
import org.jboss.jandex.AnnotationInstance;
import org.jboss.jandex.ClassType;
import org.jboss.jandex.ParameterizedType;
import org.jboss.jandex.Type;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkiverse.langchain4j.deployment.DotNames;
import io.quarkiverse.langchain4j.deployment.items.SelectedChatModelProviderBuildItem;
import io.quarkiverse.langchain4j.deployment.items.SelectedEmbeddingModelCandidateBuildItem;
import io.quarkiverse.langchain4j.ollama.runtime.OllamaRecorder;
import io.quarkiverse.langchain4j.ModelName;
import io.quarkiverse.langchain4j.runtime.NamedConfigUtil;
import static io.quarkiverse.langchain4j.deployment.LangChain4jDotNames.*;
public class OllamaProcessor {
private static final String PROVIDER = "ollama";
/**
 * Registers synthetic CDI beans for every selected Ollama model configuration.
 * <p>
 * For each chat-model selection whose provider is {@code ollama}, a {@code ChatModel}
 * and a {@code StreamingChatModel} bean are produced; for each embedding-model
 * selection, an {@code EmbeddingModel} bean is produced. Named configurations get a
 * {@code @ModelName} qualifier via {@link #addQualifierIfNecessary}.
 *
 * @param recorder          recorder whose functions/suppliers build the model instances at runtime init
 * @param selectedChatItem  chat model providers chosen by the core extension
 * @param selectedEmbedding embedding model providers chosen by the core extension
 * @param beanProducer      sink for the synthetic bean registrations
 */
@BuildStep
@Record(ExecutionTime.RUNTIME_INIT)
void generateBeans(
        OllamaRecorder recorder,
        List<SelectedChatModelProviderBuildItem> selectedChatItem,
        List<SelectedEmbeddingModelCandidateBuildItem> selectedEmbedding,
        BuildProducer<SyntheticBeanBuildItem> beanProducer) {
    // Chat model beans: one ChatModel and one StreamingChatModel per selected
    // "ollama" configuration (default plus any named configs).
    for (var selected : selectedChatItem) {
        if (!PROVIDER.equals(selected.getProvider())) {
            continue;
        }
        String configName = selected.getConfigName();

        var chatBuilder = SyntheticBeanBuildItem
                .configure(CHAT_MODEL)
                .setRuntimeInit()
                .defaultBean()
                .scope(ApplicationScoped.class)
                // Listeners are looked up at creation time through Instance<ChatModelListener>.
                .addInjectionPoint(chatModelListenerInstanceType())
                .createWith(recorder.chatModel(configName));
        addQualifierIfNecessary(chatBuilder, configName);
        beanProducer.produce(chatBuilder.done());

        var streamingBuilder = SyntheticBeanBuildItem
                .configure(STREAMING_CHAT_MODEL)
                .setRuntimeInit()
                .defaultBean()
                .scope(ApplicationScoped.class)
                .addInjectionPoint(chatModelListenerInstanceType())
                .createWith(recorder.streamingChatModel(configName));
        addQualifierIfNecessary(streamingBuilder, configName);
        beanProducer.produce(streamingBuilder.done());
    }

    // Embedding model beans. Marked unremovable because embedding models are often
    // resolved programmatically (e.g. via Instance<EmbeddingModel>) rather than
    // injected directly, which would otherwise let ArC's unused-bean removal drop them.
    for (var selected : selectedEmbedding) {
        if (!PROVIDER.equals(selected.getProvider())) {
            continue;
        }
        String configName = selected.getConfigName();
        var builder = SyntheticBeanBuildItem
                .configure(EMBEDDING_MODEL)
                .setRuntimeInit()
                .defaultBean()
                .unremovable()
                .scope(ApplicationScoped.class)
                .supplier(recorder.embeddingModel(configName));
        addQualifierIfNecessary(builder, configName);
        beanProducer.produce(builder.done());
    }
}

/**
 * Jandex type for {@code Instance<ChatModelListener>} — the injection point through
 * which the recorder retrieves all registered chat model listeners. Shared by the
 * blocking and streaming chat model beans so the type construction is not duplicated.
 */
private static ParameterizedType chatModelListenerInstanceType() {
    return ParameterizedType.create(
            DotNames.CDI_INSTANCE,
            new Type[] { ClassType.create(DotNames.CHAT_MODEL_LISTENER) },
            null);
}
/**
 * Adds a {@code @ModelName(configName)} qualifier to the bean being configured,
 * unless {@code configName} denotes the default (unnamed) configuration, which
 * stays unqualified so plain {@code @Inject} resolves it.
 *
 * @param builder    the synthetic bean configurator to decorate
 * @param configName the model configuration name from application config
 */
private void addQualifierIfNecessary(
        SyntheticBeanBuildItem.ExtendedBeanConfigurator builder,
        String configName) {
    if (NamedConfigUtil.isDefault(configName)) {
        return; // default config -> no qualifier
    }
    AnnotationInstance qualifier = AnnotationInstance.builder(ModelName.class)
            .add("value", configName)
            .build();
    builder.addQualifier(qualifier);
}
}

Execution Time: ExecutionTime.RUNTIME_INIT - Beans are created at runtime initialization
Consumes:
OllamaRecorder - Recorder for runtime bean creation
List<SelectedChatModelProviderBuildItem> - Selected chat model providers
List<SelectedEmbeddingModelCandidateBuildItem> - Selected embedding model providers
Produces: SyntheticBeanBuildItem - Synthetic bean registrations
// Type
dev.langchain4j.model.chat.ChatModel
// Configuration
SyntheticBeanBuildItem
.configure(CHAT_MODEL) // Bean type
.setRuntimeInit() // Runtime initialization
.defaultBean() // Can be overridden
.scope(ApplicationScoped.class) // Application-scoped
.addInjectionPoint(Instance<ChatModelListener>) // Inject listeners
.createWith(recorder.chatModel(configName)) // Creation logic
.addQualifier(@ModelName(configName)) // For named configs
    .done()

Bean Type: dev.langchain4j.model.chat.ChatModel
Scope: @ApplicationScoped - Single instance per application
Default Bean: Yes - Can be overridden by application beans with @Priority
Qualifiers:
@ModelName(configName) for named configurationsInjection Points:
Instance<ChatModelListener> - For chat model event listenersCreation: Via OllamaRecorder.chatModel(configName) at runtime initialization
Purpose: Provides the main interface for interacting with Ollama chat models. Applications inject this bean to send chat messages and receive responses.
// Type
dev.langchain4j.model.chat.StreamingChatModel
// Configuration
SyntheticBeanBuildItem
.configure(STREAMING_CHAT_MODEL) // Bean type
.setRuntimeInit() // Runtime initialization
.defaultBean() // Can be overridden
.scope(ApplicationScoped.class) // Application-scoped
.addInjectionPoint(Instance<ChatModelListener>) // Inject listeners
.createWith(recorder.streamingChatModel(configName)) // Creation logic
.addQualifier(@ModelName(configName)) // For named configs
    .done()

Bean Type: dev.langchain4j.model.chat.StreamingChatModel
Scope: @ApplicationScoped - Single instance per application
Default Bean: Yes - Can be overridden by application beans
Qualifiers:
@ModelName(configName) for named configurationsInjection Points:
Instance<ChatModelListener> - For chat model event listenersCreation: Via OllamaRecorder.streamingChatModel(configName) at runtime initialization
Purpose: Provides streaming interface for Ollama chat models. Applications inject this bean to receive streaming responses from chat models (tokens arrive as they're generated).
// Type
dev.langchain4j.model.embedding.EmbeddingModel
// Configuration
SyntheticBeanBuildItem
.configure(EMBEDDING_MODEL) // Bean type
.setRuntimeInit() // Runtime initialization
.defaultBean() // Can be overridden
.unremovable() // Not removed by Arc
.scope(ApplicationScoped.class) // Application-scoped
.supplier(recorder.embeddingModel(configName)) // Creation logic
.addQualifier(@ModelName(configName)) // For named configs
    .done()

Bean Type: dev.langchain4j.model.embedding.EmbeddingModel
Scope: @ApplicationScoped - Single instance per application
Default Bean: Yes - Can be overridden by application beans
Unremovable: Yes - Not removed during unused bean removal optimization
Qualifiers:
@ModelName(configName) for named configurationsInjection Points: None
Creation: Via OllamaRecorder.embeddingModel(configName) supplier at runtime initialization
Purpose: Provides interface for generating text embeddings using Ollama models. Applications inject this bean to convert text into vector embeddings for semantic search, similarity comparison, etc.
.setRuntimeInit()Purpose: Indicates the bean is created during runtime initialization phase.
When: After static initialization but before the application starts serving requests.
Why: Allows the bean creation logic to access runtime configuration and perform I/O operations.
.defaultBean()Purpose: Marks the bean as a default implementation that can be overridden.
Behavior: If an application provides its own bean of the same type, it takes precedence.
Use Case: Allows applications to provide custom implementations while falling back to the Ollama implementation by default.
.unremovable()Purpose: Prevents Arc (CDI container) from removing the bean during optimization.
Why: Embedding models might be used dynamically (via Instance<EmbeddingModel>) rather than direct injection, which would make them appear unused to the optimizer.
Applied To: EmbeddingModel bean only (chat models use direct injection with listeners).
.scope(ApplicationScoped.class)Purpose: Defines the lifecycle scope of the bean.
ApplicationScoped: Single instance per application, created on first use, destroyed on shutdown.
Why: Model clients maintain HTTP connections and state, so reusing a single instance is efficient.
.addInjectionPoint(ParameterizedType.create(
DotNames.CDI_INSTANCE,
new Type[] { ClassType.create(DotNames.CHAT_MODEL_LISTENER) },
null
))Type: Instance<ChatModelListener>
Purpose: Declares that the bean needs to inject all available chat model listeners.
Why: Chat models notify listeners of events (request sent, response received, error occurred).
How Used: The recorder's creation logic retrieves all listeners via the injected Instance and registers them with the model.
For the default (unnamed) configuration:
quarkus.langchain4j.ollama.chat-model.enabled=trueNo Qualifier: Beans are created without qualifiers.
Injection:
@Inject
ChatModel chatModel; // Injects default ChatModelFor named configurations:
quarkus.langchain4j.ollama.my-ollama.chat-model.enabled=trueQualifier: Beans are created with @ModelName("my-ollama") qualifier.
Injection:
@Inject
@ModelName("my-ollama")
ChatModel chatModel; // Injects named ChatModel

private void addQualifierIfNecessary(
SyntheticBeanBuildItem.ExtendedBeanConfigurator builder,
String configName
) {
if (!NamedConfigUtil.isDefault(configName)) {
builder.addQualifier(
AnnotationInstance.builder(ModelName.class)
.add("value", configName)
.build()
);
}
}Logic:
@ModelName annotation instance with the config nameResult:
@ModelName(name) qualifier (qualified injection)The OllamaRecorder is responsible for the actual bean creation at runtime:
package io.quarkiverse.langchain4j.ollama.runtime;
import java.util.function.Function;
import java.util.function.Supplier;
import io.quarkus.runtime.annotations.Recorder;
import io.quarkus.arc.SyntheticCreationalContext;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
@Recorder
public class OllamaRecorder {
public Function<SyntheticCreationalContext<ChatModel>, ChatModel> chatModel(
String configName
);
public Function<SyntheticCreationalContext<StreamingChatModel>, StreamingChatModel> streamingChatModel(
String configName
);
public Supplier<EmbeddingModel> embeddingModel(
String configName
);
}

Purpose: Provides runtime creation functions for synthetic beans.
Methods:
chatModel(configName) - Returns function that creates ChatModel at runtimestreamingChatModel(configName) - Returns function that creates StreamingChatModel at runtimeembeddingModel(configName) - Returns supplier that creates EmbeddingModel at runtimeHow It Works:
Build Time:
generateBeans build step creates SyntheticBeanBuildItem for each selected modelRuntime Initialization:
SyntheticCreationalContextApplication Runtime:
@Injectimport jakarta.inject.Inject;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.data.message.UserMessage;
public class MyService {
@Inject
ChatModel chatModel;
public String chat(String userMessage) {
return chatModel.generate(userMessage);
}
}import jakarta.inject.Inject;
import dev.langchain4j.model.chat.ChatModel;
import io.quarkiverse.langchain4j.ModelName;
public class MyService {
@Inject
@ModelName("my-ollama")
ChatModel chatModel;
public String chat(String userMessage) {
return chatModel.generate(userMessage);
}
}import jakarta.inject.Inject;
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.StreamingResponseHandler;
public class MyService {
@Inject
StreamingChatModel streamingChatModel;
public void chatStreaming(String userMessage) {
streamingChatModel.generate(userMessage, new StreamingResponseHandler<AiMessage>() {
@Override
public void onNext(String token) {
System.out.print(token);
}
@Override
public void onComplete(Response<AiMessage> response) {
System.out.println("\nDone!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
});
}
}import jakarta.inject.Inject;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
public class MyService {
@Inject
EmbeddingModel embeddingModel;
public float[] getEmbedding(String text) {
Embedding embedding = embeddingModel.embed(text).content();
return embedding.vector();
}
}import jakarta.inject.Inject;
import dev.langchain4j.model.chat.ChatModel;
import io.quarkiverse.langchain4j.ModelName;
public class MyService {
@Inject
ChatModel defaultChatModel; // Default instance
@Inject
@ModelName("ollama1")
ChatModel ollama1; // Named instance 1
@Inject
@ModelName("ollama2")
ChatModel ollama2; // Named instance 2
public String compareModels(String prompt) {
String response1 = ollama1.generate(prompt);
String response2 = ollama2.generate(prompt);
return "Model 1: " + response1 + "\nModel 2: " + response2;
}
}

Key points: named-configuration beans carry the @ModelName qualifier for injection, chat model beans declare an Instance<ChatModelListener> injection point for listeners, and all beans use @ApplicationScoped for efficient resource usage.

Install with Tessl CLI
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-ollama-deployment