Configuration

The extension provides a comprehensive configuration system for the Ollama integration, supporting a default configuration plus any number of named configurations, with options for model behavior, connection settings, and logging.

Configuration Prefix

All configuration properties use the prefix quarkus.langchain4j.ollama. Named configurations insert their name directly after the prefix (quarkus.langchain4j.ollama.<name>.*).

Configuration Interfaces

// Root runtime configuration
@ConfigRoot(phase = RUN_TIME)
@ConfigMapping(prefix = "quarkus.langchain4j.ollama")
interface LangChain4jOllamaConfig {
    @WithParentName
    OllamaConfig defaultConfig();

    @WithParentName
    @WithDefaults
    Map<String, OllamaConfig> namedConfig();
}

// Ollama configuration group (nested in LangChain4jOllamaConfig)
interface OllamaConfig {
    Optional<String> baseUrl();
    Optional<String> tlsConfigurationName();
    @WithDefault("${quarkus.langchain4j.timeout}")
    Optional<Duration> timeout();
    @WithDefault("${quarkus.langchain4j.log-requests}")
    Optional<Boolean> logRequests();
    @WithDefault("${quarkus.langchain4j.log-responses}")
    Optional<Boolean> logResponses();
    @WithDefault("${quarkus.langchain4j.log-requests-curl}")
    Optional<Boolean> logRequestsCurl();
    @WithDefault("true")
    Boolean enableIntegration();
    ChatModelConfig chatModel();
    EmbeddingModelConfig embeddingModel();
}

// Chat model configuration
@ConfigGroup
interface ChatModelConfig {
    @WithDefault("${quarkus.langchain4j.temperature:0.8}")
    Double temperature();
    OptionalInt numPredict();
    Optional<List<String>> stop();
    @WithDefault("0.9")
    Double topP();
    @WithDefault("40")
    Integer topK();
    Optional<Integer> seed();
    Optional<String> format();
    Optional<Boolean> logRequests();
    Optional<Boolean> logResponses();
}

// Embedding model configuration
@ConfigGroup
interface EmbeddingModelConfig {
    @WithDefault("${quarkus.langchain4j.temperature:0.8}")
    Double temperature();
    @WithDefault("128")
    Integer numPredict();
    Optional<List<String>> stop();
    @WithDefault("0.9")
    Double topP();
    @WithDefault("40")
    Integer topK();
    Optional<Boolean> logRequests();
    Optional<Boolean> logResponses();
}

// Build-time fixed configuration
@ConfigRoot(phase = BUILD_AND_RUN_TIME_FIXED)
@ConfigMapping(prefix = "quarkus.langchain4j.ollama")
interface LangChain4jOllamaFixedRuntimeConfig {
    @WithParentName
    OllamaConfig defaultConfig();

    @WithParentName
    @WithDefaults
    Map<String, OllamaConfig> namedConfig();
}

// Nested in LangChain4jOllamaFixedRuntimeConfig
interface OllamaConfig {
    ChatModelFixedRuntimeConfig chatModel();
    EmbeddingModelFixedRuntimeConfig embeddingModel();
}

// Chat model fixed configuration
@ConfigGroup
interface ChatModelFixedRuntimeConfig {
    @WithDefault("llama3.2")
    String modelId();
}

// Embedding model fixed configuration
@ConfigGroup
interface EmbeddingModelFixedRuntimeConfig {
    @WithDefault("nomic-embed-text")
    String modelId();
}
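
The @WithParentName members mean the default configuration lives directly under the prefix, while each key of the namedConfig() map becomes an extra name segment. A quick illustration of how property names resolve:

# Resolved by defaultConfig()
quarkus.langchain4j.ollama.chat-model.temperature=0.8

# Resolved by namedConfig() under the key "fast"
quarkus.langchain4j.ollama.fast.chat-model.temperature=0.5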

General Configuration Properties

Connection Settings

# Ollama server base URL
quarkus.langchain4j.ollama.base-url=http://localhost:11434

# Request timeout (duration format: 10s, 1m, etc.)
quarkus.langchain4j.ollama.timeout=10s

# Named TLS configuration (for HTTPS)
quarkus.langchain4j.ollama.tls-configuration-name=ollama-tls

Properties:

  • base-url (Optional<String>) - Ollama server URL. Default: "http://localhost:11434"
  • timeout (Optional<Duration>) - Request timeout. Default: inherited from quarkus.langchain4j.timeout (10 seconds if the parent is unset)
  • tls-configuration-name (Optional<String>) - Named Quarkus TLS configuration for HTTPS connections
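
These values are ordinary MicroProfile Config properties, so they can also be read from application code. A minimal sketch (the bean and field names are illustrative):

import jakarta.enterprise.context.ApplicationScoped;
import org.eclipse.microprofile.config.inject.ConfigProperty;

@ApplicationScoped
public class OllamaEndpointInfo {

    // Reads the same property the extension uses, with the documented default
    @ConfigProperty(name = "quarkus.langchain4j.ollama.base-url",
                    defaultValue = "http://localhost:11434")
    String baseUrl;

    public String endpoint() {
        return baseUrl;
    }
}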

Logging Settings

# Log request payloads
quarkus.langchain4j.ollama.log-requests=true

# Log response payloads
quarkus.langchain4j.ollama.log-responses=true

# Log equivalent cURL commands
quarkus.langchain4j.ollama.log-requests-curl=true

Properties:

  • log-requests (Optional<Boolean>) - Enable request logging. Default: inherited from quarkus.langchain4j.log-requests, falling back to false
  • log-responses (Optional<Boolean>) - Enable response logging. Default: inherited from quarkus.langchain4j.log-responses, falling back to false
  • log-requests-curl (Optional<Boolean>) - Log equivalent cURL commands for debugging. Default: inherited from quarkus.langchain4j.log-requests-curl, falling back to false

Integration Control

# Enable/disable the Ollama integration
quarkus.langchain4j.ollama.enable-integration=true

Properties:

  • enable-integration (Boolean) - Enable or disable Ollama integration. Default: true
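
For example, to prevent any outbound Ollama calls in the test profile:

%test.quarkus.langchain4j.ollama.enable-integration=false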

Chat Model Configuration

Model Selection

# Chat model ID (fixed at build time)
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2

Properties:

  • model-id (String) - Ollama model name. Default: "llama3.2"
    • Examples: "llama3.2", "llama3.1", "mistral", "codellama", "llava" (vision)

Sampling Parameters

# Temperature: controls randomness (0.0 = deterministic, 2.0 = very random)
quarkus.langchain4j.ollama.chat-model.temperature=0.8

# Top-p (nucleus sampling): cumulative probability threshold
quarkus.langchain4j.ollama.chat-model.top-p=0.9

# Top-k: limit token selection to top K tokens
quarkus.langchain4j.ollama.chat-model.top-k=40

# Random seed for reproducibility
quarkus.langchain4j.ollama.chat-model.seed=42

Properties:

  • temperature (Double) - Sampling temperature (0.0-2.0). Default: inherited from quarkus.langchain4j.temperature, falling back to 0.8
  • top-p (Double) - Nucleus sampling threshold (0.0-1.0). Default: 0.9
  • top-k (Integer) - Top-k sampling limit. Default: 40
  • seed (Optional<Integer>) - Random seed for deterministic output

Generation Control

# Maximum tokens to generate
quarkus.langchain4j.ollama.chat-model.num-predict=1024

# Stop sequences (comma-separated)
quarkus.langchain4j.ollama.chat-model.stop=\n\n,END

# Response format: "json" or JSON schema
quarkus.langchain4j.ollama.chat-model.format=json

Properties:

  • num-predict (OptionalInt) - Maximum tokens to generate. No default (model-specific limit applies)
  • stop (Optional<List<String>>) - Stop sequences to end generation
  • format (Optional<String>) - Force JSON output: "json" for JSON mode, or provide JSON schema string
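
To illustrate the format property, a hedged sketch of an AI service that relies on format=json (the interface and prompt are illustrative, not part of the extension):

import dev.langchain4j.service.UserMessage;
import io.quarkiverse.langchain4j.RegisterAiService;

// Assumes quarkus.langchain4j.ollama.chat-model.format=json is set, so the
// model is constrained to emit valid JSON matching the prompted shape.
@RegisterAiService
public interface SentimentAnalyzer {

    @UserMessage("""
            Classify the sentiment of the following text as POSITIVE, NEGATIVE or NEUTRAL.
            Reply with a JSON object containing a single field named sentiment.
            Text: {text}
            """)
    String classify(String text);
}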

Chat Model Logging

# Override global logging for chat model
quarkus.langchain4j.ollama.chat-model.log-requests=true
quarkus.langchain4j.ollama.chat-model.log-responses=true

Properties:

  • log-requests (Optional<Boolean>) - Override global request logging for chat model
  • log-responses (Optional<Boolean>) - Override global response logging for chat model

Embedding Model Configuration

Model Selection

# Embedding model ID (fixed at build time)
quarkus.langchain4j.ollama.embedding-model.model-id=nomic-embed-text

Properties:

  • model-id (String) - Ollama embedding model name. Default: "nomic-embed-text"
    • Examples: "nomic-embed-text", "mxbai-embed-large", "all-minilm"

Sampling Parameters

# Temperature for embedding generation
quarkus.langchain4j.ollama.embedding-model.temperature=0.8

# Top-p sampling
quarkus.langchain4j.ollama.embedding-model.top-p=0.9

# Top-k sampling
quarkus.langchain4j.ollama.embedding-model.top-k=40

Properties:

  • temperature (Double) - Sampling temperature. Default: inherited from quarkus.langchain4j.temperature, falling back to 0.8
  • top-p (Double) - Nucleus sampling threshold. Default: 0.9
  • top-k (Integer) - Top-k sampling limit. Default: 40

Generation Control

# Maximum tokens for embedding (usually not needed to change)
quarkus.langchain4j.ollama.embedding-model.num-predict=128

# Stop sequences
quarkus.langchain4j.ollama.embedding-model.stop=END

Properties:

  • num-predict (Integer) - Maximum tokens. Default: 128
  • stop (Optional<List<String>>) - Stop sequences

Embedding Model Logging

# Override global logging for embedding model
quarkus.langchain4j.ollama.embedding-model.log-requests=true
quarkus.langchain4j.ollama.embedding-model.log-responses=false

Properties:

  • log-requests (Optional<Boolean>) - Override global request logging for embedding model
  • log-responses (Optional<Boolean>) - Override global response logging for embedding model
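
The configured embedding model is injectable like any other CDI bean. A minimal usage sketch (class and method names are illustrative):

import dev.langchain4j.model.embedding.EmbeddingModel;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

@ApplicationScoped
public class EmbeddingExample {

    @Inject
    EmbeddingModel embeddingModel;

    public int embeddingDimension(String text) {
        // embed(..) returns Response<Embedding>; content() unwraps the vector
        return embeddingModel.embed(text).content().dimension();
    }
}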

Named Configurations

Create multiple configurations with different settings using the named configuration pattern:

# Default configuration
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.chat-model.temperature=0.8

# Fast model for simple tasks
quarkus.langchain4j.ollama.fast.chat-model.model-id=llama3.2:1b
quarkus.langchain4j.ollama.fast.chat-model.temperature=0.5
quarkus.langchain4j.ollama.fast.chat-model.num-predict=256

# Creative model for content generation
quarkus.langchain4j.ollama.creative.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.creative.chat-model.temperature=1.2
quarkus.langchain4j.ollama.creative.chat-model.top-p=0.95

# Code generation model
quarkus.langchain4j.ollama.code.chat-model.model-id=codellama
quarkus.langchain4j.ollama.code.chat-model.temperature=0.2
quarkus.langchain4j.ollama.code.chat-model.format=json

# Large embeddings
quarkus.langchain4j.ollama.large-embed.embedding-model.model-id=mxbai-embed-large
quarkus.langchain4j.ollama.large-embed.timeout=20s

# Remote Ollama server
quarkus.langchain4j.ollama.remote.base-url=https://ollama.example.com
quarkus.langchain4j.ollama.remote.tls-configuration-name=remote-tls
quarkus.langchain4j.ollama.remote.timeout=30s

Inject named configurations (named models are qualified with @ModelName from io.quarkiverse.langchain4j):

@Inject
ChatModel defaultModel;

@Inject
@ModelName("fast")
ChatModel fastModel;

@Inject
@ModelName("creative")
ChatModel creativeModel;

@Inject
@ModelName("code")
ChatModel codeModel;

@Inject
@ModelName("large-embed")
EmbeddingModel largeEmbeddings;

Use in AI Services:

@RegisterAiService(modelName = "creative")
public interface CreativeWriter {
    String generateStory(String topic);
}

@RegisterAiService(modelName = "code")
public interface CodeGenerator {
    String generateCode(String description);
}
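
A hypothetical REST resource showing one of these services in use:

import jakarta.inject.Inject;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.QueryParam;

@Path("/stories")
public class StoryResource {

    @Inject
    CreativeWriter writer; // backed by the "creative" named configuration

    @GET
    public String story(@QueryParam("topic") String topic) {
        return writer.generateStory(topic);
    }
}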

Configuration Inheritance

Ollama-specific properties inherit their defaults from the parent Quarkus LangChain4j configuration, which applies across all providers:

# Parent configuration (applies to all LangChain4j providers)
quarkus.langchain4j.timeout=15s
quarkus.langchain4j.temperature=0.7
quarkus.langchain4j.log-requests=true
quarkus.langchain4j.log-responses=false

# Ollama inherits parent values unless overridden
quarkus.langchain4j.ollama.chat-model.temperature=0.9  # Overrides parent
# timeout and logging inherit from parent
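
To verify what actually got resolved, a small sketch using MicroProfile Config (assumes it runs inside the Quarkus application; the class name is illustrative):

import org.eclipse.microprofile.config.ConfigProvider;

public class ConfigCheck {

    // With the settings above this prints 0.9: the chat-model property
    // overrides the parent quarkus.langchain4j.temperature of 0.7.
    public static void printEffectiveTemperature() {
        double temperature = ConfigProvider.getConfig()
                .getOptionalValue("quarkus.langchain4j.ollama.chat-model.temperature", Double.class)
                .orElse(0.8);
        System.out.println("Effective chat temperature: " + temperature);
    }
}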

TLS Configuration

For HTTPS connections to the Ollama server, define a named TLS configuration and reference it from the Ollama config:

# TLS configuration
quarkus.tls.ollama-tls.trust-store.pem.certs=ollama-server-cert.pem
quarkus.tls.ollama-tls.key-store.pem.keys=client-key.pem
quarkus.tls.ollama-tls.key-store.pem.certs=client-cert.pem

# Use in Ollama config
quarkus.langchain4j.ollama.base-url=https://ollama.example.com
quarkus.langchain4j.ollama.tls-configuration-name=ollama-tls

Configuration Migration

The extension supports migration from the deprecated model-name property to model-id via SmallRye Config interceptors:

// Redirects lookups of the deprecated *.model-name properties to *.model-id
class ModelIdConfigRelocateInterceptor extends RelocateConfigSourceInterceptor {
    ModelIdConfigRelocateInterceptor();
}

// Lets *.model-id lookups fall back to a value set under *.model-name
class ModelIdConfigFallbackInterceptor extends FallbackConfigSourceInterceptor {
    ModelIdConfigFallbackInterceptor();
}

Old (deprecated):

quarkus.langchain4j.ollama.chat-model.model-name=llama3.2

New (recommended):

quarkus.langchain4j.ollama.chat-model.model-id=llama3.2

Both work, but model-id is recommended for consistency.

Complete Example

# ===== Default Configuration =====

# Connection
quarkus.langchain4j.ollama.base-url=http://localhost:11434
quarkus.langchain4j.ollama.timeout=15s
quarkus.langchain4j.ollama.enable-integration=true

# Logging
quarkus.langchain4j.ollama.log-requests=true
quarkus.langchain4j.ollama.log-responses=false
quarkus.langchain4j.ollama.log-requests-curl=false

# Chat Model
quarkus.langchain4j.ollama.chat-model.model-id=llama3.2
quarkus.langchain4j.ollama.chat-model.temperature=0.8
quarkus.langchain4j.ollama.chat-model.top-p=0.9
quarkus.langchain4j.ollama.chat-model.top-k=40
quarkus.langchain4j.ollama.chat-model.num-predict=2048
quarkus.langchain4j.ollama.chat-model.stop=\n\n

# Embedding Model
quarkus.langchain4j.ollama.embedding-model.model-id=nomic-embed-text

# ===== Production Configuration (named: "production") =====

quarkus.langchain4j.ollama.production.base-url=https://ollama.internal.company.com
quarkus.langchain4j.ollama.production.tls-configuration-name=internal-tls
quarkus.langchain4j.ollama.production.timeout=30s
quarkus.langchain4j.ollama.production.chat-model.model-id=llama3.1:70b
quarkus.langchain4j.ollama.production.chat-model.temperature=0.7
quarkus.langchain4j.ollama.production.log-requests=false
quarkus.langchain4j.ollama.production.log-responses=false

# ===== Development Configuration (Quarkus %dev profile) =====

%dev.quarkus.langchain4j.ollama.log-requests=true
%dev.quarkus.langchain4j.ollama.log-responses=true
%dev.quarkus.langchain4j.ollama.log-requests-curl=true
%dev.quarkus.langchain4j.ollama.chat-model.model-id=llama3.2:1b
%dev.quarkus.langchain4j.ollama.timeout=5s

# ===== Test Configuration (Quarkus %test profile) =====

%test.quarkus.langchain4j.ollama.chat-model.seed=12345
%test.quarkus.langchain4j.ollama.chat-model.temperature=0.0
%test.quarkus.langchain4j.ollama.timeout=60s

Configuration Best Practices

  1. Use named configurations for different use cases (fast, creative, code, etc.)
  2. Set timeouts appropriately based on model size and expected response length
  3. Enable logging in development for debugging, disable in production
  4. Use profiles (%dev, %test, %prod) for environment-specific config
  5. Set seeds in tests for reproducible results (see the sketch after this list)
  6. Lower temperature (0.1-0.3) for deterministic tasks like code generation
  7. Higher temperature (1.0-1.5) for creative tasks like story writing
  8. Configure TLS for production deployments with remote Ollama servers
  9. Use JSON format when structured output is required
  10. Monitor token limits (num-predict) to bound response length and generation time
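
A hedged test sketch for items 5-7, assuming a reachable Ollama server during the test run and the %test profile shown earlier (seed=12345, temperature=0.0):

import static org.junit.jupiter.api.Assertions.assertEquals;

import dev.langchain4j.model.chat.ChatModel;
import io.quarkus.test.junit.QuarkusTest;
import jakarta.inject.Inject;
import org.junit.jupiter.api.Test;

@QuarkusTest
class DeterministicChatTest {

    @Inject
    ChatModel model;

    @Test
    void sameInputSameOutput() {
        // With a fixed seed and temperature 0.0, identical prompts
        // should yield identical completions.
        String first = model.chat("Summarize Quarkus in one sentence.");
        String second = model.chat("Summarize Quarkus in one sentence.");
        assertEquals(first, second);
    }
}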
