Quarkus extension for integrating IBM watsonx.ai foundation models with LangChain4j. Provides chat models, generation models, streaming models, embedding models, and scoring models for IBM watsonx.ai. Includes comprehensive configuration options, support for tool/function calling, text extraction from documents in Cloud Object Storage, and experimental built-in services for Google search, weather, and web crawling. Designed for enterprise Java applications using the Quarkus framework with built-in dependency injection and native compilation support.
Comprehensive Quarkus configuration for all models, authentication, and services through application.properties. The extension provides extensive configuration options with sensible defaults, supporting both default and named model configurations.
All configuration properties use the prefix: quarkus.langchain4j.watsonx
public interface WatsonConfig {
Optional<String> baseUrl();
Optional<String> apiKey();
Optional<Duration> timeout();
String version();
Optional<String> spaceId();
Optional<String> projectId();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
Optional<Boolean> logRequestsCurl();
Boolean enableIntegration();
}
Properties:
# Watsonx API base URL (required)
quarkus.langchain4j.watsonx.base-url=https://us-south.ml.cloud.ibm.com
# IBM Cloud API key (required)
quarkus.langchain4j.watsonx.api-key=your-api-key
# Request timeout (default: 10s)
quarkus.langchain4j.watsonx.timeout=10s
# API version (default: 2025-04-23)
quarkus.langchain4j.watsonx.version=2025-04-23
# Project ID or Space ID (mutually exclusive, one required)
quarkus.langchain4j.watsonx.project-id=your-project-id
# quarkus.langchain4j.watsonx.space-id=your-space-id
# Logging configuration (default: false)
quarkus.langchain4j.watsonx.log-requests=false
quarkus.langchain4j.watsonx.log-responses=false
quarkus.langchain4j.watsonx.log-requests-curl=false
# Enable/disable integration (default: true)
quarkus.langchain4j.watsonx.enable-integration=true
Regional Endpoints:
public interface IAMConfig {
URL baseUrl();
Optional<Duration> timeout();
String grantType();
}
Properties:
# IAM endpoint (default: https://iam.cloud.ibm.com)
quarkus.langchain4j.watsonx.iam.base-url=https://iam.cloud.ibm.com
# IAM timeout (default: 10s)
quarkus.langchain4j.watsonx.iam.timeout=10s
# Grant type (default: urn:ibm:params:oauth:grant-type:apikey)
quarkus.langchain4j.watsonx.iam.grant-type=urn:ibm:params:oauth:grant-type:apikey
public interface ChatModelConfig {
String modelName();
Optional<ToolChoice> toolChoice();
Optional<String> toolChoiceName();
Double frequencyPenalty();
Boolean logprobs();
Optional<Integer> topLogprobs();
Integer maxTokens();
Integer n();
Double presencePenalty();
Optional<Integer> seed();
Optional<List<String>> stop();
Double temperature();
Double topP();
Optional<String> responseFormat();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
}
Properties:
# Model name (default: meta-llama/llama-4-maverick-17b-128e-instruct-fp8)
quarkus.langchain4j.watsonx.chat-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
# Tool choice strategy: auto, required
quarkus.langchain4j.watsonx.chat-model.tool-choice=auto
quarkus.langchain4j.watsonx.chat-model.tool-choice-name=specific_tool_name
# Frequency penalty (default: 0, range: -2 to 2)
quarkus.langchain4j.watsonx.chat-model.frequency-penalty=0.5
# Log probabilities (default: false)
quarkus.langchain4j.watsonx.chat-model.logprobs=false
quarkus.langchain4j.watsonx.chat-model.top-logprobs=5
# Maximum tokens to generate (default: 1024)
quarkus.langchain4j.watsonx.chat-model.max-tokens=2048
# Number of completions (default: 1)
quarkus.langchain4j.watsonx.chat-model.n=1
# Presence penalty (default: 0, range: -2 to 2)
quarkus.langchain4j.watsonx.chat-model.presence-penalty=0.3
# Random seed for reproducibility
quarkus.langchain4j.watsonx.chat-model.seed=42
# Stop sequences (comma-separated, max 4)
quarkus.langchain4j.watsonx.chat-model.stop=\\n\\n,END
# Temperature (default: 1.0, range: 0-2)
quarkus.langchain4j.watsonx.chat-model.temperature=0.7
# Top P (default: 1, range: 0-1)
quarkus.langchain4j.watsonx.chat-model.top-p=0.9
# Response format: text, json_object, json_schema
quarkus.langchain4j.watsonx.chat-model.response-format=text
# Override logging for this model
quarkus.langchain4j.watsonx.chat-model.log-requests=true
quarkus.langchain4j.watsonx.chat-model.log-responses=true
public interface GenerationModelConfig {
String modelName();
String decodingMethod();
LengthPenaltyConfig lengthPenalty();
Integer maxNewTokens();
Integer minNewTokens();
Optional<Integer> randomSeed();
Optional<List<String>> stopSequences();
Double temperature();
Optional<Integer> topK();
Optional<Double> topP();
Optional<Double> repetitionPenalty();
Optional<Integer> truncateInputTokens();
Optional<Boolean> includeStopSequence();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
String promptJoiner();
interface LengthPenaltyConfig {
Optional<Double> decayFactor();
Optional<Integer> startIndex();
}
}
Properties:
# Model name (default: meta-llama/llama-4-maverick-17b-128e-instruct-fp8)
quarkus.langchain4j.watsonx.generation-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
# Decoding method (default: greedy, values: greedy, sample)
quarkus.langchain4j.watsonx.generation-model.decoding-method=sample
# Length penalty configuration
quarkus.langchain4j.watsonx.generation-model.length-penalty.decay-factor=1.5
quarkus.langchain4j.watsonx.generation-model.length-penalty.start-index=10
# Maximum new tokens (default: 200)
quarkus.langchain4j.watsonx.generation-model.max-new-tokens=500
# Minimum new tokens (default: 0)
quarkus.langchain4j.watsonx.generation-model.min-new-tokens=50
# Random seed (range: >= 1)
quarkus.langchain4j.watsonx.generation-model.random-seed=42
# Stop sequences (comma-separated, max 6)
quarkus.langchain4j.watsonx.generation-model.stop-sequences=\\n\\n,END,---
# Temperature (default: 1.0, range: 0-2)
quarkus.langchain4j.watsonx.generation-model.temperature=0.7
# Top K (range: 1-100)
quarkus.langchain4j.watsonx.generation-model.top-k=50
# Top P (range: 0-1)
quarkus.langchain4j.watsonx.generation-model.top-p=0.9
# Repetition penalty (range: 1-2)
quarkus.langchain4j.watsonx.generation-model.repetition-penalty=1.2
# Truncate input tokens (default: 0)
quarkus.langchain4j.watsonx.generation-model.truncate-input-tokens=2048
# Include stop sequence in output
quarkus.langchain4j.watsonx.generation-model.include-stop-sequence=false
# Prompt joiner (default: \\n)
quarkus.langchain4j.watsonx.generation-model.prompt-joiner=\\n---\\n
# Override logging for this model
quarkus.langchain4j.watsonx.generation-model.log-requests=true
quarkus.langchain4j.watsonx.generation-model.log-responses=true
public interface EmbeddingModelConfig {
String modelName();
Optional<Integer> truncateInputTokens();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
}
Properties:
# Model name (default: ibm/granite-embedding-278m-multilingual)
quarkus.langchain4j.watsonx.embedding-model.model-name=ibm/granite-embedding-278m-multilingual
# Truncate input from right if exceeded
quarkus.langchain4j.watsonx.embedding-model.truncate-input-tokens=512
# Override logging for this model
quarkus.langchain4j.watsonx.embedding-model.log-requests=false
quarkus.langchain4j.watsonx.embedding-model.log-responses=false
public interface ScoringModelConfig {
String modelName();
Optional<Integer> truncateInputTokens();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
}
Properties:
# Model name (default: cross-encoder/ms-marco-minilm-l-12-v2)
quarkus.langchain4j.watsonx.scoring-model.model-name=cross-encoder/ms-marco-minilm-l-12-v2
# Truncate input from right if exceeded
quarkus.langchain4j.watsonx.scoring-model.truncate-input-tokens=512
# Override logging for this model
quarkus.langchain4j.watsonx.scoring-model.log-requests=false
quarkus.langchain4j.watsonx.scoring-model.log-responses=false
public interface TextExtractionConfig {
String baseUrl();
DocumentReference documentReference();
ResultsReference resultsReference();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
interface DocumentReference {
String connection();
String bucketName();
}
interface ResultsReference {
String connection();
String bucketName();
}
}
Properties:
# Cloud Object Storage API base URL (required)
quarkus.langchain4j.watsonx.text-extraction.base-url=https://s3.us-south.cloud-object-storage.appdomain.cloud
# Input document reference (required)
quarkus.langchain4j.watsonx.text-extraction.document-reference.connection=input-connection-id
quarkus.langchain4j.watsonx.text-extraction.document-reference.bucket-name=input-documents
# Output results reference (required)
quarkus.langchain4j.watsonx.text-extraction.results-reference.connection=output-connection-id
quarkus.langchain4j.watsonx.text-extraction.results-reference.bucket-name=output-results
# Override logging for text extraction
quarkus.langchain4j.watsonx.text-extraction.log-requests=false
quarkus.langchain4j.watsonx.text-extraction.log-responses=false
public interface BuiltinServiceConfig {
Optional<String> baseUrl();
Optional<String> apiKey();
Optional<Duration> timeout();
Optional<Boolean> logRequests();
Optional<Boolean> logResponses();
GoogleSearchConfig googleSearch();
interface GoogleSearchConfig {
int maxResults();
}
}
Properties:
# Services base URL (auto-calculated if empty)
quarkus.langchain4j.watsonx.built-in-service.base-url=https://api.watsonx.ai/services
# API key (inherits from watsonx.api-key if empty)
quarkus.langchain4j.watsonx.built-in-service.api-key=your-api-key
# Timeout (default: 10s)
quarkus.langchain4j.watsonx.built-in-service.timeout=10s
# Override logging for built-in services
quarkus.langchain4j.watsonx.built-in-service.log-requests=false
quarkus.langchain4j.watsonx.built-in-service.log-responses=false
# Google search configuration
quarkus.langchain4j.watsonx.built-in-service.google-search.max-results=10
Configure multiple models with different settings using named configurations.
# Default configuration
quarkus.langchain4j.watsonx.chat-model.temperature=0.7
# Named configuration: "creative"
quarkus.langchain4j.watsonx.creative.chat-model.temperature=1.5
quarkus.langchain4j.watsonx.creative.chat-model.max-tokens=2000
# Named configuration: "factual"
quarkus.langchain4j.watsonx.factual.chat-model.temperature=0.1
quarkus.langchain4j.watsonx.factual.chat-model.max-tokens=500
# Named configuration can override connection settings
quarkus.langchain4j.watsonx.europe.base-url=https://eu-de.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.europe.project-id=eu-project-id
public interface LangChain4jWatsonxConfig {
WatsonConfig defaultConfig();
Map<String, WatsonConfig> namedConfig();
BuiltinServiceConfig builtInService();
}
import jakarta.inject.Inject;
import jakarta.enterprise.inject.Named;
import dev.langchain4j.model.chat.ChatModel;
import jakarta.enterprise.context.ApplicationScoped;
@ApplicationScoped
public class MultiModelService {
@Inject
ChatModel defaultModel;
@Inject
@Named("creative")
ChatModel creativeModel;
@Inject
@Named("factual")
ChatModel factualModel;
@Inject
@Named("europe")
ChatModel europeModel;
}
# ===== Core Watsonx Configuration =====
quarkus.langchain4j.watsonx.base-url=https://us-south.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.api-key=${WATSONX_API_KEY}
quarkus.langchain4j.watsonx.project-id=${WATSONX_PROJECT_ID}
quarkus.langchain4j.watsonx.version=2025-04-23
quarkus.langchain4j.watsonx.timeout=30s
quarkus.langchain4j.watsonx.log-requests=false
quarkus.langchain4j.watsonx.log-responses=false
quarkus.langchain4j.watsonx.log-requests-curl=false
quarkus.langchain4j.watsonx.enable-integration=true
# ===== IAM Configuration =====
quarkus.langchain4j.watsonx.iam.base-url=https://iam.cloud.ibm.com
quarkus.langchain4j.watsonx.iam.timeout=10s
# ===== Chat Model =====
quarkus.langchain4j.watsonx.chat-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
quarkus.langchain4j.watsonx.chat-model.temperature=0.7
quarkus.langchain4j.watsonx.chat-model.max-tokens=2048
quarkus.langchain4j.watsonx.chat-model.top-p=0.9
quarkus.langchain4j.watsonx.chat-model.frequency-penalty=0.5
quarkus.langchain4j.watsonx.chat-model.presence-penalty=0.3
quarkus.langchain4j.watsonx.chat-model.tool-choice=auto
quarkus.langchain4j.watsonx.chat-model.n=1
quarkus.langchain4j.watsonx.chat-model.logprobs=false
# ===== Generation Model =====
quarkus.langchain4j.watsonx.generation-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
quarkus.langchain4j.watsonx.generation-model.decoding-method=sample
quarkus.langchain4j.watsonx.generation-model.temperature=0.7
quarkus.langchain4j.watsonx.generation-model.max-new-tokens=500
quarkus.langchain4j.watsonx.generation-model.min-new-tokens=50
quarkus.langchain4j.watsonx.generation-model.top-k=50
quarkus.langchain4j.watsonx.generation-model.top-p=0.9
quarkus.langchain4j.watsonx.generation-model.repetition-penalty=1.2
quarkus.langchain4j.watsonx.generation-model.random-seed=42
quarkus.langchain4j.watsonx.generation-model.stop-sequences=\\n\\n,END
quarkus.langchain4j.watsonx.generation-model.truncate-input-tokens=2048
quarkus.langchain4j.watsonx.generation-model.length-penalty.decay-factor=1.5
quarkus.langchain4j.watsonx.generation-model.length-penalty.start-index=10
# ===== Embedding Model =====
quarkus.langchain4j.watsonx.embedding-model.model-name=ibm/granite-embedding-278m-multilingual
quarkus.langchain4j.watsonx.embedding-model.truncate-input-tokens=512
# ===== Scoring Model =====
quarkus.langchain4j.watsonx.scoring-model.model-name=cross-encoder/ms-marco-minilm-l-12-v2
quarkus.langchain4j.watsonx.scoring-model.truncate-input-tokens=512
# ===== Text Extraction =====
quarkus.langchain4j.watsonx.text-extraction.base-url=https://s3.us-south.cloud-object-storage.appdomain.cloud
quarkus.langchain4j.watsonx.text-extraction.document-reference.connection=${COS_INPUT_CONNECTION}
quarkus.langchain4j.watsonx.text-extraction.document-reference.bucket-name=input-docs
quarkus.langchain4j.watsonx.text-extraction.results-reference.connection=${COS_OUTPUT_CONNECTION}
quarkus.langchain4j.watsonx.text-extraction.results-reference.bucket-name=output-results
# ===== Built-in Services =====
quarkus.langchain4j.watsonx.built-in-service.timeout=15s
quarkus.langchain4j.watsonx.built-in-service.google-search.max-results=10
# ===== Named Configuration: Creative =====
quarkus.langchain4j.watsonx.creative.chat-model.temperature=1.5
quarkus.langchain4j.watsonx.creative.chat-model.max-tokens=4096
quarkus.langchain4j.watsonx.creative.chat-model.frequency-penalty=0.3
quarkus.langchain4j.watsonx.creative.chat-model.presence-penalty=0.6
# ===== Named Configuration: Factual =====
quarkus.langchain4j.watsonx.factual.chat-model.temperature=0.1
quarkus.langchain4j.watsonx.factual.chat-model.max-tokens=1024
quarkus.langchain4j.watsonx.factual.chat-model.seed=42
# ===== Named Configuration: Europe =====
quarkus.langchain4j.watsonx.europe.base-url=https://eu-de.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.europe.project-id=${WATSONX_EU_PROJECT_ID}
quarkus.langchain4j.watsonx.europe.chat-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
Use environment variables for sensitive configuration:
# Reference environment variables
quarkus.langchain4j.watsonx.api-key=${WATSONX_API_KEY}
quarkus.langchain4j.watsonx.project-id=${WATSONX_PROJECT_ID}
quarkus.langchain4j.watsonx.text-extraction.document-reference.connection=${COS_INPUT_CONNECTION}
quarkus.langchain4j.watsonx.text-extraction.results-reference.connection=${COS_OUTPUT_CONNECTION}
Shell:
export WATSONX_API_KEY=your-api-key
export WATSONX_PROJECT_ID=your-project-id
export COS_INPUT_CONNECTION=input-connection-id
export COS_OUTPUT_CONNECTION=output-connection-id
Use Quarkus profiles for different environments:
# Default (dev) profile
%dev.quarkus.langchain4j.watsonx.log-requests=true
%dev.quarkus.langchain4j.watsonx.log-responses=true
%dev.quarkus.langchain4j.watsonx.chat-model.temperature=0.7
# Test profile
%test.quarkus.langchain4j.watsonx.enable-integration=false
%test.quarkus.langchain4j.watsonx.chat-model.temperature=0.0
# Production profile
%prod.quarkus.langchain4j.watsonx.log-requests=false
%prod.quarkus.langchain4j.watsonx.log-responses=false
%prod.quarkus.langchain4j.watsonx.timeout=60s
%prod.quarkus.langchain4j.watsonx.chat-model.temperature=0.7
# Never commit API keys to source control
quarkus.langchain4j.watsonx.api-key=${WATSONX_API_KEY}
# Use different API keys for different environments
%dev.quarkus.langchain4j.watsonx.api-key=${DEV_WATSONX_API_KEY}
%prod.quarkus.langchain4j.watsonx.api-key=${PROD_WATSONX_API_KEY}
# Enable detailed logging in development
%dev.quarkus.langchain4j.watsonx.log-requests=true
%dev.quarkus.langchain4j.watsonx.log-responses=true
%dev.quarkus.langchain4j.watsonx.log-requests-curl=true
# Disable logging in production
%prod.quarkus.langchain4j.watsonx.log-requests=false
%prod.quarkus.langchain4j.watsonx.log-responses=false
%prod.quarkus.langchain4j.watsonx.log-requests-curl=false
# Override per model if needed
%dev.quarkus.langchain4j.watsonx.chat-model.log-requests=true
%dev.quarkus.langchain4j.watsonx.embedding-model.log-requests=false
# Set appropriate timeouts for your use case
quarkus.langchain4j.watsonx.timeout=30s
quarkus.langchain4j.watsonx.iam.timeout=10s
quarkus.langchain4j.watsonx.built-in-service.timeout=15s
# Increase for production workloads
%prod.quarkus.langchain4j.watsonx.timeout=60s
# Use appropriate models for your use case
quarkus.langchain4j.watsonx.chat-model.model-name=meta-llama/llama-4-maverick-17b-128e-instruct-fp8
quarkus.langchain4j.watsonx.embedding-model.model-name=ibm/granite-embedding-278m-multilingual
quarkus.langchain4j.watsonx.scoring-model.model-name=cross-encoder/ms-marco-minilm-l-12-v2
# Use different models for different tasks
quarkus.langchain4j.watsonx.creative.chat-model.model-name=meta-llama/llama-3-70b-instruct
quarkus.langchain4j.watsonx.factual.chat-model.model-name=ibm/granite-13b-chat-v2
# Configure different regions for different named configs
quarkus.langchain4j.watsonx.us.base-url=https://us-south.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.us.project-id=${US_PROJECT_ID}
quarkus.langchain4j.watsonx.eu.base-url=https://eu-de.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.eu.project-id=${EU_PROJECT_ID}
quarkus.langchain4j.watsonx.jp.base-url=https://jp-tok.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.jp.project-id=${JP_PROJECT_ID}
While Quarkus configuration is recommended, you can also create models programmatically:
import io.quarkiverse.langchain4j.watsonx.WatsonxChatModel;
import java.net.URL;
import java.time.Duration;
WatsonxChatModel model = WatsonxChatModel.builder()
.modelId("meta-llama/llama-4-maverick-17b-128e-instruct-fp8")
.url(new URL("https://us-south.ml.cloud.ibm.com"))
.projectId("your-project-id")
.tokenGenerator(tokenGenerator)
.temperature(0.7)
.maxTokens(2048)
.timeout(Duration.ofSeconds(30))
.logRequests(false)
.logResponses(false)
.build();
This approach is useful for:
The extension validates configuration at startup:
Validation errors result in clear error messages at startup.
To disable the integration entirely:
quarkus.langchain4j.watsonx.enable-integration=false
This is useful for:
Install with Tessl CLI
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-watsonx