Quarkus extension for integrating IBM watsonx.ai foundation models with LangChain4j. Provides chat models, generation models, streaming models, embedding models, and scoring models for IBM watsonx.ai. Includes comprehensive configuration options, support for tool/function calling, text extraction from documents in Cloud Object Storage, and experimental built-in services for Google search, weather, and web crawling. Designed for enterprise Java applications using the Quarkus framework with built-in dependency injection and native compilation support.
Generate text embeddings for semantic search and score text segments for relevance ranking. These models are essential for RAG (Retrieval-Augmented Generation) applications, semantic similarity tasks, and document reranking.
Generate vector embeddings from text segments for semantic search and similarity comparison.
public class WatsonxEmbeddingModel implements dev.langchain4j.model.embedding.EmbeddingModel {
public static Builder builder();
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
public WatsonxRestApi getClient();
public String getModelId();
public String getProjectId();
public String getSpaceId();
public String getVersion();
}

Limitations:
Example Usage:
import io.quarkiverse.langchain4j.watsonx.WatsonxEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import java.net.URL;
import java.util.List;
WatsonxEmbeddingModel model = WatsonxEmbeddingModel.builder()
.modelId("ibm/granite-embedding-278m-multilingual")
.url(new URL("https://us-south.ml.cloud.ibm.com"))
.projectId("your-project-id")
.tokenGenerator(tokenGenerator)
.build();
// Embed single text
TextSegment segment = TextSegment.from("Machine learning is a subset of AI");
Response<Embedding> response = model.embed(segment);
Embedding embedding = response.content();
float[] vector = embedding.vector();
// Embed multiple texts
List<TextSegment> segments = List.of(
TextSegment.from("Artificial intelligence"),
TextSegment.from("Natural language processing"),
TextSegment.from("Computer vision")
);
Response<List<Embedding>> batchResponse = model.embedAll(segments);
List<Embedding> embeddings = batchResponse.content();
for (int i = 0; i < embeddings.size(); i++) {
float[] vec = embeddings.get(i).vector();
System.out.println("Embedding " + i + " dimensions: " + vec.length);
}

Semantic Similarity Example:
import dev.langchain4j.store.embedding.CosineSimilarity;
// Embed two texts
Embedding embedding1 = model.embed("Dogs are loyal pets").content();
Embedding embedding2 = model.embed("Cats are independent animals").content();
// Calculate cosine similarity
double similarity = CosineSimilarity.between(embedding1, embedding2);
System.out.println("Similarity: " + similarity); // Value between -1 and 1

Configure embedding models with Watsonx-specific parameters.
public static class Builder extends Watsonx.Builder<WatsonxEmbeddingModel, Builder> {
// Inherited base parameters
public Builder modelId(String modelId);
public Builder version(String version);
public Builder spaceId(String spaceId);
public Builder projectId(String projectId);
public Builder url(URL url);
public Builder timeout(Duration timeout);
public Builder tokenGenerator(TokenGenerator tokenGenerator);
public Builder logRequests(boolean logRequests);
public Builder logResponses(boolean logResponses);
public Builder logCurl(boolean logCurl);
// Embedding-specific parameters
public Builder truncateInputTokens(Integer truncateInputTokens);
public WatsonxEmbeddingModel build();
}

Parameter Details:
modelId (String, required): Watsonx embedding model identifier
version (String): API version
spaceId (String): Deployment space ID (mutually exclusive with projectId)
projectId (String): Project ID (mutually exclusive with spaceId)
url (URL, required): Watsonx API base URL
timeout (Duration): Request timeout
tokenGenerator (TokenGenerator, required): Handles IBM Cloud IAM authentication
logRequests (boolean): Enable request body logging
logResponses (boolean): Enable response body logging
logCurl (boolean): Log requests as cURL commands
truncateInputTokens (Integer): Truncate input if exceeds limit
Builder Example:
WatsonxEmbeddingModel model = WatsonxEmbeddingModel.builder()
.modelId("ibm/granite-embedding-278m-multilingual")
.url(new URL("https://us-south.ml.cloud.ibm.com"))
.projectId("abc123")
.tokenGenerator(tokenGenerator)
.truncateInputTokens(512)
.logRequests(false)
.timeout(Duration.ofSeconds(30))
.build();

Score text segments against a query for relevance ranking and reranking. Essential for improving retrieval quality in RAG applications.
public class WatsonxScoringModel implements dev.langchain4j.model.scoring.ScoringModel {
public static Builder builder();
public Response<List<Double>> scoreAll(List<TextSegment> textSegments, String query);
public WatsonxRestApi getClient();
public String getModelId();
public String getProjectId();
public String getSpaceId();
public String getVersion();
}

Example Usage:
import io.quarkiverse.langchain4j.watsonx.WatsonxScoringModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import java.util.List;
import java.net.URL;
WatsonxScoringModel model = WatsonxScoringModel.builder()
.modelId("cross-encoder/ms-marco-minilm-l-12-v2")
.url(new URL("https://us-south.ml.cloud.ibm.com"))
.projectId("your-project-id")
.tokenGenerator(tokenGenerator)
.build();
String query = "What is machine learning?";
List<TextSegment> segments = List.of(
TextSegment.from("Machine learning is a method of data analysis that automates analytical model building."),
TextSegment.from("Pizza is a popular Italian dish with tomato and cheese."),
TextSegment.from("Deep learning is a subset of machine learning based on artificial neural networks.")
);
Response<List<Double>> response = model.scoreAll(segments, query);
List<Double> scores = response.content();
// Print segments with scores
for (int i = 0; i < segments.size(); i++) {
System.out.println("Score " + scores.get(i) + ": " + segments.get(i).text());
}
// Typical output:
// Score 0.95: Machine learning is a method of data analysis...
// Score 0.12: Pizza is a popular Italian dish...
// Score 0.87: Deep learning is a subset of machine learning...

Reranking Example:
import java.util.stream.IntStream;
import java.util.Comparator;
// Get initial search results
List<TextSegment> searchResults = performInitialSearch(query);
// Score all results
List<Double> scores = model.scoreAll(searchResults, query).content();
// Create list of (index, score) pairs and sort by score descending
List<Integer> rankedIndices = IntStream.range(0, scores.size())
.boxed()
.sorted(Comparator.comparingDouble(i -> -scores.get(i)))
.toList();
// Get top 5 reranked results
List<TextSegment> topResults = rankedIndices.stream()
.limit(5)
.map(searchResults::get)
.toList();

Configure scoring models with Watsonx-specific parameters.
public static class Builder extends Watsonx.Builder<WatsonxScoringModel, Builder> {
// Inherited base parameters
public Builder modelId(String modelId);
public Builder version(String version);
public Builder spaceId(String spaceId);
public Builder projectId(String projectId);
public Builder url(URL url);
public Builder timeout(Duration timeout);
public Builder tokenGenerator(TokenGenerator tokenGenerator);
public Builder logRequests(boolean logRequests);
public Builder logResponses(boolean logResponses);
public Builder logCurl(boolean logCurl);
// Scoring-specific parameters
public Builder truncateInputTokens(Integer truncateInputTokens);
public WatsonxScoringModel build();
}

Parameter Details:
modelId (String, required): Watsonx scoring/reranking model identifier
version (String): API version
spaceId (String): Deployment space ID (mutually exclusive with projectId)
projectId (String): Project ID (mutually exclusive with spaceId)
url (URL, required): Watsonx API base URL
timeout (Duration): Request timeout
tokenGenerator (TokenGenerator, required): Handles IBM Cloud IAM authentication
logRequests (boolean): Enable request body logging
logResponses (boolean): Enable response body logging
logCurl (boolean): Log requests as cURL commands
truncateInputTokens (Integer): Truncate input if exceeds limit
Builder Example:
WatsonxScoringModel model = WatsonxScoringModel.builder()
.modelId("cross-encoder/ms-marco-minilm-l-12-v2")
.url(new URL("https://us-south.ml.cloud.ibm.com"))
.projectId("abc123")
.tokenGenerator(tokenGenerator)
.truncateInputTokens(512)
.timeout(Duration.ofSeconds(30))
.build();

Use Quarkus CDI for automatic model creation and injection.
Configuration:
quarkus.langchain4j.watsonx.base-url=https://us-south.ml.cloud.ibm.com
quarkus.langchain4j.watsonx.api-key=your-api-key
quarkus.langchain4j.watsonx.project-id=your-project-id
# Embedding model configuration
quarkus.langchain4j.watsonx.embedding-model.model-name=ibm/granite-embedding-278m-multilingual
quarkus.langchain4j.watsonx.embedding-model.truncate-input-tokens=512
# Scoring model configuration
quarkus.langchain4j.watsonx.scoring-model.model-name=cross-encoder/ms-marco-minilm-l-12-v2
quarkus.langchain4j.watsonx.scoring-model.truncate-input-tokens=512

Injection:
import jakarta.inject.Inject;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.scoring.ScoringModel;
@ApplicationScoped
public class SearchService {
@Inject
EmbeddingModel embeddingModel;
@Inject
ScoringModel scoringModel;
public List<TextSegment> search(String query, List<TextSegment> documents) {
// First: embed and find similar documents
Embedding queryEmbedding = embeddingModel.embed(query).content();
List<TextSegment> candidates = findSimilar(queryEmbedding, documents, 20);
// Second: rerank with scoring model
List<Double> scores = scoringModel.scoreAll(candidates, query).content();
// Return top 5 reranked results
return rankByScore(candidates, scores).stream().limit(5).toList();
}
}

Use embedding models with LangChain4j embedding stores.
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
// Create embedding store (in-memory example)
EmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
// Ingest documents
Document document = Document.from("Long document text...");
EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
.documentSplitter(DocumentSplitters.recursive(500, 50))
.embeddingModel(embeddingModel)
.embeddingStore(embeddingStore)
.build();
ingestor.ingest(document);
// Search similar segments
String query = "What is machine learning?";
Embedding queryEmbedding = embeddingModel.embed(query).content();
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.findRelevant(
queryEmbedding,
10 // Top 10 results
);
for (EmbeddingMatch<TextSegment> match : matches) {
System.out.println("Score: " + match.score() + ", Text: " + match.embedded().text());
}

Combine embedding-based retrieval with scoring-based reranking for optimal results.
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingMatch;
@ApplicationScoped
public class RAGService {
@Inject
EmbeddingModel embeddingModel;
@Inject
ScoringModel scoringModel;
@Inject
EmbeddingStore<TextSegment> embeddingStore;
public List<TextSegment> retrieveAndRerank(String query, int topK) {
// Stage 1: Embedding-based retrieval (fast, recall-oriented)
Embedding queryEmbedding = embeddingModel.embed(query).content();
// Retrieve more candidates than needed (e.g., 20 for top 5)
int candidateCount = topK * 4;
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.findRelevant(
queryEmbedding,
candidateCount
);
// Extract text segments
List<TextSegment> candidates = matches.stream()
.map(EmbeddingMatch::embedded)
.toList();
// Stage 2: Scoring-based reranking (precise, precision-oriented)
List<Double> scores = scoringModel.scoreAll(candidates, query).content();
// Sort by score and return top K
return IntStream.range(0, candidates.size())
.boxed()
.sorted(Comparator.comparingDouble(i -> -scores.get(i)))
.limit(topK)
.map(candidates::get)
.toList();
}
}

Why Two-Stage Retrieval?
import dev.langchain4j.service.AiServices;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
@ApplicationScoped
public class RAGPipeline {
@Inject
ChatModel chatModel;
@Inject
EmbeddingModel embeddingModel;
@Inject
ScoringModel scoringModel;
@Inject
EmbeddingStore<TextSegment> embeddingStore;
public String answerQuestion(String question) {
// Retrieve relevant context
List<TextSegment> context = retrieveAndRerank(question, 3);
// Build prompt with context
String contextText = context.stream()
.map(TextSegment::text)
.collect(Collectors.joining("\n\n"));
String prompt = String.format(
"Context:\n%s\n\nQuestion: %s\n\nAnswer based on the context:",
contextText,
question
);
// Generate answer
ChatResponse response = chatModel.chat(UserMessage.from(prompt));
return response.aiMessage().text();
}
private List<TextSegment> retrieveAndRerank(String query, int topK) {
// Stage 1: Embedding retrieval
Embedding queryEmbedding = embeddingModel.embed(query).content();
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.findRelevant(
queryEmbedding,
topK * 4
);
List<TextSegment> candidates = matches.stream()
.map(EmbeddingMatch::embedded)
.toList();
// Stage 2: Scoring reranking
List<Double> scores = scoringModel.scoreAll(candidates, query).content();
return IntStream.range(0, candidates.size())
.boxed()
.sorted(Comparator.comparingDouble(i -> -scores.get(i)))
.limit(topK)
.map(candidates::get)
.toList();
}
}

From LangChain4j:
public class TextSegment {
public static TextSegment from(String text);
public static TextSegment from(String text, Metadata metadata);
public String text();
public Metadata metadata();
}

From LangChain4j:
public class Embedding {
public float[] vector();
public int dimension();
}

From LangChain4j:
public class Response<T> {
public T content();
public TokenUsage tokenUsage();
public FinishReason finishReason();
}

Internal beans:
public record EmbeddingRequest(
String modelId,
String spaceId,
String projectId,
List<String> inputs,
EmbeddingParameters parameters
) {}
public record EmbeddingResponse(
String modelId,
List<Result> results
) {
public record Result(List<Float> embedding) {}
}
public record EmbeddingParameters(Integer truncateInputTokens) {}

Internal beans:
public class ScoringRequest {
public static ScoringRequest of(
String modelId,
String spaceId,
String projectId,
String query,
List<TextSegment> textSegments,
ScoringParameters parameters
);
}
public record ScoringResponse(
String modelId,
List<ScoringOutput> results,
Integer inputTokenCount
) {
public record ScoringOutput(Integer index, Double score) {}
}
public record ScoringParameters(Integer truncateInputTokens) {}

// Efficient: Process multiple segments in one request
List<TextSegment> segments = List.of(
TextSegment.from("Text 1"),
TextSegment.from("Text 2"),
TextSegment.from("Text 3")
);
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
// Inefficient: Multiple API calls
for (TextSegment segment : segments) {
Response<Embedding> response = embeddingModel.embed(segment);
// Process individual response
}

// Handle long texts with truncation
WatsonxEmbeddingModel model = WatsonxEmbeddingModel.builder()
.truncateInputTokens(512) // Model-specific limit
.build();
// Long texts are automatically truncated from the right
TextSegment longText = TextSegment.from("Very long document text...");
Embedding embedding = model.embed(longText).content();

// Preserve metadata through embedding process
TextSegment segment = TextSegment.from(
"Document text",
Metadata.from("source", "doc1.pdf").put("page", 5)
);
// Store in embedding store
embeddingStore.add(embeddingModel.embed(segment).content(), segment);
// Retrieve with metadata
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.findRelevant(query, 5);
for (EmbeddingMatch<TextSegment> match : matches) {
TextSegment retrieved = match.embedded();
String source = retrieved.metadata().getString("source");
int page = retrieved.metadata().getInteger("page");
}

// Filter results by score threshold
String query = "machine learning";
List<TextSegment> candidates = getSearchCandidates(query);
List<Double> scores = scoringModel.scoreAll(candidates, query).content();
double threshold = 0.5;
List<TextSegment> filtered = IntStream.range(0, scores.size())
.filter(i -> scores.get(i) >= threshold)
.mapToObj(candidates::get)
.toList();

// Cache embeddings to avoid recomputing
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class EmbeddingCache {
private final Map<String, Embedding> cache = new ConcurrentHashMap<>();
private final EmbeddingModel model;
public EmbeddingCache(EmbeddingModel model) {
this.model = model;
}
public Embedding getEmbedding(String text) {
return cache.computeIfAbsent(text, t ->
model.embed(TextSegment.from(t)).content()
);
}
}

Embedding Models:
Scoring Models:
Install with Tessl CLI
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-watsonx