Quarkus extension for Azure OpenAI integration with LangChain4j, providing ChatModel, StreamingChatModel, EmbeddingModel, and ImageModel implementations with Azure-specific authentication and configuration support.
Azure OpenAI embedding models generate vector representations of text for semantic search, retrieval-augmented generation (RAG), and similarity tasks. The extension provides AzureOpenAiEmbeddingModel which implements the LangChain4j EmbeddingModel interface.
package io.quarkiverse.langchain4j.azure.openai;
public class AzureOpenAiEmbeddingModel implements dev.langchain4j.model.embedding.EmbeddingModel {
/**
* Generate embeddings for a list of text segments.
* Processes a maximum of 16 segments per API call due to Azure OpenAI limitations.
* Larger lists are automatically batched into multiple requests; the embeddings
* from all batches are returned together in a single response, and token usage
* is aggregated across the batches.
*
* @param textSegments List of text segments to embed
* @return Response containing the list of embeddings (one per input segment)
*         and the token usage information
*/
public dev.langchain4j.model.output.Response<java.util.List<dev.langchain4j.data.embedding.Embedding>>
embedAll(java.util.List<dev.langchain4j.data.segment.TextSegment> textSegments);
/**
* Create a builder for configuring the embedding model.
*
* @return Builder instance
*/
public static Builder builder();
}

Batch Processing Limit: Azure OpenAI enforces a maximum batch size of 16 text segments per embedding request. The embedAll() method automatically handles this limitation by batching larger lists into multiple requests.
public static class Builder {
/**
* Set the Azure OpenAI endpoint URL.
* Format: https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}
*
* @param endpoint The full endpoint URL (required)
* @return This builder
*/
public Builder endpoint(String endpoint);
/**
* Set the Azure OpenAI API version.
* Format: YYYY-MM-DD (e.g., "2024-10-21")
*
* @param apiVersion The API version (required)
* @return This builder
*/
public Builder apiVersion(String apiVersion);
/**
* Set the Azure OpenAI API key for authentication.
* Either apiKey or adToken must be provided, but not both.
*
* @param apiKey The API key
* @return This builder
*/
public Builder apiKey(String apiKey);
/**
* Set the Azure AD token for authentication.
* Either apiKey or adToken must be provided, but not both.
*
* @param adToken The Azure AD token
* @return This builder
*/
public Builder adToken(String adToken);
}public static class Builder {
/**
* Set timeout for API calls.
* Default: 60 seconds
*
* @param timeout The timeout duration
* @return This builder
*/
public Builder timeout(java.time.Duration timeout);
/**
* Set maximum number of retry attempts.
* Default: 3
*
* @param maxRetries Maximum retry attempts (must be >= 1)
* @return This builder
* @deprecated Use MicroProfile Fault Tolerance instead.
*/
public Builder maxRetries(Integer maxRetries);
/**
* Set proxy for network requests.
*
* @param proxy The proxy configuration
* @return This builder
*/
public Builder proxy(java.net.Proxy proxy);
}public static class Builder {
/**
* Enable request logging.
* Default: false
*
* @param logRequests Whether to log requests
* @return This builder
*/
public Builder logRequests(Boolean logRequests);
/**
* Enable response logging.
* Default: false
*
* @param logResponses Whether to log responses
* @return This builder
*/
public Builder logResponses(Boolean logResponses);
/**
* Set token count estimator for tracking token usage.
*
* @param tokenizer TokenCountEstimator instance
* @return This builder
*/
public Builder tokenizer(dev.langchain4j.model.TokenCountEstimator tokenizer);
}public static class Builder {
/**
* Set configuration name for named model instances.
* Used for CDI integration with @ModelName qualifier.
*
* @param configName The configuration name
* @return This builder
*/
public Builder configName(String configName);
/**
* Build the embedding model instance.
*
* @return Configured AzureOpenAiEmbeddingModel instance
*/
public AzureOpenAiEmbeddingModel build();
}import io.quarkiverse.langchain4j.azure.openai.AzureOpenAiEmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import java.time.Duration;
import java.util.List;
// Build the embedding model
AzureOpenAiEmbeddingModel embeddingModel = AzureOpenAiEmbeddingModel.builder()
.endpoint("https://my-resource.openai.azure.com/openai/deployments/text-embedding-ada-002")
.apiKey("your-api-key")
.apiVersion("2024-10-21")
.timeout(Duration.ofSeconds(60))
.maxRetries(3)
.build();
// Create text segments
List<TextSegment> segments = List.of(
TextSegment.from("Azure OpenAI provides enterprise-grade AI services"),
TextSegment.from("Quarkus is a Kubernetes-native Java framework"),
TextSegment.from("LangChain4j enables AI application development")
);
// Generate embeddings
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
List<Embedding> embeddings = response.content();
int tokenCount = response.tokenUsage().inputTokenCount();
// Use embeddings (e.g., for similarity search)
for (Embedding embedding : embeddings) {
float[] vector = embedding.vector();
// Store or use the embedding vector
}

Configure via application.properties:
quarkus.langchain4j.azure-openai.api-key=your-key
quarkus.langchain4j.azure-openai.embedding-model.deployment-name=text-embedding-ada-002
quarkus.langchain4j.azure-openai.resource-name=my-resource

Inject and use:
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
import jakarta.inject.Inject;
public class EmbeddingService {
@Inject
EmbeddingModel embeddingModel;
/**
 * Embeds plain strings by wrapping each one in a TextSegment and sending
 * them to the injected embedding model in a single embedAll call.
 *
 * @param texts the raw texts to embed
 * @return the content of the model's response: the list of embeddings
 */
public List<Embedding> embedTexts(List<String> texts) {
    return embeddingModel
            .embedAll(texts.stream().map(text -> TextSegment.from(text)).toList())
            .content();
}
}

The embedding model automatically handles batching for lists larger than 16 segments:
import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
import java.util.stream.IntStream;
// Create a large list of text segments (e.g., 50 segments)
List<TextSegment> segments = IntStream.range(0, 50)
.mapToObj(i -> TextSegment.from("Document text " + i))
.toList();
// The model automatically processes this in batches of 16
// Total of 4 API calls will be made (16 + 16 + 16 + 2)
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
// All 50 embeddings are returned in a single response
List<Embedding> embeddings = response.content();
System.out.println("Generated " + embeddings.size() + " embeddings");
// Token usage is aggregated across all batches
int totalTokens = response.tokenUsage().inputTokenCount();AzureOpenAiEmbeddingModel embeddingModel = AzureOpenAiEmbeddingModel.builder()
.endpoint("https://my-resource.openai.azure.com/openai/deployments/text-embedding-ada-002")
.adToken("your-azure-ad-token") // Use AD token instead of API key
.apiVersion("2024-10-21")
.build();

Configure multiple embedding models:
# Fast, small embeddings
quarkus.langchain4j.azure-openai.small-embeddings.api-key=key1
quarkus.langchain4j.azure-openai.small-embeddings.endpoint=https://resource.openai.azure.com/openai/deployments/text-embedding-ada-002
# Large, high-quality embeddings
quarkus.langchain4j.azure-openai.large-embeddings.api-key=key2
quarkus.langchain4j.azure-openai.large-embeddings.endpoint=https://resource.openai.azure.com/openai/deployments/text-embedding-3-large

Inject specific models:
import io.quarkiverse.langchain4j.ModelName;
@Inject
@ModelName("small-embeddings")
EmbeddingModel smallEmbeddingModel;
@Inject
@ModelName("large-embeddings")
EmbeddingModel largeEmbeddingModel;import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
public class DocumentIndexer {
@Inject
EmbeddingModel embeddingModel;
@Inject
EmbeddingStore<TextSegment> embeddingStore;
/**
 * Embeds the given documents and stores every embedding together with the
 * text segment it was generated from.
 *
 * @param documents raw document texts to index; an empty list is a no-op
 */
public void indexDocuments(List<String> documents) {
    // Nothing to index: avoid sending an embedding request with no input
    if (documents.isEmpty()) {
        return;
    }
    // Convert documents to text segments
    List<TextSegment> segments = documents.stream()
            .map(TextSegment::from)
            .toList();
    // Generate embeddings
    Response<List<Embedding>> response = embeddingModel.embedAll(segments);
    List<Embedding> embeddings = response.content();
    // Store all embeddings with their text segments in one bulk call
    // instead of one add(...) call per element
    embeddingStore.addAll(embeddings, segments);
}
/**
 * Finds the stored text segments that are most similar to a free-text query.
 *
 * @param query the text to search for
 * @param maxResults the maximum number of matches to return
 * @return the text segments of the matches reported by the embedding store
 */
public List<TextSegment> searchSimilar(String query, int maxResults) {
    // Embed the query
    TextSegment querySegment = TextSegment.from(query);
    Response<List<Embedding>> response = embeddingModel.embedAll(List.of(querySegment));
    Embedding queryEmbedding = response.content().get(0);
    // Search for similar documents. The legacy findRelevant(...) methods were
    // superseded by search(EmbeddingSearchRequest) and are not part of the
    // LangChain4j 1.x EmbeddingStore API, so the request object is used here.
    var searchRequest = dev.langchain4j.store.embedding.EmbeddingSearchRequest.builder()
            .queryEmbedding(queryEmbedding)
            .maxResults(maxResults)
            .build();
    return embeddingStore.search(searchRequest)
            .matches()
            .stream()
            .map(match -> match.embedded())
            .toList();
}
}import dev.langchain4j.model.output.TokenUsage;
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
// Get token usage information
TokenUsage tokenUsage = response.tokenUsage();
int inputTokens = tokenUsage.inputTokenCount();
System.out.println("Processed " + inputTokens + " tokens");
// Estimate costs (example rates, check Azure pricing)
double costPer1kTokens = 0.0001; // Example rate for text-embedding-ada-002
double estimatedCost = (inputTokens / 1000.0) * costPer1kTokens;
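System.out.println("Estimated cost: $" + estimatedCost);

The embedding model processes a maximum of 16 text segments per API call due to Azure OpenAI limitations. When embedAll() is called with more than 16 segments, the list is automatically split into batches of at most 16 segments, each batch is sent as a separate request, and the resulting embeddings and token usage are combined into a single response.

For example, embedding 50 segments results in 4 API calls (16 + 16 + 16 + 2).

Authentication: either apiKey or adToken must be provided, but not both.

When using declarative configuration, embedding model specific properties are prefixed with quarkus.langchain4j.azure-openai.embedding-model.: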
# Override deployment name for embedding model specifically
quarkus.langchain4j.azure-openai.embedding-model.deployment-name=text-embedding-ada-002
# Override API key for embedding model specifically
quarkus.langchain4j.azure-openai.embedding-model.api-key=embedding-specific-key
# Logging configuration
quarkus.langchain4j.azure-openai.embedding-model.log-requests=true
quarkus.langchain4j.azure-openai.embedding-model.log-responses=true

// From LangChain4j framework
package dev.langchain4j.data.segment;
public class TextSegment {
public static TextSegment from(String text);
public String text();
}
package dev.langchain4j.data.embedding;
public class Embedding {
public float[] vector();
public int dimension();
}
package dev.langchain4j.model.output;
public class Response<T> {
public T content();
public TokenUsage tokenUsage();
}
package dev.langchain4j.model.output;
public class TokenUsage {
public Integer inputTokenCount();
public Integer outputTokenCount();
public Integer totalTokenCount();
}

Install with Tessl CLI:
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-azure-openai@1.7.0