LangChain4j integration for Azure OpenAI providing chat, streaming, embeddings, image generation, audio transcription, and token counting capabilities
The embedding model converts text into vector representations for semantic search, similarity comparisons, clustering, and other vector-based operations. Supports Azure OpenAI embedding models like text-embedding-ada-002 and text-embedding-3-small/large.
import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel;
import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModelName;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.TokenUsage;

AzureOpenAiEmbeddingModel model = AzureOpenAiEmbeddingModel.builder()
.endpoint("https://your-resource.openai.azure.com/")
.apiKey("your-api-key")
.deploymentName("text-embedding-ada-002")
.serviceVersion("2024-02-15-preview")
.build();
// Embed a single text
TextSegment text = TextSegment.from("The quick brown fox jumps over the lazy dog");
Response<Embedding> response = model.embed(text);
Embedding embedding = response.content();
// Get vector representation
float[] vector = embedding.vector(); // 1536-dimensional vector
int dimensions = embedding.dimension(); // 1536
// Embed multiple texts (batch, max 16)
List<TextSegment> texts = List.of(
TextSegment.from("First text"),
TextSegment.from("Second text"),
TextSegment.from("Third text")
);
Response<List<Embedding>> batchResponse = model.embedAll(texts);
List<Embedding> embeddings = batchResponse.content();
TokenUsage usage = batchResponse.tokenUsage(); // Track token consumption

package dev.langchain4j.model.azure;
/**
* Azure OpenAI embedding model for text vectorization.
* Thread-safe: Yes - instances are immutable and thread-safe.
* Batch size: Maximum 16 text segments per request.
* Token limit: Maximum 8191 tokens per segment.
* Dimensions: Fixed per model, configurable for text-embedding-3 models.
*/
class AzureOpenAiEmbeddingModel extends dev.langchain4j.model.embedding.DimensionAwareEmbeddingModel {
/**
* Creates builder for configuration.
* @return New Builder instance
*/
static Builder builder();
/**
* Embeds multiple text segments in a single batch request.
* More efficient than multiple embed() calls.
* @param textSegments List of 1-16 segments, each ≤ 8191 tokens
* @return Response with embeddings (same order as input) and token usage
* @throws IllegalArgumentException if textSegments is null, empty, or has > 16 segments
* @throws IllegalArgumentException if any segment exceeds 8191 tokens
* @throws java.util.concurrent.TimeoutException if request exceeds timeout (default 60s)
* @throws RuntimeException for network or API errors
*/
dev.langchain4j.model.output.Response<java.util.List<dev.langchain4j.data.embedding.Embedding>>
embedAll(java.util.List<dev.langchain4j.data.segment.TextSegment> textSegments);
/**
* Returns known dimension for the configured model.
* @return Dimension count or null if custom/unknown model
*/
@Override
protected Integer knownDimension();
/**
* Builder for AzureOpenAiEmbeddingModel configuration.
*/
class Builder {
// Mandatory configuration
/**
* @param endpoint Azure OpenAI endpoint URL
* @throws IllegalArgumentException if null, empty, or malformed
*/
Builder endpoint(String endpoint);
/**
* @param serviceVersion API version (e.g., "2024-02-15-preview")
* @throws IllegalArgumentException if null or empty
*/
Builder serviceVersion(String serviceVersion);
/**
* @param deploymentName Your embedding model deployment name
* @throws IllegalArgumentException if null or empty
*/
Builder deploymentName(String deploymentName);
// Authentication (choose exactly one)
Builder apiKey(String apiKey);
Builder nonAzureApiKey(String apiKey);
Builder tokenCredential(com.azure.core.credential.TokenCredential credential);
// Embedding configuration
/**
* Sets custom embedding dimensions (text-embedding-3 models only).
* Not supported by text-embedding-ada-002.
* @param dimensions Custom dimension count (e.g., 512, 1024)
* Valid ranges:
* - text-embedding-3-small: 1 to 1536 (default 1536)
* - text-embedding-3-large: 1 to 3072 (default 3072)
* @default null (use model's default: 1536 or 3072)
* @throws IllegalArgumentException if < 1 or > model max
* @throws IllegalArgumentException if used with non-text-embedding-3 model
*/
Builder dimensions(Integer dimensions);
// HTTP configuration (see configuration.md for details)
/**
* @default 60 seconds
*/
Builder timeout(java.time.Duration timeout);
Builder maxRetries(Integer maxRetries);
Builder retryOptions(com.azure.core.http.policy.RetryOptions retryOptions);
Builder proxyOptions(com.azure.core.http.ProxyOptions proxyOptions);
Builder httpClientProvider(com.azure.core.http.HttpClientProvider httpClientProvider);
Builder openAIClient(com.azure.ai.openai.OpenAIClient client);
Builder customHeaders(java.util.Map<String, String> customHeaders);
Builder userAgentSuffix(String userAgentSuffix);
// Observability
/**
* @default false
*/
Builder logRequestsAndResponses(boolean logRequestsAndResponses);
/**
* Builds immutable, thread-safe model instance.
* @throws IllegalStateException if required parameters missing
* @throws IllegalArgumentException if configuration invalid
*/
AzureOpenAiEmbeddingModel build();
}
}

package dev.langchain4j.model.azure;
/**
* Enum of Azure OpenAI embedding model names with dimension information.
*/
enum AzureOpenAiEmbeddingModelName {
/** text-embedding-3-small: 1536 dims (default), configurable 1-1536, cost-effective */
TEXT_EMBEDDING_3_SMALL,
/** text-embedding-3-small-1: 1536 dims, version 1 */
TEXT_EMBEDDING_3_SMALL_1,
/** text-embedding-3-large: 3072 dims (default), configurable 1-3072, highest quality */
TEXT_EMBEDDING_3_LARGE,
/** text-embedding-3-large-1: 3072 dims, version 1 */
TEXT_EMBEDDING_3_LARGE_1,
/** text-embedding-ada-002: 1536 dims, fixed, legacy model */
TEXT_EMBEDDING_ADA_002,
/** text-embedding-ada-002-1: 1536 dims, version 1 */
TEXT_EMBEDDING_ADA_002_1,
/** text-embedding-ada-002-2: 1536 dims, version 2 */
TEXT_EMBEDDING_ADA_002_2;
/**
* Full model name for API requests.
* @return Model name string
*/
String modelName();
/**
* Base model type without version.
* @return Model type (e.g., "text-embedding-3-small")
*/
String modelType();
/**
* Version suffix.
* @return Version string or empty
*/
String modelVersion();
/**
* Default embedding dimension.
* @return 1536 for ada-002 and 3-small, 3072 for 3-large
*/
Integer dimension();
String toString();
/**
* Static lookup of dimension by model name string.
* @param modelName Model name to look up
* @return Dimension count or null if unknown
*/
static Integer knownDimension(String modelName);
}

// Use default dimensions
AzureOpenAiEmbeddingModel.builder()
.deploymentName("text-embedding-3-small") // 1536 dims (default)
.build();
// Reduce dimensions for storage/performance
AzureOpenAiEmbeddingModel.builder()
.deploymentName("text-embedding-3-large")
.dimensions(1024) // Reduce from 3072 to 1024 (67% reduction)
.build();
// Minimum dimensions (not recommended)
.dimensions(256) // Very compact but lower quality

Benefits of dimension reduction: lower storage and memory footprint, faster similarity search, and cheaper vector indexing — at a small cost in embedding quality.
Dimension recommendations: keep the model default for maximum quality; reduce (e.g., 1024 for text-embedding-3-large) when storage or latency matters more, since most retrieval quality is retained.
/**
* Batch size: 1-16 segments per request.
* Always prefer embedAll() over multiple embed() calls.
*/
List<TextSegment> batch = new ArrayList<>();
for (int i = 0; i < 16; i++) { // Max 16
batch.add(TextSegment.from("Text " + i));
}
Response<List<Embedding>> response = model.embedAll(batch);
TokenUsage usage = response.tokenUsage();
// For > 16 texts, split into batches
List<TextSegment> allTexts = // ... more than 16 texts
List<Embedding> allEmbeddings = new ArrayList<>();
for (int i = 0; i < allTexts.size(); i += 16) {
int end = Math.min(i + 16, allTexts.size());
List<TextSegment> batch = allTexts.subList(i, end);
Response<List<Embedding>> response = model.embedAll(batch);
allEmbeddings.addAll(response.content());
}

package dev.langchain4j.data.segment;
/**
* Text segment to be embedded.
*/
class TextSegment {
/**
* Creates segment from plain text.
* @param text Text content, max 8191 tokens
* @throws IllegalArgumentException if text is null
*/
static TextSegment from(String text);
/**
* Creates segment with metadata.
* @param text Text content
* @param metadata Optional metadata (not embedded, for reference)
*/
static TextSegment from(String text, Metadata metadata);
/**
* @return Text content
*/
String text();
/**
* @return Metadata or null
*/
Metadata metadata();
}

package dev.langchain4j.data.embedding;
/**
* Vector embedding representation.
*/
class Embedding {
/**
* Creates embedding from float array.
* @param vector Float array of dimension values
*/
static Embedding from(float[] vector);
/**
* Returns vector as float array.
* @return Float array (e.g., length 1536 for ada-002)
*/
float[] vector();
/**
* Returns dimension count.
* @return Vector length (e.g., 1536, 3072)
*/
int dimension();
}

package dev.langchain4j.model.embedding;
/**
* Base class for models that know their dimension.
*/
abstract class DimensionAwareEmbeddingModel implements EmbeddingModel {
/**
* Returns known dimension or null if unknown/custom.
* @return Dimension count or null
*/
protected abstract Integer knownDimension();
/**
* Returns dimension, throwing if unknown.
* @return Dimension count
* @throws IllegalStateException if dimension unknown
*/
public int dimension();
}

// 1. Embed documents
List<String> documents = List.of(
"The capital of France is Paris.",
"Python is a programming language.",
"Mount Everest is the highest mountain."
);
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.toList();
Response<List<Embedding>> docResponse = model.embedAll(segments);
List<Embedding> docEmbeddings = docResponse.content();
// 2. Embed query
TextSegment query = TextSegment.from("What is the tallest mountain?");
Response<Embedding> queryResponse = model.embed(query);
Embedding queryEmbedding = queryResponse.content();
// 3. Calculate cosine similarity
List<Double> similarities = new ArrayList<>();
for (Embedding docEmbed : docEmbeddings) {
double similarity = cosineSimilarity(
queryEmbedding.vector(),
docEmbed.vector()
);
similarities.add(similarity);
}
// 4. Find most similar document
int bestIdx = IntStream.range(0, similarities.size())
.boxed()
.max(Comparator.comparing(similarities::get))
.orElse(-1);
System.out.println("Best match: " + documents.get(bestIdx));
System.out.println("Similarity: " + similarities.get(bestIdx));
/**
* Cosine similarity: -1 (opposite) to 1 (identical).
* OpenAI embeddings are normalized, so dot product = cosine similarity.
*/
static double cosineSimilarity(float[] a, float[] b) {
double dotProduct = 0.0;
for (int i = 0; i < a.length; i++) {
dotProduct += a[i] * b[i];
}
return dotProduct; // Already normalized
}

// Embed all texts
List<TextSegment> texts = // ... list of texts
Response<List<Embedding>> response = model.embedAll(texts);
List<float[]> vectors = response.content().stream()
.map(Embedding::vector)
.toList();
// Apply K-means or other clustering
// ... clustering logic using vectors

// Embed all items
Response<List<Embedding>> response = model.embedAll(items);
List<Embedding> embeddings = response.content();
// Find duplicates by similarity threshold
double threshold = 0.95; // 95% similar
for (int i = 0; i < embeddings.size(); i++) {
for (int j = i + 1; j < embeddings.size(); j++) {
double similarity = cosineSimilarity(
embeddings.get(i).vector(),
embeddings.get(j).vector()
);
if (similarity > threshold) {
System.out.printf("Potential duplicate: %d and %d (%.3f)%n",
i, j, similarity);
}
}
}

// GOOD: Single batch request
List<TextSegment> texts = List.of(/* 10 texts */);
Response<List<Embedding>> response = model.embedAll(texts); // 1 API call
// BAD: Multiple single requests
for (TextSegment text : texts) {
model.embed(text); // 10 API calls - 10x slower, 10x more expensive!
}

// Full quality: 3072 dimensions, 100% storage
AzureOpenAiEmbeddingModel.builder()
.deploymentName("text-embedding-3-large")
.build();
// Reduced dimensions: 1024 dimensions, 33% storage, 95%+ quality
AzureOpenAiEmbeddingModel.builder()
.deploymentName("text-embedding-3-large")
.dimensions(1024) // 67% storage reduction
.build();

Each model has a maximum token limit per text segment:
| Model | Max Tokens | Default Dimensions | Configurable Dimensions |
|---|---|---|---|
| text-embedding-ada-002 | 8191 | 1536 | No |
| text-embedding-3-small | 8191 | 1536 | Yes (1-1536) |
| text-embedding-3-large | 8191 | 3072 | Yes (1-3072) |
// Check token count before embedding
AzureOpenAiTokenCountEstimator estimator =
new AzureOpenAiTokenCountEstimator(
AzureOpenAiEmbeddingModelName.TEXT_EMBEDDING_ADA_002
);
String longText = // ... potentially long text
int tokens = estimator.estimateTokenCountInText(longText);
if (tokens > 8191) {
// Split text into chunks
List<String> chunks = splitIntoChunks(longText, 8000);
for (String chunk : chunks) {
model.embed(TextSegment.from(chunk));
}
} else {
model.embed(TextSegment.from(longText));
}

try {
Response<List<Embedding>> response = model.embedAll(segments);
} catch (IllegalArgumentException e) {
// Invalid input:
// - null or empty segments list
// - more than 16 segments
// - segment exceeds 8191 tokens
// - invalid dimensions configuration
System.err.println("Invalid input: " + e.getMessage());
} catch (java.util.concurrent.TimeoutException e) {
// Request timed out (default 60s)
// Retried per retry policy
System.err.println("Request timed out");
} catch (RuntimeException e) {
// Network, API, or auth error
System.err.println("Error: " + e.getMessage());
}

Install with Tessl CLI:
npx tessl i tessl/maven-dev-langchain4j--langchain4j-azure-open-ai@1.11.0