CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-dev-langchain4j--langchain4j-azure-open-ai

LangChain4j integration for Azure OpenAI providing chat, streaming, embeddings, image generation, audio transcription, and token counting capabilities

Overview
Eval results
Files

docs/embedding-model.md

Embedding Model

The embedding model converts text into vector representations for semantic search, similarity comparisons, clustering, and other vector-based operations. Supports Azure OpenAI embedding models like text-embedding-ada-002 and text-embedding-3-small/large.

Imports

import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel;
import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModelName;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.TokenUsage;

Basic Usage

AzureOpenAiEmbeddingModel model = AzureOpenAiEmbeddingModel.builder()
    .endpoint("https://your-resource.openai.azure.com/")
    .apiKey("your-api-key")
    .deploymentName("text-embedding-ada-002")
    .serviceVersion("2024-02-15-preview")
    .build();

// Embed a single text
TextSegment text = TextSegment.from("The quick brown fox jumps over the lazy dog");
Response<Embedding> response = model.embed(text);
Embedding embedding = response.content();

// Get vector representation
float[] vector = embedding.vector();  // 1536-dimensional vector
int dimensions = embedding.dimension();  // 1536

// Embed multiple texts (batch, max 16)
List<TextSegment> texts = List.of(
    TextSegment.from("First text"),
    TextSegment.from("Second text"),
    TextSegment.from("Third text")
);
Response<List<Embedding>> batchResponse = model.embedAll(texts);
List<Embedding> embeddings = batchResponse.content();
TokenUsage usage = batchResponse.tokenUsage();  // Track token consumption

API

package dev.langchain4j.model.azure;

/**
 * Azure OpenAI embedding model for text vectorization.
 * Thread-safe: Yes - instances are immutable and thread-safe.
 * Batch size: Maximum 16 text segments per request.
 * Token limit: Maximum 8191 tokens per segment.
 * Dimensions: Fixed per model, configurable for text-embedding-3 models.
 */
class AzureOpenAiEmbeddingModel extends dev.langchain4j.model.embedding.DimensionAwareEmbeddingModel {
    /**
     * Creates builder for configuration.
     * @return New Builder instance
     */
    static Builder builder();

    /**
     * Embeds multiple text segments in a single batch request.
     * More efficient than multiple embed() calls.
     * @param textSegments List of 1-16 segments, each ≤ 8191 tokens
     * @return Response with embeddings (same order as input) and token usage
     * @throws IllegalArgumentException if textSegments is null, empty, or has > 16 segments
     * @throws IllegalArgumentException if any segment exceeds 8191 tokens
     * @throws java.util.concurrent.TimeoutException if request exceeds timeout (default 60s)
     * @throws RuntimeException for network or API errors
     */
    dev.langchain4j.model.output.Response<java.util.List<dev.langchain4j.data.embedding.Embedding>>
        embedAll(java.util.List<dev.langchain4j.data.segment.TextSegment> textSegments);

    /**
     * Returns known dimension for the configured model.
     * @return Dimension count or null if custom/unknown model
     */
    @Override
    protected Integer knownDimension();

    /**
     * Builder for AzureOpenAiEmbeddingModel configuration.
     */
    class Builder {
        // Mandatory configuration
        /**
         * @param endpoint Azure OpenAI endpoint URL
         * @throws IllegalArgumentException if null, empty, or malformed
         */
        Builder endpoint(String endpoint);

        /**
         * @param serviceVersion API version (e.g., "2024-02-15-preview")
         * @throws IllegalArgumentException if null or empty
         */
        Builder serviceVersion(String serviceVersion);

        /**
         * @param deploymentName Your embedding model deployment name
         * @throws IllegalArgumentException if null or empty
         */
        Builder deploymentName(String deploymentName);

        // Authentication (choose exactly one)
        Builder apiKey(String apiKey);
        Builder nonAzureApiKey(String apiKey);
        Builder tokenCredential(com.azure.core.credential.TokenCredential credential);

        // Embedding configuration
        /**
         * Sets custom embedding dimensions (text-embedding-3 models only).
         * Not supported by text-embedding-ada-002.
         * @param dimensions Custom dimension count (e.g., 512, 1024)
         * Valid ranges:
         * - text-embedding-3-small: 1 to 1536 (default 1536)
         * - text-embedding-3-large: 1 to 3072 (default 3072)
         * @default null (use model's default: 1536 or 3072)
         * @throws IllegalArgumentException if < 1 or > model max
         * @throws IllegalArgumentException if used with non-text-embedding-3 model
         */
        Builder dimensions(Integer dimensions);

        // HTTP configuration (see configuration.md for details)
        /**
         * @default 60 seconds
         */
        Builder timeout(java.time.Duration timeout);
        Builder maxRetries(Integer maxRetries);
        Builder retryOptions(com.azure.core.http.policy.RetryOptions retryOptions);
        Builder proxyOptions(com.azure.core.http.ProxyOptions proxyOptions);
        Builder httpClientProvider(com.azure.core.http.HttpClientProvider httpClientProvider);
        Builder openAIClient(com.azure.ai.openai.OpenAIClient client);
        Builder customHeaders(java.util.Map<String, String> customHeaders);
        Builder userAgentSuffix(String userAgentSuffix);

        // Observability
        /**
         * @default false
         */
        Builder logRequestsAndResponses(boolean logRequestsAndResponses);

        /**
         * Builds immutable, thread-safe model instance.
         * @throws IllegalStateException if required parameters missing
         * @throws IllegalArgumentException if configuration invalid
         */
        AzureOpenAiEmbeddingModel build();
    }
}

Model Names

package dev.langchain4j.model.azure;

/**
 * Enum of Azure OpenAI embedding model names with dimension information.
 */
enum AzureOpenAiEmbeddingModelName {
    /** text-embedding-3-small: 1536 dims (default), configurable 1-1536, cost-effective */
    TEXT_EMBEDDING_3_SMALL,
    /** text-embedding-3-small-1: 1536 dims, version 1 */
    TEXT_EMBEDDING_3_SMALL_1,
    /** text-embedding-3-large: 3072 dims (default), configurable 1-3072, highest quality */
    TEXT_EMBEDDING_3_LARGE,
    /** text-embedding-3-large-1: 3072 dims, version 1 */
    TEXT_EMBEDDING_3_LARGE_1,
    /** text-embedding-ada-002: 1536 dims, fixed, legacy model */
    TEXT_EMBEDDING_ADA_002,
    /** text-embedding-ada-002-1: 1536 dims, version 1 */
    TEXT_EMBEDDING_ADA_002_1,
    /** text-embedding-ada-002-2: 1536 dims, version 2 */
    TEXT_EMBEDDING_ADA_002_2;

    /**
     * Full model name for API requests.
     * @return Model name string
     */
    String modelName();

    /**
     * Base model type without version.
     * @return Model type (e.g., "text-embedding-3-small")
     */
    String modelType();

    /**
     * Version suffix.
     * @return Version string or empty
     */
    String modelVersion();

    /**
     * Default embedding dimension.
     * @return 1536 for ada-002 and 3-small, 3072 for 3-large
     */
    Integer dimension();

    String toString();

    /**
     * Static lookup of dimension by model name string.
     * @param modelName Model name to look up
     * @return Dimension count or null if unknown
     */
    static Integer knownDimension(String modelName);
}

Configuration

Dimension Customization (Text-Embedding-3 Only)

// Use default dimensions
AzureOpenAiEmbeddingModel.builder()
    .deploymentName("text-embedding-3-small")  // 1536 dims (default)
    .build();

// Reduce dimensions for storage/performance
AzureOpenAiEmbeddingModel.builder()
    .deploymentName("text-embedding-3-large")
    .dimensions(1024)  // Reduce from 3072 to 1024 (67% reduction)
    .build();

// Minimum dimensions (not recommended)
.dimensions(256)  // Very compact but lower quality

Benefits of dimension reduction:

  • Reduced storage: 1024 dims = 67% less storage than 3072
  • Faster similarity: Lower-dimensional cosine similarity is faster
  • Lower memory: Smaller vectors fit more in RAM
  • Maintained quality: Typically 95%+ quality retention at 1024 dims

Dimension recommendations:

  • Full quality: Use default (1536 or 3072)
  • Balanced: 1024 dims (good quality/size trade-off)
  • Compact: 512 dims (acceptable quality, 83% storage reduction)
  • Minimal: 256 dims (degraded quality, 92% storage reduction)

Batch Embedding

/**
 * Batch size: 1-16 segments per request.
 * Always prefer embedAll() over multiple embed() calls.
 */
List<TextSegment> batch = new ArrayList<>();
for (int i = 0; i < 16; i++) {  // Max 16
    batch.add(TextSegment.from("Text " + i));
}

Response<List<Embedding>> response = model.embedAll(batch);
TokenUsage usage = response.tokenUsage();

// For > 16 texts, split into batches
List<TextSegment> allTexts = // ... more than 16 texts
List<Embedding> allEmbeddings = new ArrayList<>();

for (int i = 0; i < allTexts.size(); i += 16) {
    int end = Math.min(i + 16, allTexts.size());
    List<TextSegment> batch = allTexts.subList(i, end);
    Response<List<Embedding>> response = model.embedAll(batch);
    allEmbeddings.addAll(response.content());
}

Types

package dev.langchain4j.data.segment;

/**
 * Text segment to be embedded.
 */
class TextSegment {
    /**
     * Creates segment from plain text.
     * @param text Text content, max 8191 tokens
     * @throws IllegalArgumentException if text is null
     */
    static TextSegment from(String text);

    /**
     * Creates segment with metadata.
     * @param text Text content
     * @param metadata Optional metadata (not embedded, for reference)
     */
    static TextSegment from(String text, Metadata metadata);

    /**
     * @return Text content
     */
    String text();

    /**
     * @return Metadata or null
     */
    Metadata metadata();
}
package dev.langchain4j.data.embedding;

/**
 * Vector embedding representation.
 */
class Embedding {
    /**
     * Creates embedding from float array.
     * @param vector Float array of dimension values
     */
    static Embedding from(float[] vector);

    /**
     * Returns vector as float array.
     * @return Float array (e.g., length 1536 for ada-002)
     */
    float[] vector();

    /**
     * Returns dimension count.
     * @return Vector length (e.g., 1536, 3072)
     */
    int dimension();
}
package dev.langchain4j.model.embedding;

/**
 * Base class for models that know their dimension.
 */
abstract class DimensionAwareEmbeddingModel implements EmbeddingModel {
    /**
     * Returns known dimension or null if unknown/custom.
     * @return Dimension count or null
     */
    protected abstract Integer knownDimension();

    /**
     * Returns dimension, throwing if unknown.
     * @return Dimension count
     * @throws IllegalStateException if dimension unknown
     */
    public int dimension();
}

Use Cases

Semantic Search

// 1. Embed documents
List<String> documents = List.of(
    "The capital of France is Paris.",
    "Python is a programming language.",
    "Mount Everest is the highest mountain."
);

List<TextSegment> segments = documents.stream()
    .map(TextSegment::from)
    .toList();

Response<List<Embedding>> docResponse = model.embedAll(segments);
List<Embedding> docEmbeddings = docResponse.content();

// 2. Embed query
TextSegment query = TextSegment.from("What is the tallest mountain?");
Response<Embedding> queryResponse = model.embed(query);
Embedding queryEmbedding = queryResponse.content();

// 3. Calculate cosine similarity
List<Double> similarities = new ArrayList<>();
for (Embedding docEmbed : docEmbeddings) {
    double similarity = cosineSimilarity(
        queryEmbedding.vector(),
        docEmbed.vector()
    );
    similarities.add(similarity);
}

// 4. Find most similar document
int bestIdx = IntStream.range(0, similarities.size())
    .boxed()
    .max(Comparator.comparing(similarities::get))
    .orElse(-1);

System.out.println("Best match: " + documents.get(bestIdx));
System.out.println("Similarity: " + similarities.get(bestIdx));

/**
 * Cosine similarity: -1 (opposite) to 1 (identical).
 * OpenAI embeddings are normalized, so dot product = cosine similarity.
 */
/**
 * Computes cosine similarity between two embedding vectors, ranging from
 * -1 (opposite) to 1 (identical).
 *
 * Because OpenAI embeddings come back pre-normalized (unit length), the
 * plain dot product already equals the cosine similarity — no division by
 * vector magnitudes is required.
 *
 * @param a first embedding vector
 * @param b second embedding vector (assumed same length as {@code a})
 * @return cosine similarity of the two vectors
 */
static double cosineSimilarity(float[] a, float[] b) {
    double accumulated = 0.0;
    // Accumulate the element-wise products; for unit vectors this IS the cosine.
    for (int idx = 0; idx < a.length; idx++) {
        accumulated += a[idx] * b[idx];
    }
    return accumulated;
}

Clustering

// Embed all texts
List<TextSegment> texts = // ... list of texts
Response<List<Embedding>> response = model.embedAll(texts);
List<float[]> vectors = response.content().stream()
    .map(Embedding::vector)
    .toList();

// Apply K-means or other clustering
// ... clustering logic using vectors

Duplicate Detection

// Embed all items
Response<List<Embedding>> response = model.embedAll(items);
List<Embedding> embeddings = response.content();

// Find duplicates by similarity threshold
double threshold = 0.95;  // 95% similar
for (int i = 0; i < embeddings.size(); i++) {
    for (int j = i + 1; j < embeddings.size(); j++) {
        double similarity = cosineSimilarity(
            embeddings.get(i).vector(),
            embeddings.get(j).vector()
        );
        if (similarity > threshold) {
            System.out.printf("Potential duplicate: %d and %d (%.3f)%n",
                i, j, similarity);
        }
    }
}

Performance

Batch Size Optimization

// GOOD: Single batch request
List<TextSegment> texts = List.of(/* 10 texts */);
Response<List<Embedding>> response = model.embedAll(texts);  // 1 API call

// BAD: Multiple single requests
for (TextSegment text : texts) {
    model.embed(text);  // 10 API calls - 10x slower, 10x more expensive!
}

Dimension Reduction

// Full quality: 3072 dimensions, 100% storage
AzureOpenAiEmbeddingModel.builder()
    .deploymentName("text-embedding-3-large")
    .build();

// Reduced dimensions: 1024 dimensions, 33% storage, 95%+ quality
AzureOpenAiEmbeddingModel.builder()
    .deploymentName("text-embedding-3-large")
    .dimensions(1024)  // 67% storage reduction
    .build();

Token Limits

Each model has a maximum token limit per text segment:

| Model                  | Max Tokens | Default Dimensions | Configurable Dimensions |
|------------------------|------------|--------------------|-------------------------|
| text-embedding-ada-002 | 8191       | 1536               | No                      |
| text-embedding-3-small | 8191       | 1536               | Yes (1-1536)            |
| text-embedding-3-large | 8191       | 3072               | Yes (1-3072)            |
// Check token count before embedding
AzureOpenAiTokenCountEstimator estimator =
    new AzureOpenAiTokenCountEstimator(
        AzureOpenAiEmbeddingModelName.TEXT_EMBEDDING_ADA_002
    );

String longText = // ... potentially long text
int tokens = estimator.estimateTokenCountInText(longText);

if (tokens > 8191) {
    // Split text into chunks
    List<String> chunks = splitIntoChunks(longText, 8000);
    for (String chunk : chunks) {
        model.embed(TextSegment.from(chunk));
    }
} else {
    model.embed(TextSegment.from(longText));
}

Error Handling

try {
    Response<List<Embedding>> response = model.embedAll(segments);
} catch (IllegalArgumentException e) {
    // Invalid input:
    // - null or empty segments list
    // - more than 16 segments
    // - segment exceeds 8191 tokens
    // - invalid dimensions configuration
    System.err.println("Invalid input: " + e.getMessage());
} catch (java.util.concurrent.TimeoutException e) {
    // Request timed out (default 60s)
    // Retried per retry policy
    System.err.println("Request timed out");
} catch (RuntimeException e) {
    // Network, API, or auth error
    System.err.println("Error: " + e.getMessage());
}

Best Practices

  1. Always use embedAll() for multiple texts (max 16 per call)
  2. Cache embeddings - embeddings are stable, compute once and store
  3. No need to normalize vectors - OpenAI embeddings are pre-normalized, so dot product equals cosine similarity
  4. Use appropriate dimensions - balance quality vs storage/performance
  5. Chunk long texts - split texts > 8191 tokens
  6. Estimate tokens first - avoid surprises with long texts
  7. Handle batches efficiently - split > 16 texts into multiple batches
  8. Reuse model instances - thread-safe, create once and share

Install with Tessl CLI

npx tessl i tessl/maven-dev-langchain4j--langchain4j-azure-open-ai

docs

audio-transcription.md

chat-models.md

configuration.md

embedding-model.md

image-model.md

index.md

language-models.md

token-counting.md

tile.json