CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-dev-langchain4j--langchain4j-open-ai

LangChain4j OpenAI Integration providing Java access to OpenAI APIs including chat models, embeddings, image generation, audio transcription, and moderation.

Overview
Eval results
Files

docs/embedding-models.md

Embedding Models

Embedding models convert text into dense vector representations that capture semantic meaning. These embeddings enable similarity search, clustering, classification, and semantic search applications. OpenAI provides several embedding models optimized for different use cases and dimensionalities.

Embeddings are the foundation for retrieval-augmented generation (RAG), semantic search, recommendation systems, and anomaly detection. The text-embedding-3 models offer improved performance and configurable dimensions for storage optimization.

Capabilities

OpenAiEmbeddingModel

Synchronous embedding model that generates vector representations of text segments. Supports batch processing for efficient embedding of multiple text segments.

public class OpenAiEmbeddingModel extends DimensionAwareEmbeddingModel {
    public static OpenAiEmbeddingModelBuilder builder();

    // Core embedding methods
    public Response<Embedding> embed(String text);
    public Response<Embedding> embed(TextSegment textSegment);
    public Response<List<Embedding>> embedAll(List<TextSegment> textSegments);

    // Model information
    public Integer knownDimension();
    public String modelName();
}

OpenAiEmbeddingModelBuilder

Builder for configuring OpenAiEmbeddingModel instances with authentication, model selection, and batching options.

public static class OpenAiEmbeddingModelBuilder {
    // Core configuration
    public OpenAiEmbeddingModelBuilder modelName(String modelName);
    public OpenAiEmbeddingModelBuilder modelName(OpenAiEmbeddingModelName modelName);
    public OpenAiEmbeddingModelBuilder baseUrl(String baseUrl);
    public OpenAiEmbeddingModelBuilder apiKey(String apiKey);
    public OpenAiEmbeddingModelBuilder organizationId(String organizationId);
    public OpenAiEmbeddingModelBuilder projectId(String projectId);

    // Embedding configuration
    public OpenAiEmbeddingModelBuilder dimensions(Integer dimensions);
    public OpenAiEmbeddingModelBuilder user(String user);
    public OpenAiEmbeddingModelBuilder encodingFormat(String encodingFormat);

    // Batch processing
    public OpenAiEmbeddingModelBuilder maxSegmentsPerBatch(Integer maxSegmentsPerBatch);

    // HTTP configuration
    public OpenAiEmbeddingModelBuilder httpClientBuilder(HttpClientBuilder httpClientBuilder);
    public OpenAiEmbeddingModelBuilder timeout(Duration timeout);
    public OpenAiEmbeddingModelBuilder maxRetries(Integer maxRetries);
    public OpenAiEmbeddingModelBuilder customHeaders(Map<String, String> customHeaders);
    public OpenAiEmbeddingModelBuilder customHeaders(Supplier<Map<String, String>> customHeadersSupplier);
    public OpenAiEmbeddingModelBuilder customQueryParams(Map<String, String> customQueryParams);

    // Logging
    public OpenAiEmbeddingModelBuilder logRequests(Boolean logRequests);
    public OpenAiEmbeddingModelBuilder logResponses(Boolean logResponses);
    public OpenAiEmbeddingModelBuilder logger(Logger logger);

    // Build
    public OpenAiEmbeddingModel build();
}

Basic Usage Example

import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiEmbeddingModelName;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;

// Create embedding model
OpenAiEmbeddingModel model = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
    .build();

// Embed single text
String text = "Artificial intelligence is transforming the world.";
Response<Embedding> response = model.embed(text);

Embedding embedding = response.content();
System.out.println("Embedding dimension: " + embedding.dimension());
System.out.println("First 5 values: " + Arrays.toString(
    Arrays.copyOf(embedding.vector(), 5)
));
System.out.println("Tokens used: " + response.tokenUsage().totalTokenCount());

// Embed multiple texts
List<TextSegment> segments = List.of(
    TextSegment.from("The quick brown fox jumps over the lazy dog."),
    TextSegment.from("A journey of a thousand miles begins with a single step."),
    TextSegment.from("To be or not to be, that is the question.")
);

Response<List<Embedding>> batchResponse = model.embedAll(segments);
List<Embedding> embeddings = batchResponse.content();

System.out.println("Generated " + embeddings.size() + " embeddings");
System.out.println("Total tokens: " + batchResponse.tokenUsage().totalTokenCount());

for (int i = 0; i < embeddings.size(); i++) {
    Embedding emb = embeddings.get(i);
    System.out.println("Embedding " + i + ": dimension=" + emb.dimension());
}

Semantic Search Example

import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingMatch;

// Create embedding model
OpenAiEmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
    .build();

// Create document corpus
List<TextSegment> documents = List.of(
    TextSegment.from("Python is a high-level programming language."),
    TextSegment.from("Java is a statically-typed object-oriented language."),
    TextSegment.from("JavaScript runs in web browsers."),
    TextSegment.from("Machine learning is a subset of artificial intelligence."),
    TextSegment.from("Neural networks are inspired by biological neurons.")
);

// Embed all documents
Response<List<Embedding>> embeddingsResponse = embeddingModel.embedAll(documents);
List<Embedding> documentEmbeddings = embeddingsResponse.content();

// Store embeddings
EmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
for (int i = 0; i < documents.size(); i++) {
    embeddingStore.add(documentEmbeddings.get(i), documents.get(i));
}

// Perform semantic search
String query = "Tell me about AI and deep learning";
Embedding queryEmbedding = embeddingModel.embed(query).content();

List<EmbeddingMatch<TextSegment>> matches = embeddingStore.findRelevant(
    queryEmbedding,
    3  // Top 3 results
);

System.out.println("Query: " + query);
System.out.println("\nTop matches:");
for (int i = 0; i < matches.size(); i++) {
    EmbeddingMatch<TextSegment> match = matches.get(i);
    System.out.println((i + 1) + ". Score: " + match.score());
    System.out.println("   Text: " + match.embedded().text());
}

Configurable Dimensions Example

// Use smaller dimensions for storage optimization
OpenAiEmbeddingModel compactModel = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
    .dimensions(512)  // Reduce from default 1536 to 512
    .build();

Response<Embedding> compactEmbedding = compactModel.embed("Sample text");
System.out.println("Compact dimension: " + compactEmbedding.content().dimension());

// Use larger model for higher quality
OpenAiEmbeddingModel largeModel = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_LARGE)
    .build();

Response<Embedding> largeEmbedding = largeModel.embed("Sample text");
System.out.println("Large dimension: " + largeEmbedding.content().dimension());

Batch Processing Example

import java.util.ArrayList;

// Configure batch size
OpenAiEmbeddingModel batchModel = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
    .maxSegmentsPerBatch(100)  // Process 100 segments per API call
    .timeout(Duration.ofMinutes(2))
    .build();

// Prepare large dataset
List<TextSegment> largeDataset = new ArrayList<>();
for (int i = 0; i < 500; i++) {
    largeDataset.add(TextSegment.from("Document " + i + " content..."));
}

// Embed all (automatically batched)
Response<List<Embedding>> batchResponse = batchModel.embedAll(largeDataset);
System.out.println("Embedded " + batchResponse.content().size() + " documents");
System.out.println("Total tokens: " + batchResponse.tokenUsage().totalTokenCount());

Similarity Calculation Example

// Calculate cosine similarity between embeddings
// Calculate cosine similarity between two embeddings.
// Returns a value in [-1, 1]; 1.0 means identical direction.
//
// Fixes over the naive version:
//  - throws IllegalArgumentException on mismatched dimensions (the original
//    threw ArrayIndexOutOfBoundsException when vec2 was shorter, and silently
//    ignored the tail when vec2 was longer)
//  - returns 0.0 when either vector has zero magnitude, instead of the NaN
//    produced by dividing by zero
public static double cosineSimilarity(Embedding embedding1, Embedding embedding2) {
    float[] vec1 = embedding1.vector();
    float[] vec2 = embedding2.vector();

    if (vec1.length != vec2.length) {
        throw new IllegalArgumentException(
            "Embedding dimensions differ: " + vec1.length + " vs " + vec2.length);
    }

    double dotProduct = 0.0;
    double norm1 = 0.0;
    double norm2 = 0.0;

    for (int i = 0; i < vec1.length; i++) {
        dotProduct += vec1[i] * vec2[i];
        norm1 += vec1[i] * vec1[i];
        norm2 += vec2[i] * vec2[i];
    }

    double denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
    if (denominator == 0.0) {
        // At least one zero vector: similarity is undefined; treat as orthogonal.
        return 0.0;
    }
    return dotProduct / denominator;
}

// Use in practice
OpenAiEmbeddingModel model = OpenAiEmbeddingModel.builder()
    .apiKey(System.getenv("OPENAI_API_KEY"))
    .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
    .build();

Embedding emb1 = model.embed("I love machine learning").content();
Embedding emb2 = model.embed("Artificial intelligence is fascinating").content();
Embedding emb3 = model.embed("Pizza is delicious").content();

double similarity12 = cosineSimilarity(emb1, emb2);
double similarity13 = cosineSimilarity(emb1, emb3);

System.out.println("ML vs AI similarity: " + similarity12);  // High similarity
System.out.println("ML vs Pizza similarity: " + similarity13);  // Low similarity

Model Names

public enum OpenAiEmbeddingModelName {
    TEXT_EMBEDDING_3_SMALL("text-embedding-3-small", 1536),
    TEXT_EMBEDDING_3_LARGE("text-embedding-3-large", 3072),
    TEXT_EMBEDDING_ADA_002("text-embedding-ada-002", 1536);

    public String toString();
    public Integer dimension();

    public static Integer knownDimension(String modelName);
}

Model Comparison

| Model                  | Dimensions          | Use Case                        | Performance | Cost   |
|------------------------|---------------------|---------------------------------|-------------|--------|
| text-embedding-3-small | 1536 (configurable) | General purpose, cost-effective | Good        | Low    |
| text-embedding-3-large | 3072 (configurable) | High-quality semantic search    | Best        | Medium |
| text-embedding-ada-002 | 1536                | Legacy, backward compatibility  | Good        | Low    |

Types

Embedding

public class Embedding {
    public float[] vector();
    public List<Float> vectorAsList();
    public int dimension();

    public static Embedding from(float[] vector);
    public static Embedding from(List<Float> vector);
}

TextSegment

public class TextSegment {
    public String text();
    public Metadata metadata();

    public static TextSegment from(String text);
    public static TextSegment from(String text, Metadata metadata);
}

EmbeddingModel Interface

public interface EmbeddingModel {
    Response<Embedding> embed(String text);
    Response<Embedding> embed(TextSegment textSegment);
    Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
}

DimensionAwareEmbeddingModel

public abstract class DimensionAwareEmbeddingModel implements EmbeddingModel {
    public abstract Integer knownDimension();
}

Response

public class Response<T> {
    public T content();
    public TokenUsage tokenUsage();
    public FinishReason finishReason();
}

Configuration Options

Dimensions

Configurable output dimensions for text-embedding-3 models:

  • text-embedding-3-small: Default 1536, configurable down to 256
  • text-embedding-3-large: Default 3072, configurable down to 256
  • text-embedding-ada-002: Fixed at 1536

Lower dimensions reduce:

  • Storage requirements
  • Vector database costs
  • Search latency

Trade-off: Slightly reduced semantic quality

Encoding Format

Format for returned embeddings:

  • "float": Default, standard floating-point
  • "base64": Base64-encoded, reduces bandwidth

Most applications should use default "float" format.

Max Segments Per Batch

Controls batching for embedAll() method:

  • Default: 2048 segments
  • OpenAI allows up to 2048 inputs per request
  • Larger batches = fewer API round trips = less request overhead and higher throughput
  • Adjust based on segment sizes and API limits

User Identifier

Optional string to track end-users:

  • Helps OpenAI monitor and detect abuse
  • Not required but recommended for production

Timeout

Maximum time to wait for API response:

  • Default varies by HTTP client
  • Set higher for large batches
  • Consider: number of segments × average text length

Max Retries

Number of retry attempts on failure:

  • Default: 2
  • Automatic retry on transient failures
  • Exponential backoff between retries

Best Practices

Choosing a Model

Use text-embedding-3-small when:

  • Budget is a concern
  • Storage space is limited
  • Speed is important
  • Semantic quality is good enough

Use text-embedding-3-large when:

  • Maximum semantic quality is needed
  • Budget allows higher costs
  • Working with complex or technical content
  • Fine-grained distinctions matter

Use text-embedding-ada-002 when:

  • Backward compatibility is required
  • Existing embeddings need consistency

Optimizing Dimensions

// Test different dimensions for your use case
for (int dim : List.of(256, 512, 1024, 1536)) {
    OpenAiEmbeddingModel model = OpenAiEmbeddingModel.builder()
        .apiKey(apiKey)
        .modelName(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL)
        .dimensions(dim)
        .build();

    // Evaluate quality vs. size trade-off
    evaluateModel(model, dim);
}

Efficient Batching

// Good: Batch multiple embeddings
List<TextSegment> segments = List.of(
    TextSegment.from("Text 1"),
    TextSegment.from("Text 2"),
    TextSegment.from("Text 3")
);
Response<List<Embedding>> response = model.embedAll(segments);

// Less efficient: Individual calls
for (TextSegment segment : segments) {
    Response<Embedding> response = model.embed(segment);  // Multiple API calls
}

Handling Large Datasets

import java.util.stream.Collectors;

// Split very large datasets into manageable chunks
List<TextSegment> allSegments = loadLargeDataset();
int chunkSize = 1000;

for (int i = 0; i < allSegments.size(); i += chunkSize) {
    int end = Math.min(i + chunkSize, allSegments.size());
    List<TextSegment> chunk = allSegments.subList(i, end);

    Response<List<Embedding>> response = model.embedAll(chunk);
    storeEmbeddings(response.content());

    // Rate limiting
    Thread.sleep(1000);  // 1 second between chunks
}

Text Preprocessing

// Clean and normalize text before embedding
public TextSegment preprocessText(String rawText) {
    String cleaned = rawText
        .trim()
        .replaceAll("\\s+", " ")  // Normalize whitespace
        .replaceAll("[\\p{Cntrl}&&[^\r\n\t]]", "")  // Remove control chars
        .toLowerCase();  // Optional: lowercase for consistency

    // Truncate if too long (OpenAI limit: 8191 tokens)
    if (cleaned.length() > 30000) {  // Approximate token limit
        cleaned = cleaned.substring(0, 30000);
    }

    return TextSegment.from(cleaned);
}

Caching Embeddings

import java.util.concurrent.ConcurrentHashMap;

/**
 * Thread-safe, in-memory cache of text embeddings.
 *
 * <p>Each distinct input string is embedded at most once; repeat lookups for
 * the same text are served from the map without another API call. Atomicity
 * of the compute-on-miss path is delegated to
 * {@link java.util.concurrent.ConcurrentHashMap#computeIfAbsent}.
 */
public class EmbeddingCache {
    private final OpenAiEmbeddingModel model;
    private final ConcurrentHashMap<String, Embedding> store;

    public EmbeddingCache(OpenAiEmbeddingModel model) {
        this.model = model;
        this.store = new ConcurrentHashMap<>();
    }

    /** Returns the embedding for {@code text}, computing and caching it on first use. */
    public Embedding embed(String text) {
        return store.computeIfAbsent(text, this::computeEmbedding);
    }

    // Single round trip to the embedding model; only reached on a cache miss.
    private Embedding computeEmbedding(String text) {
        return model.embed(text).content();
    }

    /** Drops every cached entry. */
    public void clear() {
        store.clear();
    }

    /** Number of distinct texts currently cached. */
    public int size() {
        return store.size();
    }
}

Common Use Cases

Semantic Search

Finding documents similar to a query based on meaning rather than keyword matching.

Clustering

Grouping similar documents together based on embedding similarity.

Classification

Training classifiers using embeddings as features.

Recommendation Systems

Recommending items similar to user preferences based on embedding similarity.

Anomaly Detection

Identifying outliers by finding embeddings far from cluster centers.

RAG (Retrieval-Augmented Generation)

Retrieving relevant context for language model prompts based on semantic similarity.

Duplicate Detection

Finding near-duplicate or similar content by comparing embeddings.

Performance Considerations

Latency

  • Single embedding: ~50-200ms
  • Batch of 100: ~200-500ms
  • Network latency is dominant factor

Throughput

  • Rate limits apply per organization
  • Use batching to maximize throughput
  • Consider parallel requests with rate limiting

Cost

  • Charged per token processed
  • text-embedding-3-small: Lower cost per token
  • text-embedding-3-large: Higher cost per token
  • Batch processing reduces overhead

Install with Tessl CLI

npx tessl i tessl/maven-dev-langchain4j--langchain4j-open-ai

docs

advanced-features.md

audio-transcription-models.md

chat-models.md

embedding-models.md

image-models.md

index.md

language-models.md

model-catalog.md

moderation-models.md

request-response.md

token-management.md

README.md

tile.json