LangChain4j integration for Mistral AI providing chat completion, streaming, embedding, moderation, and code completion capabilities
Convert text into dense vector embeddings for semantic search, clustering, similarity comparison, and retrieval-augmented generation (RAG) applications. Mistral AI's embedding model produces 1024-dimensional vectors optimized for semantic understanding.
Generate vector embeddings from text using the Mistral embedding model.
public class MistralAiEmbeddingModel extends DimensionAwareEmbeddingModel {
/**
* Create a new builder for configuring MistralAiEmbeddingModel.
*
* @return MistralAiEmbeddingModelBuilder instance
*/
public static MistralAiEmbeddingModelBuilder builder() { ... }
/**
* Embed a single text string.
*
* @param text Text (non-null) string to embed
* @return Response containing Embedding with vector representation
*/
public Response<Embedding> embed(String text) { ... }
/**
* Embed a single text segment.
*
* @param textSegment TextSegment (non-null) to embed
* @return Response containing Embedding with vector representation
*/
public Response<Embedding> embed(TextSegment textSegment) { ... }
/**
* Embed multiple text segments in a single batch request.
*
* @param textSegments List (non-null) of TextSegment objects to embed
* @return Response containing List of Embedding objects, one per input segment
*/
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) { ... }
/**
* Get the dimensionality of the embedding vectors.
*
* @return Dimension count (1024 for mistral-embed)
*/
public int dimension() { ... }
/**
* Returns the configured model name string.
*
* @return Model name identifier
*/
public String modelName() { ... }
}

import dev.langchain4j.model.mistralai.MistralAiEmbeddingModel;
import dev.langchain4j.model.mistralai.MistralAiEmbeddingModelName;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
MistralAiEmbeddingModel embeddingModel = MistralAiEmbeddingModel.builder()
.apiKey(System.getenv("MISTRAL_API_KEY"))
.modelName(MistralAiEmbeddingModelName.MISTRAL_EMBED)
.build();
Response<Embedding> response = embeddingModel.embed("Hello, world!");
Embedding embedding = response.content();
float[] vector = embedding.vector();
System.out.println("Embedding dimension: " + vector.length); // 1024
System.out.println("First 5 values: " + Arrays.toString(Arrays.copyOf(vector, 5)));

Embed multiple texts efficiently in a single API call:
import dev.langchain4j.data.segment.TextSegment;
import java.util.Arrays;
import java.util.List;
List<TextSegment> documents = Arrays.asList(
TextSegment.from("Machine learning is a subset of artificial intelligence."),
TextSegment.from("Deep learning uses neural networks with multiple layers."),
TextSegment.from("Natural language processing enables computers to understand text."),
TextSegment.from("Computer vision allows machines to interpret visual information.")
);
Response<List<Embedding>> response = embeddingModel.embedAll(documents);
List<Embedding> embeddings = response.content();
System.out.println("Embedded " + embeddings.size() + " documents");
for (int i = 0; i < embeddings.size(); i++) {
Embedding emb = embeddings.get(i);
System.out.println("Document " + i + ": " + emb.dimension() + " dimensions");
}

Find similar documents using embeddings:
import java.util.HashMap;
import java.util.Map;
// Create knowledge base
List<String> knowledgeBase = Arrays.asList(
"The Eiffel Tower is located in Paris, France.",
"The Great Wall of China stretches over 13,000 miles.",
"The Statue of Liberty was a gift from France to the United States.",
"Mount Everest is the tallest mountain on Earth."
);
// Embed all documents
List<TextSegment> segments = knowledgeBase.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
Response<List<Embedding>> docsResponse = embeddingModel.embedAll(segments);
List<Embedding> docEmbeddings = docsResponse.content();
// Embed query
String query = "What famous structures are in France?";
Response<Embedding> queryResponse = embeddingModel.embed(query);
Embedding queryEmbedding = queryResponse.content();
// Calculate cosine similarity
Map<Integer, Double> similarities = new HashMap<>();
for (int i = 0; i < docEmbeddings.size(); i++) {
double similarity = cosineSimilarity(queryEmbedding.vector(),
docEmbeddings.get(i).vector());
similarities.put(i, similarity);
}
// Find most similar document
int mostSimilar = similarities.entrySet().stream()
.max(Map.Entry.comparingByValue())
.get()
.getKey();
System.out.println("Most relevant: " + knowledgeBase.get(mostSimilar));
System.out.println("Similarity score: " + similarities.get(mostSimilar));
// Helper method for cosine similarity
/**
 * Computes the cosine similarity between two equal-length vectors.
 *
 * @param a first vector (must be the same length as {@code b})
 * @param b second vector
 * @return similarity in [-1, 1]; 0.0 when either vector has zero magnitude
 */
private static double cosineSimilarity(float[] a, float[] b) {
double dotProduct = 0.0;
double normA = 0.0;
double normB = 0.0;
for (int i = 0; i < a.length; i++) {
dotProduct += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
// Guard against division by zero: an all-zero vector would otherwise yield NaN
double denominator = Math.sqrt(normA) * Math.sqrt(normB);
return denominator == 0.0 ? 0.0 : dotProduct / denominator;
}

Combine embeddings with Chat Models for RAG applications:
import dev.langchain4j.model.mistralai.MistralAiChatModel;
// Step 1: Embed and store documents
List<String> documents = loadDocuments();
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
List<Embedding> docEmbeddings = embeddingModel.embedAll(segments).content();
// Step 2: Embed user query
String userQuery = "How does photosynthesis work?";
Embedding queryEmbedding = embeddingModel.embed(userQuery).content();
// Step 3: Find most relevant documents
List<String> relevantDocs = findTopK(queryEmbedding, docEmbeddings, documents, 3);
// Step 4: Create context from relevant documents
String context = String.join("\n\n", relevantDocs);
// Step 5: Generate answer using chat model with context
MistralAiChatModel chatModel = MistralAiChatModel.builder()
.apiKey(System.getenv("MISTRAL_API_KEY"))
.build();
String prompt = String.format(
"Context:\n%s\n\nQuestion: %s\n\nAnswer based on the context above:",
context, userQuery
);
ChatResponse answer = chatModel.chat(
List.of(UserMessage.from(prompt))
);
System.out.println(answer.aiMessage().text());

Group similar documents using K-means clustering:
// Embed documents
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
// Convert to 2D array for clustering algorithm
float[][] vectors = embeddings.stream()
.map(Embedding::vector)
.toArray(float[][]::new);
// Apply K-means clustering (use your preferred ML library)
KMeans kmeans = new KMeans(5); // 5 clusters
int[] clusterAssignments = kmeans.fit(vectors);
// Group documents by cluster
Map<Integer, List<String>> clusters = new HashMap<>();
for (int i = 0; i < clusterAssignments.length; i++) {
int cluster = clusterAssignments[i];
clusters.computeIfAbsent(cluster, k -> new ArrayList<>())
.add(documents.get(i));
}
// Print clusters
for (Map.Entry<Integer, List<String>> entry : clusters.entrySet()) {
System.out.println("Cluster " + entry.getKey() + ":");
entry.getValue().forEach(doc -> System.out.println(" - " + doc));
}

public static class MistralAiEmbeddingModelBuilder {
/**
* Set the Mistral AI API key (required).
*
* @param apiKey Your (non-null) Mistral AI API key
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder apiKey(String apiKey) { ... }
/**
* Set the base URL for the Mistral AI API.
* Default: https://api.mistral.ai/v1
*
* @param baseUrl Custom (non-null) API endpoint URL
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder baseUrl(String baseUrl) { ... }
/**
* Set the model name using enum.
* Default: MISTRAL_EMBED
*
* @param modelName MistralAiEmbeddingModelName (non-null) enum value
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder modelName(MistralAiEmbeddingModelName modelName) { ... }
/**
* Set the model name using string.
*
* @param modelName Model (non-null) identifier string
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder modelName(String modelName) { ... }
/**
* Set request timeout.
* Default: 60 seconds
*
* @param timeout Duration (non-null) for request timeout
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder timeout(Duration timeout) { ... }
/**
* Enable request logging.
*
* @param logRequests True (non-null) to log requests
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder logRequests(Boolean logRequests) { ... }
/**
* Enable response logging.
*
* @param logResponses True (non-null) to log responses
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder logResponses(Boolean logResponses) { ... }
/**
* Set custom SLF4J logger for logging.
*
* @param logger SLF4J Logger instance (non-null)
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder logger(Logger logger) { ... }
/**
* Set maximum retry attempts on failure.
* Default: 2
*
* @param maxRetries Maximum (non-null) number of retries
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder maxRetries(Integer maxRetries) { ... }
/**
* Set custom HTTP client builder.
*
* @param httpClientBuilder HttpClientBuilder (non-null) instance
* @return Builder instance
* @throws IllegalArgumentException if parameter validation fails
*/
public MistralAiEmbeddingModelBuilder httpClientBuilder(HttpClientBuilder httpClientBuilder) { ... }
/**
* Build the MistralAiEmbeddingModel instance.
*
* @return Configured MistralAiEmbeddingModel
*/
public MistralAiEmbeddingModel build() { ... }
}

Always embed multiple texts in a single request when possible:
// Good: Single API call for multiple texts
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
// Avoid: Multiple API calls for multiple texts
for (TextSegment segment : segments) {
embeddingModel.embed(segment); // Don't do this!
}

Prepare text for optimal embedding quality:
/**
 * Normalizes raw input before embedding: trims the edges, collapses internal
 * whitespace runs to single spaces, then lower-cases the result.
 *
 * @param text raw user-supplied text
 * @return normalized text ready for embedding
 */
private String preprocessText(String text) {
String trimmed = text.trim();                        // drop leading/trailing whitespace
String collapsed = trimmed.replaceAll("\\s+", " ");  // collapse whitespace runs
return collapsed.toLowerCase();                      // normalize case (optional step)
}
String query = preprocessText(userInput);
Response<Embedding> response = embeddingModel.embed(query);

Stay within the 8192 token limit per text:
/**
 * Splits {@code text} into chunks that stay within the embedding model's
 * per-input token limit. Uses a simple whitespace-word heuristic where one
 * word approximates one token; substitute a real tokenizer for production use.
 *
 * @param text      the document to split (may be null or blank)
 * @param maxTokens maximum approximate tokens per chunk (must be > 0)
 * @return ordered list of chunks; empty when {@code text} is null or blank
 */
private List<String> chunkText(String text, int maxTokens) {
List<String> chunks = new ArrayList<>();
if (text == null || text.isBlank()) {
return chunks;
}
String[] words = text.trim().split("\\s+");
StringBuilder current = new StringBuilder();
int tokensInChunk = 0;
for (String word : words) {
// Flush the current chunk once it reaches the token budget
if (tokensInChunk == maxTokens) {
chunks.add(current.toString());
current.setLength(0);
tokensInChunk = 0;
}
if (tokensInChunk > 0) {
current.append(' ');
}
current.append(word);
tokensInChunk++;
}
if (current.length() > 0) {
chunks.add(current.toString());
}
return chunks;
}
String longDocument = loadLongDocument();
List<String> chunks = chunkText(longDocument, 8000);
List<Embedding> embeddings = chunks.stream()
.map(chunk -> embeddingModel.embed(chunk).content())
.collect(Collectors.toList());

Cache embeddings to avoid redundant API calls:
import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;
/**
 * Thread-safe memoizing wrapper around a MistralAiEmbeddingModel: each distinct
 * text is sent to the API at most once and its embedding is reused afterwards.
 */
public class EmbeddingCache {
// Backing model used to compute embeddings on cache misses.
private final MistralAiEmbeddingModel model;
// Concurrent map so the cache can safely be shared across threads.
private final Map<String, Embedding> cache = new ConcurrentHashMap<>();
public EmbeddingCache(MistralAiEmbeddingModel model) {
this.model = model;
}
/**
 * Returns the cached embedding for {@code text}, computing and storing it on first use.
 */
public Embedding embed(String text) {
return cache.computeIfAbsent(text, this::computeEmbedding);
}
// Cache-miss path: delegate to the underlying model.
private Embedding computeEmbedding(String text) {
return model.embed(text).content();
}
/** Discards all cached embeddings. */
public void clear() {
cache.clear();
}
}

Store embeddings efficiently:
// Option 1: In-memory (for small datasets)
Map<String, Embedding> embeddingStore = new HashMap<>();
// Option 2: Vector database (for large datasets)
// Use libraries like: Pinecone, Weaviate, Milvus, Qdrant
// Example with hypothetical vector DB:
VectorDatabase db = new VectorDatabase();
db.store("doc-1", embedding.vector(), metadata);

Choose the right similarity metric for your use case:
// Cosine similarity (recommended for normalized vectors)
double cosineSim = cosineSimilarity(vec1, vec2);
// Dot product (faster, equivalent for normalized vectors)
double dotProduct = dotProduct(vec1, vec2);
// Euclidean distance (less common for embeddings)
double euclidean = euclideanDistance(vec1, vec2);

Track token consumption:
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
TokenUsage usage = response.tokenUsage();
System.out.println("Input tokens: " + usage.inputTokenCount());
System.out.println("Total tokens: " + usage.totalTokenCount());
// Estimate cost
double costPerToken = 0.0001; // Example rate
double totalCost = usage.totalTokenCount() * costPerToken;
System.out.println("Estimated cost: $" + totalCost);

import io.pinecone.PineconeClient;
import io.pinecone.Index;
PineconeClient pinecone = new PineconeClient(apiKey);
Index index = pinecone.getIndex("my-index");
// Embed and store
List<TextSegment> segments = documents.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
Response<List<Embedding>> response = embeddingModel.embedAll(segments);
for (int i = 0; i < segments.size(); i++) {
String id = "doc-" + i;
float[] vector = response.content().get(i).vector();
Map<String, Object> metadata = Map.of(
"text", segments.get(i).text()
);
index.upsert(id, vector, metadata);
}
// Query
String query = "machine learning concepts";
Embedding queryEmb = embeddingModel.embed(query).content();
List<ScoredVector> results = index.query(queryEmb.vector(), 5);

import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.data.model.WeaviateObject;
WeaviateClient client = new WeaviateClient(config);
// Store with embeddings
for (int i = 0; i < documents.size(); i++) {
Embedding emb = embeddingModel.embed(documents.get(i)).content();
WeaviateObject object = WeaviateObject.builder()
.className("Document")
.properties(Map.of("text", documents.get(i)))
.vector(emb.vector())
.build();
client.data().creator().withObject(object).run();
}
// Semantic search
Embedding queryEmb = embeddingModel.embed(query).content();
Result<List<WeaviateObject>> results = client.data().objectsGetter()
.withClassName("Document")
.withNearVector(queryEmb.vector())
.withLimit(5)
.run();

Handle API errors gracefully:
try {
Response<Embedding> response = embeddingModel.embed(text);
Embedding embedding = response.content();
// Use embedding
} catch (Exception e) {
if (e.getMessage().contains("rate limit")) {
// Handle rate limiting
Thread.sleep(1000);
// Retry
} else if (e.getMessage().contains("token limit")) {
// Text too long
String shorterText = truncate(text, 8000);
Response<Embedding> response = embeddingModel.embed(shorterText);
} else {
// Other error
logger.error("Embedding failed", e);
}
}

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-mistral-ai