Milvus embedding store integration for LangChain4j
—
Similarity search operations and filtering.
EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request);
Performs a similarity search and returns the stored embeddings most similar to the query embedding.
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingMatch;
Embedding queryEmbedding = model.embed(query).content();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
List<EmbeddingMatch<TextSegment>> matches = result.matches();
.queryEmbedding(Embedding queryEmbedding)
The embedding to search for.
.maxResults(Integer maxResults)
Limits the number of results returned.
.maxResults(10) // Return top 10 matches
.minScore(Double minScore)
Filters results by a similarity score threshold.
.minScore(0.75) // Only matches with score >= 0.75
Score Interpretation:
.filter(Filter filter)
Filters results by metadata.
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
Filter filter = metadataKey("category").isEqualTo("docs");
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.filter(filter)
.build();
for (EmbeddingMatch<TextSegment> match : matches) {
double score = match.score();
String id = match.embeddingId();
Embedding embedding = match.embedding(); // null unless retrieveEmbeddingsOnSearch=true
TextSegment segment = match.embedded();
if (segment != null) {
String text = segment.text();
Metadata metadata = segment.metadata();
}
}
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
// Equality
Filter filter = metadataKey("category").isEqualTo("tech");
// Greater than
Filter filter = metadataKey("year").isGreaterThan(2020);
// Greater than or equal
Filter filter = metadataKey("priority").isGreaterThanOrEqualTo(5);
// Less than
Filter filter = metadataKey("size").isLessThan(1000);
// Less than or equal
Filter filter = metadataKey("version").isLessThanOrEqualTo(2.0);
// In list
Filter filter = metadataKey("status").isIn("active", "pending", "approved");
import static dev.langchain4j.store.embedding.filter.Filter.and;
import static dev.langchain4j.store.embedding.filter.Filter.or;
// AND
Filter combined = and(
metadataKey("category").isEqualTo("tech"),
metadataKey("year").isGreaterThan(2020)
);
// OR
Filter combined = or(
metadataKey("category").isEqualTo("tech"),
metadataKey("category").isEqualTo("science")
);
// Complex: (A OR B) AND C
Filter complex = and(
or(
metadataKey("category").isEqualTo("tech"),
metadataKey("category").isEqualTo("science")
),
metadataKey("year").isGreaterThan(2020)
);
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(5)
.build();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(100)
.minScore(0.8) // Only high-quality
.build();
Filter filter = metadataKey("category").isEqualTo("documentation");
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.filter(filter)
.build();
Filter recent = metadataKey("year").isGreaterThanOrEqualTo(2023);
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.filter(recent)
.build();
Filter dept = metadataKey("department").isEqualTo("engineering");
Filter status = metadataKey("status").isEqualTo("approved");
Filter confidence = metadataKey("confidence").isGreaterThan(0.9);
Filter combined = and(dept, status, confidence);
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(20)
.minScore(0.7)
.filter(combined)
.build();
By default, embedding vectors are NOT returned in search results. Set retrieveEmbeddingsOnSearch(true) on the store builder to include them:
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder()
.host("localhost")
.collectionName("my_collection")
.dimension(384)
.retrieveEmbeddingsOnSearch(true) // Enable
.build();
// Now match.embedding() is not null
for (EmbeddingMatch<TextSegment> match : results.matches()) {
Embedding embedding = match.embedding(); // Not null
System.out.println("Dimension: " + embedding.dimension());
}
Performance Note: Enabling retrieveEmbeddingsOnSearch adds extra query overhead. Only enable it if you need the embedding vectors.
import io.milvus.common.clientenum.ConsistencyLevelEnum;
// For real-time requirements
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder()
.consistencyLevel(ConsistencyLevelEnum.STRONG)
.build();
Request only what you need:
// Good: Request exactly what you need
.maxResults(10)
// Bad: Request more then filter locally
.maxResults(1000) // Then use only 10
// Efficient: Simple equality
Filter efficient = metadataKey("category").isEqualTo("tech");
// Less efficient: Complex nested
Filter complex = and(or(filter1, filter2), or(filter3, filter4));
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
public void performSearch(String query) {
// Generate query embedding
Embedding queryEmbedding = model.embed(query).content();
// Build filter
Filter filter = metadataKey("category").isEqualTo("docs");
// Create request
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.minScore(0.75)
.filter(filter)
.build();
// Execute search
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Process results
System.out.println("Found " + result.matches().size() + " matches");
for (EmbeddingMatch<TextSegment> match : result.matches()) {
System.out.println("Score: " + match.score());
System.out.println("ID: " + match.embeddingId());
TextSegment segment = match.embedded();
if (segment != null) {
System.out.println("Text: " + segment.text());
System.out.println("Metadata: " + segment.metadata());
}
System.out.println();
}
}
match.embedded() is null
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-milvus