LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
Optimization strategies for ChromaEmbeddingStore operations.
// INEFFICIENT: Multiple single operations
for (Embedding embedding : embeddings) {
store.add(embedding); // N HTTP requests
}
// EFFICIENT: Single batch operation
List<String> ids = store.addAll(embeddings); // 1 HTTP request
Performance Impact:
// Small batches (10-50 items) - Good for real-time
int batchSize = 50;
// Medium batches (100-500 items) - Balanced
int batchSize = 200;
// Large batches (500-1000 items) - Throughput focused
int batchSize = 500;
Considerations:
/**
 * Indexes a large document collection in fixed-size batches so that each
 * Chroma round trip carries many embeddings instead of one.
 *
 * @param documents the texts to embed and store
 * @param batchSize number of documents sent per addAll() call
 */
public void indexLargeDataset(
    List<String> documents,
    int batchSize
) {
    int total = documents.size();
    int start = 0;
    while (start < total) {
        int stop = Math.min(start + batchSize, total);
        // subList is a view over the original list; nothing is copied here.
        List<String> slice = documents.subList(start, stop);
        List<Embedding> vectors = new ArrayList<>(slice.size());
        List<TextSegment> segments = new ArrayList<>(slice.size());
        for (String doc : slice) {
            vectors.add(embeddingModel.embed(doc).content());
            segments.add(TextSegment.from(doc));
        }
        // One HTTP request per batch instead of one per document.
        store.addAll(vectors, segments);
        start = stop;
    }
}
// GOOD: Create once, reuse many times
/**
 * Holds a single long-lived ChromaEmbeddingStore so every call reuses the
 * same client instead of opening a fresh connection per operation.
 */
public class DocumentService {
    private final ChromaEmbeddingStore store;

    public DocumentService() {
        this.store =
            ChromaEmbeddingStore.builder()
                .baseUrl("http://localhost:8000")
                .collectionName("documents")
                .build();
    }

    /** Stores one embedding together with its source text segment. */
    public void addDocument(Embedding emb, TextSegment seg) {
        store.add(emb, seg);
    }

    /** Returns the 10 nearest stored segments for the given query vector. */
    public EmbeddingSearchResult<TextSegment> search(Embedding query) {
        EmbeddingSearchRequest request =
            EmbeddingSearchRequest.builder()
                .queryEmbedding(query)
                .maxResults(10)
                .build();
        return store.search(request);
    }
}
// BAD: Create new store for each operation
public void addDocument(Embedding emb, TextSegment seg) {
    ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
        .baseUrl("http://localhost:8000")
        .build();
    store.add(emb, seg); // Creates new connection every time
}

/**
 * Round-robin pool of ChromaEmbeddingStore clients, all created once at
 * construction time and then reused by every caller.
 */
public class StorePool {
    private final List<ChromaEmbeddingStore> stores;
    private final AtomicInteger counter = new AtomicInteger(0);

    /**
     * @param poolSize   number of store instances to create
     * @param baseUrl    Chroma server base URL
     * @param collection collection all pooled stores operate on
     */
    public StorePool(int poolSize, String baseUrl, String collection) {
        this.stores = IntStream.range(0, poolSize)
            .mapToObj(i -> ChromaEmbeddingStore.builder()
                .baseUrl(baseUrl)
                .collectionName(collection)
                .build())
            .collect(Collectors.toList());
    }

    /** Returns the next store in round-robin order; safe for concurrent callers. */
    public ChromaEmbeddingStore getStore() {
        // floorMod is the canonical way to keep the index non-negative once
        // the counter overflows; clearer than the original Math.abs(x % n).
        int index = Math.floorMod(counter.getAndIncrement(), stores.size());
        return stores.get(index);
    }
}
// Short timeout for quick operations
ChromaEmbeddingStore fastStore = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.timeout(Duration.ofSeconds(5))
.build();
// Long timeout for large batches
ChromaEmbeddingStore batchStore = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.timeout(Duration.ofSeconds(60))
.build();
public class AdaptiveStore {
/**
 * Adds an embedding, retrying with a progressively longer timeout whenever
 * the request times out.
 *
 * <p>Fix: the original caught the checked {@code HttpTimeoutException}
 * directly and re-threw it, which does not compile — {@code store.add} does
 * not declare it and this method has no {@code throws} clause. The store
 * surfaces transport failures wrapped in runtime exceptions, so we catch
 * those and inspect the cause instead.
 *
 * @param embedding the vector to store
 * @param segment   the text segment associated with the vector
 * @return the id assigned by the store
 * @throws RuntimeException when the failure is not a timeout or all retries
 *                          are exhausted
 */
public String addWithAdaptiveTimeout(
    Embedding embedding,
    TextSegment segment
) {
    int baseTimeoutSeconds = 5;
    int maxRetries = 3;
    for (int attempt = 0; attempt < maxRetries; attempt++) {
        // Each retry gets a proportionally longer timeout: 5s, 10s, 15s.
        ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
            .baseUrl("http://localhost:8000")
            .timeout(Duration.ofSeconds((long) baseTimeoutSeconds * (attempt + 1)))
            .build();
        try {
            return store.add(embedding, segment);
        } catch (RuntimeException e) {
            boolean timedOut = e.getCause() instanceof java.net.http.HttpTimeoutException;
            if (!timedOut || attempt == maxRetries - 1) {
                throw e; // not a timeout, or out of retries — propagate
            }
            System.out.println("Timeout, increasing to "
                + (baseTimeoutSeconds * (attempt + 2)) + "s");
        }
    }
    throw new RuntimeException("Failed after retries");
}
}
// Retrieve only what you need
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(5) // Small result set = faster
.build();
// Avoid retrieving too many results
EmbeddingSearchRequest slowRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(1000) // Large result set = slower
.build();
// Filter low-quality results early
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.minScore(0.7) // May reduce processing
.build();
// More selective = faster
Filter selective = metadataKey("id").isEqualTo("exact-value");
// Less selective = slower
Filter broad = metadataKey("year").isGreaterThan(2000);
// Small model - faster, less accurate
EmbeddingModel fast = new AllMiniLmL6V2EmbeddingModel();
// Large model - slower, more accurate
// Choose based on accuracy vs speed requirements
// INEFFICIENT: Embed one at a time
List<Embedding> embeddings = new ArrayList<>();
for (String doc : documents) {
embeddings.add(embeddingModel.embed(doc).content());
}
// EFFICIENT: Batch embed if model supports it
// Some models support batch embedding for better performance
List<Embedding> embeddings = documents.stream()
.map(doc -> embeddingModel.embed(doc).content())
.collect(Collectors.toList());
public class CachedEmbeddingService {
// text -> embedding cache; ConcurrentHashMap makes it safe for concurrent callers.
private final Map<String, Embedding> cache = new ConcurrentHashMap<>();
// Underlying model used to compute embeddings on cache misses.
private final EmbeddingModel model;
/**
 * Returns the embedding for the given text, computing and caching it on
 * first use. computeIfAbsent is atomic per key, so the model is invoked at
 * most once per distinct text even under concurrent access.
 */
public Embedding getEmbedding(String text) {
return cache.computeIfAbsent(
text,
t -> model.embed(t).content()
);
}
// Drops every cached embedding; subsequent lookups re-invoke the model.
public void clearCache() {
cache.clear();
}
// Embeds and indexes documents in parallel batches. Only the embedding work
// actually runs concurrently; writes to the shared store are serialized below.
}public void indexDocumentsParallel(List<String> documents) {
int batchSize = 100;
// Number of batches, rounded up so a partial final batch is still processed.
IntStream.range(0, (documents.size() + batchSize - 1) / batchSize)
.parallel()
.forEach(batchIndex -> {
int start = batchIndex * batchSize;
int end = Math.min(start + batchSize, documents.size());
// subList is a read-only view into the shared list; safe because the
// ranges of different batches never overlap.
List<String> batch = documents.subList(start, end);
List<Embedding> embeddings = batch.stream()
.map(doc -> embeddingModel.embed(doc).content())
.collect(Collectors.toList());
List<TextSegment> segments = batch.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
// Writes are guarded: ChromaEmbeddingStore is not guaranteed thread-safe
// for writes, so addAll() calls are serialized. This also means the store
// write becomes the sequential bottleneck of the parallel pipeline.
synchronized (store) {
store.addAll(embeddings, segments);
}
});
}Note: ChromaEmbeddingStore is not guaranteed thread-safe for writes. Synchronize as needed.
/**
 * Runs one similarity search per query string; the queries are embedded and
 * searched concurrently via a parallel stream.
 *
 * @param queries natural-language queries to search for
 * @return one search result per query, in the input order
 */
public List<EmbeddingSearchResult<TextSegment>> searchMultiple(
    List<String> queries
) {
    return queries.parallelStream()
        .map(this::searchSingle)
        .collect(Collectors.toList());
}

/** Embeds one query string and searches the store for its 5 nearest segments. */
private EmbeddingSearchResult<TextSegment> searchSingle(String query) {
    Embedding queryVector = embeddingModel.embed(query).content();
    EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
        .queryEmbedding(queryVector)
        .maxResults(5)
        .build();
    return store.search(request);
}
// For very large datasets, process in streams
/**
 * Streams a file line by line and indexes it in batches of {@code batchSize},
 * so the whole file never has to be held in memory at once.
 *
 * <p>Fix: removed an unused {@code AtomicInteger counter} local that the
 * original declared but never read or incremented.
 *
 * @param filePath  file to index, one document per line
 * @param batchSize number of lines handed to processBatch() at a time
 * @throws IOException if the file cannot be opened or read
 */
public void indexLargeFile(Path filePath, int batchSize) throws IOException {
    try (Stream<String> lines = Files.lines(filePath)) {
        List<String> batch = new ArrayList<>();
        lines.forEach(line -> {
            batch.add(line);
            if (batch.size() >= batchSize) {
                processBatch(batch);
                batch.clear();
            }
        });
        // Flush the final partial batch, if any.
        if (!batch.isEmpty()) {
            processBatch(batch);
        }
    }
}
/** Embeds every line in the batch and stores the vectors with their source text. */
private void processBatch(List<String> batch) {
    List<Embedding> vectors = new ArrayList<>(batch.size());
    List<TextSegment> segments = new ArrayList<>(batch.size());
    // Single pass: build both parallel lists together, keeping them 1:1.
    for (String doc : batch) {
        vectors.add(embeddingModel.embed(doc).content());
        segments.add(TextSegment.from(doc));
    }
    store.addAll(vectors, segments);
}
// Keep metadata lean
Metadata compact = new Metadata()
.put("id", id)
.put("category", category);
// Avoid storing large text in metadata
// Store in TextSegment.text() instead
public class TimedStore {
/** Adds a single embedding and logs how long the call took in milliseconds. */
public String addWithTiming(Embedding emb, TextSegment seg) {
    long startNanos = System.nanoTime();
    String id = store.add(emb, seg);
    long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000;
    System.out.println("Add took: " + elapsedMillis + "ms");
    return id;
}
/**
 * Adds a batch of embeddings and logs the total and per-item latency.
 *
 * <p>Fixes: the original divided by {@code embeddings.size()} without
 * checking for an empty batch ({@code ArithmeticException}) and stored the
 * ids returned by {@code addAll} in an unused local.
 *
 * @param embeddings vectors to store; may be empty
 * @param segments   text segments paired 1:1 with the embeddings
 */
public void batchAddWithTiming(
    List<Embedding> embeddings,
    List<TextSegment> segments
) {
    if (embeddings.isEmpty()) {
        System.out.println("Batch add skipped: empty batch");
        return;
    }
    long start = System.nanoTime();
    store.addAll(embeddings, segments);
    long duration = System.nanoTime() - start;
    long avgPerItem = duration / embeddings.size();
    System.out.println("Batch add took: " + (duration / 1_000_000) + "ms");
    System.out.println("Avg per item: " + (avgPerItem / 1_000_000) + "ms");
}
}
public class PerformanceMetrics {
private final LongAdder totalOperations = new LongAdder();
private final LongAdder totalDuration = new LongAdder();
private long minDuration = Long.MAX_VALUE;
private long maxDuration = 0;
/**
 * Records one operation's duration in nanoseconds. The LongAdders are
 * lock-free; min/max are updated under this object's monitor because a
 * read-compare-write is a compound operation.
 */
public void recordOperation(long durationNanos) {
totalOperations.increment();
totalDuration.add(durationNanos);
synchronized (this) {
minDuration = Math.min(minDuration, durationNanos);
maxDuration = Math.max(maxDuration, durationNanos);
}
}
/**
 * Prints aggregate latency statistics.
 *
 * <p>Fixes: min/max are now read under the same monitor that
 * {@code recordOperation} uses to write them (reading them unsynchronized
 * risked stale values), and when no operations were recorded, min prints
 * as 0 instead of a value derived from {@code Long.MAX_VALUE}.
 */
public void printStats() {
    long ops = totalOperations.sum();
    long duration = totalDuration.sum();
    long avg = ops > 0 ? duration / ops : 0;
    long min;
    long max;
    synchronized (this) {
        min = (ops > 0) ? minDuration : 0;
        max = maxDuration;
    }
    System.out.println("Operations: " + ops);
    System.out.println("Avg: " + (avg / 1_000_000) + "ms");
    System.out.println("Min: " + (min / 1_000_000) + "ms");
    System.out.println("Max: " + (max / 1_000_000) + "ms");
}
}
Key tips: use addAll() for multiple embeddings, limit maxResults to what you need, and set minScore to filter early.
public class Benchmark {
/**
 * Measures end-to-end indexing time for several batch sizes against the
 * same 10k-document test set, clearing the collection between runs so the
 * measurements do not interfere with each other.
 */
public void benchmarkBatchSizes() {
    List<String> documents = generateTestDocuments(10000);
    for (int batchSize : new int[] {10, 50, 100, 500, 1000}) {
        long startMillis = System.currentTimeMillis();
        indexInBatches(documents, batchSize);
        long elapsed = System.currentTimeMillis() - startMillis;
        System.out.println("Batch size " + batchSize + ": " + elapsed + "ms");
        // Reset the collection before the next run.
        store.removeAll();
    }
}
}
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma@1.11.0