LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
Removing embeddings from the Chroma vector store.
store.remove("embedding-id-to-remove");

This is a convenience method from the EmbeddingStore interface (default implementation).
import java.util.Arrays;
List<String> idsToRemove = Arrays.asList("id1", "id2", "id3");
store.removeAll(idsToRemove);

import java.util.Collection;
Collection<String> ids = getIdsToRemove();
store.removeAll(ids);

import dev.langchain4j.store.embedding.filter.Filter;
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.*;
// Remove all archived documents
Filter filter = metadataKey("status").isEqualTo("archived");
store.removeAll(filter);

// Remove old draft documents
Filter filter = metadataKey("status").isEqualTo("draft")
.and(metadataKey("year").isLessThan(2020));
store.removeAll(filter);

// Remove documents with low ratings
Filter lowRating = metadataKey("rating").isLessThan(3.0);
store.removeAll(lowRating);
// Remove old entries by timestamp
long cutoffTime = System.currentTimeMillis() - (365L * 24 * 60 * 60 * 1000);
Filter oldEntries = metadataKey("timestamp").isLessThan(cutoffTime);
store.removeAll(oldEntries);

// Remove documents with specific categories
Filter categories = metadataKey("category")
.isIn(Arrays.asList("spam", "outdated", "duplicate"));
store.removeAll(categories);

// Remove: (status = "draft" OR status = "archived") AND year < 2021
Filter complex = metadataKey("status").isEqualTo("draft")
.or(metadataKey("status").isEqualTo("archived"))
.and(metadataKey("year").isLessThan(2021));
store.removeAll(complex);

store.removeAll();

Warning: This deletes and recreates the entire collection. All embeddings are permanently removed.
Implementation Note: Internally, this operation deletes the existing collection and then recreates it.
// Remove documents older than 1 year
long oneYearAgo = System.currentTimeMillis() - (365L * 24 * 60 * 60 * 1000);
Filter oldDocuments = metadataKey("created_at").isLessThan(oneYearAgo);
store.removeAll(oldDocuments);

// Remove temporary or test data
Filter testData = metadataKey("environment").isEqualTo("test")
.or(metadataKey("temporary").isEqualTo(true));
store.removeAll(testData);

// After finding duplicates via search
List<String> duplicateIds = findDuplicateEmbeddings();
store.removeAll(duplicateIds);

// Remove specific IDs in batches to avoid large operations
List<String> allIdsToRemove = getLargeIdList();
int batchSize = 100;
for (int i = 0; i < allIdsToRemove.size(); i += batchSize) {
int end = Math.min(i + batchSize, allIdsToRemove.size());
List<String> batch = allIdsToRemove.subList(i, end);
store.removeAll(batch);
}

// Update documents by removing old and adding new versions
List<String> outdatedIds = Arrays.asList("doc1", "doc2", "doc3");
store.removeAll(outdatedIds);
// Add updated versions
List<Embedding> updatedEmbeddings = generateNewEmbeddings();
List<TextSegment> updatedSegments = createNewSegments();
store.addAll(outdatedIds, updatedEmbeddings, updatedSegments);

// Preview what will be removed before actually removing
Filter filter = metadataKey("status").isEqualTo("archived");
// Search to preview
EmbeddingSearchRequest previewRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(anyEmbedding) // Use any embedding
.maxResults(1000)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> preview = store.search(previewRequest);
System.out.println("Will remove " + preview.matches().size() + " documents");
// Confirm and remove
if (confirmDeletion()) {
store.removeAll(filter);
}

// Remove all documents from a specific source
Filter sourceFilter = metadataKey("source").isEqualTo("deprecated-api");
store.removeAll(sourceFilter);

// DANGEROUS - removes everything
// store.removeAll();
// SAFER - be explicit about what you're removing
if (shouldClearCollection()) {
System.out.println("WARNING: About to remove all embeddings");
if (confirmAction()) {
store.removeAll();
System.out.println("Collection cleared");
}
}

// Before bulk removal, optionally backup
Filter toRemove = metadataKey("year").isLessThan(2020);
// Retrieve data first
EmbeddingSearchRequest backupRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(anyEmbedding)
.maxResults(10000)
.filter(toRemove)
.build();
EmbeddingSearchResult<TextSegment> backup = store.search(backupRequest);
saveBackup(backup);
// Then remove
store.removeAll(toRemove);

Filter filter = metadataKey("status").isEqualTo("draft");
// Test filter with search first to see what matches
EmbeddingSearchRequest testRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(sampleEmbedding)
.maxResults(10)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> testResult = store.search(testRequest);
if (testResult.matches().isEmpty()) {
System.out.println("No matches found - filter may be incorrect");
} else {
System.out.println("Filter matches " + testResult.matches().size() + " documents");
// Proceed with removal
store.removeAll(filter);
}

try {
store.removeAll(idsToRemove);
} catch (IllegalArgumentException e) {
// Invalid IDs or filter
System.err.println("Invalid removal parameters: " + e.getMessage());
} catch (java.net.http.HttpTimeoutException e) {
// Operation timed out
System.err.println("Removal timed out: " + e.getMessage());
} catch (Exception e) {
// Network or Chroma errors
System.err.println("Removal failed: " + e.getMessage());
}

// Increase timeout for large removal operations
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("large-collection")
.timeout(Duration.ofSeconds(60)) // Longer timeout
.build();
Filter largeFilter = metadataKey("category").isIn(manyCategories);
store.removeAll(largeFilter);

Removing by filter is often more efficient than removing by IDs:
// LESS EFFICIENT: Remove many IDs individually
for (String id : manyIds) {
store.remove(id); // N operations
}
// MORE EFFICIENT: Remove many IDs in batch
store.removeAll(manyIds); // 1 operation
// MOST EFFICIENT: Remove by filter (if applicable)
Filter filter = metadataKey("batch_id").isEqualTo(batchId);
store.removeAll(filter); // 1 operation, server-side filtering

For very large collections, consider:
// Option 1: Remove in batches by filter
for (int year = 2000; year < 2020; year++) {
Filter yearFilter = metadataKey("year").isEqualTo(year);
store.removeAll(yearFilter);
System.out.println("Removed year: " + year);
}
// Option 2: Remove all and rebuild if removing most data
if (shouldRebuild()) {
store.removeAll(); // Clear everything
// Re-index only what you need
reindexCurrentData(store);
}

Unlike add and search operations, remove operations:
Best Practice: Use search with the same filter first to preview what will be removed.
See: ChromaEmbeddingStore API for complete method signatures.
Related:
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma@1.11.0