LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
—
Removing embeddings from the Chroma vector store.
store.remove("embedding-id-to-remove");

This is a convenience method from the EmbeddingStore interface (default implementation).
import java.util.Arrays;
List<String> idsToRemove = Arrays.asList("id1", "id2", "id3");
store.removeAll(idsToRemove);

import java.util.Collection;
Collection<String> ids = getIdsToRemove();
store.removeAll(ids);

import dev.langchain4j.store.embedding.filter.Filter;
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.*;
// Remove all archived documents
Filter filter = metadataKey("status").isEqualTo("archived");
store.removeAll(filter);

// Remove old draft documents
Filter filter = metadataKey("status").isEqualTo("draft")
.and(metadataKey("year").isLessThan(2020));
store.removeAll(filter);

// Remove documents with low ratings
Filter lowRating = metadataKey("rating").isLessThan(3.0);
store.removeAll(lowRating);
// Remove old entries by timestamp
long cutoffTime = System.currentTimeMillis() - (365L * 24 * 60 * 60 * 1000);
Filter oldEntries = metadataKey("timestamp").isLessThan(cutoffTime);
store.removeAll(oldEntries);

// Remove documents with specific categories
Filter categories = metadataKey("category")
.isIn(Arrays.asList("spam", "outdated", "duplicate"));
store.removeAll(categories);

// Remove: (status = "draft" OR status = "archived") AND year < 2021
Filter complex = metadataKey("status").isEqualTo("draft")
.or(metadataKey("status").isEqualTo("archived"))
.and(metadataKey("year").isLessThan(2021));
store.removeAll(complex);

store.removeAll();

Warning: This deletes and recreates the entire collection. All embeddings are permanently removed.
Implementation Note: Internally, this operation:
// Remove documents older than 1 year
long oneYearAgo = System.currentTimeMillis() - (365L * 24 * 60 * 60 * 1000);
Filter oldDocuments = metadataKey("created_at").isLessThan(oneYearAgo);
store.removeAll(oldDocuments);

// Remove temporary or test data
Filter testData = metadataKey("environment").isEqualTo("test")
.or(metadataKey("temporary").isEqualTo(true));
store.removeAll(testData);

// After finding duplicates via search
List<String> duplicateIds = findDuplicateEmbeddings();
store.removeAll(duplicateIds);

// Remove specific IDs in batches to avoid large operations
List<String> allIdsToRemove = getLargeIdList();
int batchSize = 100;
for (int i = 0; i < allIdsToRemove.size(); i += batchSize) {
int end = Math.min(i + batchSize, allIdsToRemove.size());
List<String> batch = allIdsToRemove.subList(i, end);
store.removeAll(batch);
}

// Update documents by removing old and adding new versions
List<String> outdatedIds = Arrays.asList("doc1", "doc2", "doc3");
store.removeAll(outdatedIds);
// Add updated versions
List<Embedding> updatedEmbeddings = generateNewEmbeddings();
List<TextSegment> updatedSegments = createNewSegments();
store.addAll(outdatedIds, updatedEmbeddings, updatedSegments);

// Preview what will be removed before actually removing
Filter filter = metadataKey("status").isEqualTo("archived");
// Search to preview
EmbeddingSearchRequest previewRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(anyEmbedding) // Use any embedding
.maxResults(1000)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> preview = store.search(previewRequest);
System.out.println("Will remove " + preview.matches().size() + " documents");
// Confirm and remove
if (confirmDeletion()) {
store.removeAll(filter);
}

// Remove all documents from a specific source
Filter sourceFilter = metadataKey("source").isEqualTo("deprecated-api");
store.removeAll(sourceFilter);

// DANGEROUS - removes everything
// store.removeAll();
// SAFER - be explicit about what you're removing
if (shouldClearCollection()) {
System.out.println("WARNING: About to remove all embeddings");
if (confirmAction()) {
store.removeAll();
System.out.println("Collection cleared");
}
}

// Before bulk removal, optionally backup
Filter toRemove = metadataKey("year").isLessThan(2020);
// Retrieve data first
EmbeddingSearchRequest backupRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(anyEmbedding)
.maxResults(10000)
.filter(toRemove)
.build();
EmbeddingSearchResult<TextSegment> backup = store.search(backupRequest);
saveBackup(backup);
// Then remove
store.removeAll(toRemove);

Filter filter = metadataKey("status").isEqualTo("draft");
// Test filter with search first to see what matches
EmbeddingSearchRequest testRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(sampleEmbedding)
.maxResults(10)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> testResult = store.search(testRequest);
if (testResult.matches().isEmpty()) {
System.out.println("No matches found - filter may be incorrect");
} else {
System.out.println("Filter matches " + testResult.matches().size() + " documents");
// Proceed with removal
store.removeAll(filter);
}

try {
store.removeAll(idsToRemove);
} catch (IllegalArgumentException e) {
// Invalid IDs or filter
System.err.println("Invalid removal parameters: " + e.getMessage());
} catch (java.net.http.HttpTimeoutException e) {
// Operation timed out
System.err.println("Removal timed out: " + e.getMessage());
} catch (Exception e) {
// Network or Chroma errors
System.err.println("Removal failed: " + e.getMessage());
}

// Increase timeout for large removal operations
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("large-collection")
.timeout(Duration.ofSeconds(60)) // Longer timeout
.build();
Filter largeFilter = metadataKey("category").isIn(manyCategories);
store.removeAll(largeFilter);

Removing by filter is often more efficient than removing by IDs:
// LESS EFFICIENT: Remove many IDs individually
for (String id : manyIds) {
store.remove(id); // N operations
}
// MORE EFFICIENT: Remove many IDs in batch
store.removeAll(manyIds); // 1 operation
// MOST EFFICIENT: Remove by filter (if applicable)
Filter filter = metadataKey("batch_id").isEqualTo(batchId);
store.removeAll(filter); // 1 operation, server-side filtering

For very large collections, consider:
// Option 1: Remove in batches by filter
for (int year = 2000; year < 2020; year++) {
Filter yearFilter = metadataKey("year").isEqualTo(year);
store.removeAll(yearFilter);
System.out.println("Removed year: " + year);
}
// Option 2: Remove all and rebuild if removing most data
if (shouldRebuild()) {
store.removeAll(); // Clear everything
// Re-index only what you need
reindexCurrentData(store);
}

Unlike add and search operations, remove operations:
Best Practice: Use search with the same filter first to preview what will be removed.
See: ChromaEmbeddingStore API for complete method signatures.
Related:
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma