LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
Guide for migrating from V1 to V2 API and upgrading between versions.
Chroma V2 API (0.7.0+) introduces hierarchical organization with tenants, databases, and collections.
Key Differences:
Before (V1):
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
.build();

After (V2):
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2) // Add this
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
// Defaults: tenantName="default", databaseName="default"
.build();

ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
.tenantName("production") // New in V2
.databaseName("main") // New in V2
.collectionName("my-collection")
.timeout(Duration.ofSeconds(15))
.build();

- Add .apiVersion(ChromaApiVersion.V2) to the builder
- Optionally set tenantName and databaseName

V2 can use default tenant/database:
// V1 collection "my-collection"
// → V2 "default" tenant / "default" database / "my-collection"
ChromaEmbeddingStore storeV2 = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
// Uses default tenant and database
.collectionName("my-collection") // Same collection name
.build();

Note: Chroma handles this automatically. Your existing V1 collections are accessible in V2 under the default tenant/database.
If you want to organize collections under custom tenant/database:
public class DataMigration {
/**
 * Copies the entries of collection "my-collection" (read through the V1 API)
 * into the same-named collection under V2 tenant "production" / database "main".
 *
 * <p>Each entry keeps its original embedding ID, so IDs stored elsewhere stay
 * valid after the move and entries can be matched between source and destination.
 *
 * <p>At most 10000 entries are copied, because everything is fetched with a
 * single similarity search.
 */
public void migrateToCustomTenant() {
    // Source (V1 or V2 default)
    ChromaEmbeddingStore source = ChromaEmbeddingStore.builder()
        .apiVersion(ChromaApiVersion.V1)
        .baseUrl("http://localhost:8000")
        .collectionName("my-collection")
        .build();
    // Destination (V2 custom)
    ChromaEmbeddingStore dest = ChromaEmbeddingStore.builder()
        .apiVersion(ChromaApiVersion.V2)
        .baseUrl("http://localhost:8000")
        .tenantName("production")
        .databaseName("main")
        .collectionName("my-collection")
        .build();
    // Retrieve all documents: any query vector works because maxResults is set
    // high enough to return the whole collection.
    // NOTE(review): the placeholder vector presumably has to match the
    // dimensionality of the stored embeddings - adjust before running.
    Embedding anyEmbedding = Embedding.from(new float[]{1.0f, 0.0f, 0.0f});
    EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
        .queryEmbedding(anyEmbedding)
        .maxResults(10000) // Adjust based on collection size
        .build();
    EmbeddingSearchResult<TextSegment> results = source.search(request);
    // Copy to new tenant/database, keeping the original IDs
    List<String> ids = new ArrayList<>();
    List<Embedding> embeddings = new ArrayList<>();
    List<TextSegment> segments = new ArrayList<>();
    for (EmbeddingMatch<TextSegment> match : results.matches()) {
        ids.add(match.embeddingId());
        embeddings.add(match.embedding());
        segments.add(match.embedded());
    }
    if (ids.isEmpty()) {
        // Nothing to copy; avoid sending an empty write to the destination
        return;
    }
    dest.addAll(ids, embeddings, segments);
}
}

Note: This approach works for small to medium collections. For large collections, use batch processing.
public class BatchMigration {
/**
 * Copies a collection from the source store to the destination store in
 * batches of roughly {@code batchSize} entries, keeping original embedding IDs.
 *
 * <p>The search API used here has no offset parameter, so repeating the same
 * request would return the same matches forever. Instead, each round asks for
 * {@code alreadyMigrated + batchSize} results and copies only the matches whose
 * ID has not been seen yet. Migration stops when a round yields nothing new.
 *
 * @param batchSize number of new entries to aim for per round; must be positive
 * @throws IllegalArgumentException if {@code batchSize} is not positive
 */
public void migrateLargeCollection(int batchSize) {
    if (batchSize <= 0) {
        throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
    }
    ChromaEmbeddingStore source = createSourceStore();
    ChromaEmbeddingStore dest = createDestStore();
    // NOTE(review): the placeholder vector presumably has to match the
    // dimensionality of the stored embeddings - adjust before running.
    Embedding queryEmb = Embedding.from(new float[]{1.0f, 0.0f, 0.0f});
    // IDs already written to dest; fully qualified to avoid needing new imports
    java.util.Set<String> migratedIds = new java.util.HashSet<>();
    while (true) {
        int offset = migratedIds.size();
        // Retrieve enough results to reach past everything migrated so far
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
            .queryEmbedding(queryEmb)
            .maxResults(offset + batchSize)
            .build();
        EmbeddingSearchResult<TextSegment> results = source.search(request);
        // Keep only matches that have not been copied in an earlier round
        List<EmbeddingMatch<TextSegment>> fresh = results.matches().stream()
            .filter(match -> !migratedIds.contains(match.embeddingId()))
            .collect(Collectors.toList());
        if (fresh.isEmpty()) {
            // No unseen entries left: the whole collection has been copied
            break;
        }
        // Copy batch
        List<String> ids = fresh.stream()
            .map(EmbeddingMatch::embeddingId)
            .collect(Collectors.toList());
        List<Embedding> embeddings = fresh.stream()
            .map(EmbeddingMatch::embedding)
            .collect(Collectors.toList());
        List<TextSegment> segments = fresh.stream()
            .map(EmbeddingMatch::embedded)
            .collect(Collectors.toList());
        dest.addAll(ids, embeddings, segments);
        System.out.println("Migrated batch at offset " + offset);
        migratedIds.addAll(ids);
    }
}
}

Note: Chroma's search doesn't natively support offset/pagination. For very large collections, consider using metadata timestamps to track migration progress.
Maven:
<!-- From older version -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-chroma</artifactId>
<version>0.30.0</version>
</dependency>
<!-- To latest -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-chroma</artifactId>
<version>1.11.0</version>
</dependency>

Gradle:
// From: implementation 'dev.langchain4j:langchain4j-chroma:0.30.0'
implementation 'dev.langchain4j:langchain4j-chroma:1.11.0'

Migration:
// DEPRECATED
ChromaEmbeddingStore store = new ChromaEmbeddingStore(
"http://localhost:8000",
"my-collection",
Duration.ofSeconds(10),
true,
true
);
// RECOMMENDED
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
.timeout(Duration.ofSeconds(10))
.logRequests(true)
.logResponses(true)
.build();

New features (backward compatible):
// Optional: Add listeners for observability
EmbeddingStore<TextSegment> observed = store
.addListener(new LoggingEmbeddingStoreListener());

Backup data:
# Backup Chroma data directory
cp -r /path/to/chroma/data /path/to/backup

Update Chroma:
# Docker
docker pull chromadb/chroma:latest
# Or update pip package
pip install --upgrade chromadb

Restart Chroma:
docker run -p 8000:8000 chromadb/chroma:latest

Verify migration:
// Test connection
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.build();
Embedding test = Embedding.from(new float[]{1.0f, 0.0f, 0.0f});
String id = store.add(test);
store.remove(id);
System.out.println("Migration successful");

If migrating to multi-tenant setup:
// Before: All collections in single namespace
// Collections: "customer1-docs", "customer2-docs", "customer3-docs"
// After: Organized by tenant
public class TenantMigration {
/**
 * Moves each per-customer collection into its own tenant.
 *
 * <p>For every legacy collection name in the mapping, opens that collection
 * with the builder's default API version, opens a "documents" collection under
 * the mapped tenant (database "default") through the V2 API, and hands both
 * stores to {@code migrateCollection}.
 */
public void migrateToTenants() {
    // Legacy collection name -> tenant that owns the data after the move
    Map<String, String> collectionToTenant = Map.of(
        "customer1-docs", "customer-1",
        "customer2-docs", "customer-2",
        "customer3-docs", "customer-3"
    );
    for (String legacyCollection : collectionToTenant.keySet()) {
        String tenantName = collectionToTenant.get(legacyCollection);

        // Source: the flat, pre-tenant collection
        ChromaEmbeddingStore legacyStore = ChromaEmbeddingStore.builder()
            .baseUrl("http://localhost:8000")
            .collectionName(legacyCollection)
            .build();

        // Destination: one unified collection name, separated by tenant
        ChromaEmbeddingStore tenantStore = ChromaEmbeddingStore.builder()
            .apiVersion(ChromaApiVersion.V2)
            .baseUrl("http://localhost:8000")
            .tenantName(tenantName)
            .databaseName("default")
            .collectionName("documents")
            .build();

        migrateCollection(legacyStore, tenantStore);
    }
}
}

public class VersionedStore {
private final String baseUrl;
private final String collectionName;
/**
 * Builds a store for this instance's base URL and collection, using the
 * requested Chroma API version.
 *
 * @param useV2 {@code true} to use the V2 API, {@code false} to use V1
 * @return a newly built store
 */
public ChromaEmbeddingStore createStore(boolean useV2) {
    // Only the API version differs between the two variants
    ChromaApiVersion version = useV2 ? ChromaApiVersion.V2 : ChromaApiVersion.V1;
    return ChromaEmbeddingStore.builder()
        .apiVersion(version)
        .baseUrl(baseUrl)
        .collectionName(collectionName)
        .build();
}
}

public class FeatureFlaggedStore {
/**
 * Builds the store, switching to the V2 API when the environment variable
 * {@code USE_CHROMA_V2} is set to "true" (case-insensitive).
 *
 * <p>When the variable is unset or has any other value, the builder's default
 * API version is used and no tenant or database is set.
 *
 * @return a newly built store for collection "my-collection"
 */
public ChromaEmbeddingStore createStore() {
    // System.getenv returns a plain String (null when unset), not an Optional.
    // Boolean.parseBoolean(null) is false, so an unset flag leaves V2 disabled.
    boolean useV2 = Boolean.parseBoolean(System.getenv("USE_CHROMA_V2"));
    ChromaEmbeddingStore.Builder builder = ChromaEmbeddingStore.builder()
        .baseUrl("http://localhost:8000")
        .collectionName("my-collection");
    if (useV2) {
        builder.apiVersion(ChromaApiVersion.V2)
            .tenantName("production")
            .databaseName("main");
    }
    return builder.build();
}
}

public class MigrationValidator {
/**
 * Runs the same sample queries against both stores and reports differences
 * on standard error.
 *
 * <p>For each query the top 10 matches are compared: first the number of
 * matches, then the text of each match position by position. Only positions
 * present in both result lists are compared, so a shorter destination result
 * is reported as a count mismatch rather than failing with an exception.
 *
 * @param source store the data was migrated from
 * @param dest   store the data was migrated to
 */
public void validateMigration(
    ChromaEmbeddingStore source,
    ChromaEmbeddingStore dest
) {
    // Sample queries
    List<Embedding> testQueries = generateTestQueries();
    for (Embedding query : testQueries) {
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
            .queryEmbedding(query)
            .maxResults(10)
            .build();
        List<EmbeddingMatch<TextSegment>> sourceMatches = source.search(request).matches();
        List<EmbeddingMatch<TextSegment>> destMatches = dest.search(request).matches();
        // Compare results
        if (sourceMatches.size() != destMatches.size()) {
            System.err.println("Result count mismatch!");
        }
        // Compare content, but only over indexes that exist in both lists
        int comparable = Math.min(sourceMatches.size(), destMatches.size());
        for (int i = 0; i < comparable; i++) {
            TextSegment sourceSegment = sourceMatches.get(i).embedded();
            TextSegment destSegment = destMatches.get(i).embedded();
            // A match may carry no segment; treat that as null text
            String sourceText = sourceSegment == null ? null : sourceSegment.text();
            String destText = destSegment == null ? null : destSegment.text();
            boolean sameText = sourceText == null
                ? destText == null
                : sourceText.equals(destText);
            if (!sameText) {
                System.err.println("Content mismatch at index " + i);
            }
        }
    }
    System.out.println("Validation complete");
}
}

public class DualWriteStore {
private final ChromaEmbeddingStore v1Store;
private final ChromaEmbeddingStore v2Store;
/**
 * Writes the embedding and its segment to both the V1 and the V2 store under
 * one shared ID.
 *
 * <p>Using the same ID in both stores means the returned value can later
 * address the entry in either store, for example to remove it from both.
 *
 * @param embedding the vector to store
 * @param segment   the text segment the vector was computed from
 * @return the ID under which the entry was stored in both stores
 */
public String add(Embedding embedding, TextSegment segment) {
    // Let the first write generate the ID, then reuse it for the second write
    String id = v1Store.add(embedding, segment);
    v2Store.addAll(List.of(id), List.of(embedding), List.of(segment));
    return id;
}
/**
 * Answers a search from the V2 store only; the V1 store is never read here.
 *
 * @param request the search request to execute
 * @return the result returned by the V2 store
 */
public EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request) {
    // V2 is the primary store for reads
    EmbeddingSearchResult<TextSegment> primaryResult = v2Store.search(request);
    return primaryResult;
}
}

public class GradualCutover {
private double v2TrafficPercentage = 0.0;
/**
 * Routes a search to the V2 store with probability
 * {@code v2TrafficPercentage}, and to the V1 store otherwise.
 *
 * @param request the search request to execute
 * @return the result from whichever store was selected for this call
 */
public EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request) {
    // One random draw per call decides which store serves the request
    return (Math.random() < v2TrafficPercentage ? v2Store : v1Store).search(request);
}
/**
 * Sets the fraction of searches routed to the V2 store.
 *
 * <p>The value is clamped to the range [0.0, 1.0], so the stored fraction is
 * always a valid probability.
 *
 * @param percentage desired fraction of traffic for V2, where 1.0 means all traffic
 * @throws IllegalArgumentException if {@code percentage} is NaN
 */
public void increaseV2Traffic(double percentage) {
    if (Double.isNaN(percentage)) {
        // Any comparison against NaN is false, which would route everything to V1
        throw new IllegalArgumentException("percentage must not be NaN");
    }
    this.v2TrafficPercentage = Math.max(0.0, Math.min(1.0, percentage));
}
}

// Ensure correct tenant/database
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
.tenantName("default") // Check this matches
.databaseName("default") // Check this matches
.collectionName("my-collection")
.build();

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma@1.11.0