LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
—
Guide for migrating from V1 to V2 API and upgrading between versions.
Chroma V2 API (0.7.0+) introduces hierarchical organization with tenants, databases, and collections.
Key Differences:
Before (V1):
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
.build();
After (V2):
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2) // Add this
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
// Defaults: tenantName="default", databaseName="default"
.build();ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
.tenantName("production") // New in V2
.databaseName("main") // New in V2
.collectionName("my-collection")
.timeout(Duration.ofSeconds(15))
.build();
- Add .apiVersion(ChromaApiVersion.V2) to the builder.
- Optionally set tenantName and databaseName.
V2 can use the default tenant/database:
// V1 collection "my-collection"
// → V2 "default" tenant / "default" database / "my-collection"
ChromaEmbeddingStore storeV2 = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
// Uses default tenant and database
.collectionName("my-collection") // Same collection name
.build();
Note: Chroma handles this automatically. Your existing V1 collections are accessible in V2 under the default tenant/database.
If you want to organize collections under custom tenant/database:
public class DataMigration {
/**
 * Copies the entries of "my-collection" from a V1 store into the collection of
 * the same name under the "production" tenant / "main" database of a V2 store.
 *
 * <p>All entries are fetched with a single similarity search capped at 10000
 * results, so anything beyond that cap is not copied.
 */
public void migrateToCustomTenant() {
    // Where the data lives today (V1 or V2 default)
    ChromaEmbeddingStore legacyStore = ChromaEmbeddingStore.builder()
            .apiVersion(ChromaApiVersion.V1)
            .baseUrl("http://localhost:8000")
            .collectionName("my-collection")
            .build();

    // Where the data should end up (V2 custom tenant/database)
    ChromaEmbeddingStore tenantStore = ChromaEmbeddingStore.builder()
            .apiVersion(ChromaApiVersion.V2)
            .baseUrl("http://localhost:8000")
            .tenantName("production")
            .databaseName("main")
            .collectionName("my-collection")
            .build();

    // The query vector is only a placeholder used to pull entries back out.
    // NOTE(review): presumably its length must match the collection's
    // embedding dimension - confirm before running against real data.
    EmbeddingSearchRequest fetchAll = EmbeddingSearchRequest.builder()
            .queryEmbedding(Embedding.from(new float[]{1.0f, 0.0f, 0.0f}))
            .maxResults(10000) // Adjust based on collection size
            .build();
    List<EmbeddingMatch<TextSegment>> found = legacyStore.search(fetchAll).matches();

    // Split each match into the two parallel lists addAll expects
    List<Embedding> vectors = new ArrayList<>();
    List<TextSegment> texts = new ArrayList<>();
    found.forEach(hit -> {
        vectors.add(hit.embedding());
        texts.add(hit.embedded());
    });

    tenantStore.addAll(vectors, texts);
}
}
Note: This approach works for small to medium collections. For large collections, use batch processing.
public class BatchMigration {
/**
 * Copies a collection from the source store to the destination store in
 * batches of roughly {@code batchSize} entries.
 *
 * <p>The search request carries no offset, so repeating the same request
 * would return the same matches forever. Instead, each round widens the
 * search window by {@code batchSize} and copies only the matches whose ids
 * have not been copied yet. The loop ends when a round yields nothing new or
 * the source returns fewer matches than requested.
 *
 * <p>Every round re-reads the matches already copied, so total read cost
 * grows quadratically with collection size.
 *
 * @param batchSize number of additional entries to request per round; must be positive
 * @throws IllegalArgumentException if {@code batchSize} is not positive
 */
public void migrateLargeCollection(int batchSize) {
    if (batchSize <= 0) {
        throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
    }

    ChromaEmbeddingStore source = createSourceStore();
    ChromaEmbeddingStore dest = createDestStore();
    Embedding queryEmb = Embedding.from(new float[]{1.0f, 0.0f, 0.0f});

    // Ids already written to dest; fully qualified so the snippet needs no extra imports
    java.util.Set<String> migratedIds = new java.util.HashSet<>();
    int offset = 0;
    int window = batchSize;

    while (true) {
        // Retrieve the current window (includes entries copied in earlier rounds)
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryEmb)
                .maxResults(window)
                .build();
        List<EmbeddingMatch<TextSegment>> matches = source.search(request).matches();

        // Keep only the entries that have not been copied yet
        List<EmbeddingMatch<TextSegment>> fresh = matches.stream()
                .filter(match -> !migratedIds.contains(match.embeddingId()))
                .collect(Collectors.toList());
        if (fresh.isEmpty()) {
            break;
        }

        // Copy batch
        List<Embedding> embeddings = fresh.stream()
                .map(EmbeddingMatch::embedding)
                .collect(Collectors.toList());
        List<TextSegment> segments = fresh.stream()
                .map(EmbeddingMatch::embedded)
                .collect(Collectors.toList());
        dest.addAll(embeddings, segments);
        fresh.forEach(match -> migratedIds.add(match.embeddingId()));

        System.out.println("Migrated batch at offset " + offset);
        offset += fresh.size();

        // Fewer matches than requested means the source has nothing further
        if (matches.size() < window) {
            break;
        }
        window += batchSize;
    }
}
}
Note: Chroma's search doesn't natively support offset/pagination. For very large collections, consider using metadata timestamps to track migration progress.
Maven:
<!-- From older version -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-chroma</artifactId>
<version>0.30.0</version>
</dependency>
<!-- To latest -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-chroma</artifactId>
<version>1.11.0</version>
</dependency>
Gradle:
// From: implementation 'dev.langchain4j:langchain4j-chroma:0.30.0'
implementation 'dev.langchain4j:langchain4j-chroma:1.11.0'
Migration:
// DEPRECATED
ChromaEmbeddingStore store = new ChromaEmbeddingStore(
"http://localhost:8000",
"my-collection",
Duration.ofSeconds(10),
true,
true
);
// RECOMMENDED
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
.timeout(Duration.ofSeconds(10))
.logRequests(true)
.logResponses(true)
.build();
New features (backward compatible):
// Optional: Add listeners for observability
EmbeddingStore<TextSegment> observed = store
.addListener(new LoggingEmbeddingStoreListener());
Backup data:
# Backup Chroma data directory
cp -r /path/to/chroma/data /path/to/backup
Update Chroma:
# Docker
docker pull chromadb/chroma:latest
# Or update pip package
pip install --upgrade chromadb
Restart Chroma:
docker run -p 8000:8000 chromadb/chroma:latest
Verify migration:
// Test connection
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.build();
Embedding test = Embedding.from(new float[]{1.0f, 0.0f, 0.0f});
String id = store.add(test);
store.remove(id);
System.out.println("Migration successful");
If migrating to multi-tenant setup:
// Before: All collections in single namespace
// Collections: "customer1-docs", "customer2-docs", "customer3-docs"
// After: Organized by tenant
public class TenantMigration {
/**
 * Moves each per-customer collection into its own tenant.
 *
 * <p>For every (old collection, tenant) pair, opens the old collection as the
 * source and a "documents" collection in that tenant's "default" database as
 * the destination, then hands both to {@code migrateCollection}.
 */
public void migrateToTenants() {
    Map<String, String> tenantByCollection = Map.of(
            "customer1-docs", "customer-1",
            "customer2-docs", "customer-2",
            "customer3-docs", "customer-3"
    );

    tenantByCollection.forEach((legacyCollection, tenantId) -> {
        // Source: the old single-namespace collection
        ChromaEmbeddingStore from = ChromaEmbeddingStore.builder()
                .baseUrl("http://localhost:8000")
                .collectionName(legacyCollection)
                .build();

        // Destination: same collection name for every tenant
        ChromaEmbeddingStore to = ChromaEmbeddingStore.builder()
                .apiVersion(ChromaApiVersion.V2)
                .baseUrl("http://localhost:8000")
                .tenantName(tenantId)
                .databaseName("default")
                .collectionName("documents") // Unified name
                .build();

        migrateCollection(from, to);
    });
}
}public class VersionedStore {
private final String baseUrl;
private final String collectionName;
/**
 * Builds a store for this instance's base URL and collection, using the
 * requested API version.
 *
 * @param useV2 {@code true} to target the V2 API, {@code false} for V1
 * @return a newly built store
 */
public ChromaEmbeddingStore createStore(boolean useV2) {
    // Only the API version differs between the two variants
    ChromaApiVersion version = useV2 ? ChromaApiVersion.V2 : ChromaApiVersion.V1;
    return ChromaEmbeddingStore.builder()
            .apiVersion(version)
            .baseUrl(baseUrl)
            .collectionName(collectionName)
            .build();
}
}public class FeatureFlaggedStore {
/**
 * Builds a store for "my-collection", switching to the V2 API when the
 * {@code USE_CHROMA_V2} environment variable is set to "true"
 * (case-insensitive).
 *
 * <p>When the variable is unset or holds any other value, no API version is
 * set explicitly and the builder's default is used.
 *
 * @return a newly built store
 */
public ChromaEmbeddingStore createStore() {
    // System.getenv returns a plain String (null when unset), not an Optional;
    // Boolean.parseBoolean treats null as false, so no explicit null check is needed.
    boolean useV2 = Boolean.parseBoolean(System.getenv("USE_CHROMA_V2"));

    ChromaEmbeddingStore.Builder builder = ChromaEmbeddingStore.builder()
            .baseUrl("http://localhost:8000")
            .collectionName("my-collection");

    if (useV2) {
        builder.apiVersion(ChromaApiVersion.V2)
                .tenantName("production")
                .databaseName("main");
    }
    return builder.build();
}
}public class MigrationValidator {
/**
 * Spot-checks a migration by running the same sample queries against both
 * stores and comparing the results position by position.
 *
 * <p>Differences are reported on {@code System.err}; nothing is thrown for a
 * mismatch. When the two stores return a different number of matches, only
 * the positions present in both are compared.
 *
 * @param source store holding the original data
 * @param dest   store holding the migrated data
 */
public void validateMigration(
        ChromaEmbeddingStore source,
        ChromaEmbeddingStore dest
) {
    // Sample queries
    List<Embedding> testQueries = generateTestQueries();

    for (Embedding query : testQueries) {
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(query)
                .maxResults(10)
                .build();
        List<EmbeddingMatch<TextSegment>> sourceMatches = source.search(request).matches();
        List<EmbeddingMatch<TextSegment>> destMatches = dest.search(request).matches();

        // Compare results
        if (sourceMatches.size() != destMatches.size()) {
            System.err.println("Result count mismatch!");
        }

        // Compare content; stop at the shorter list so a count mismatch
        // cannot turn into an IndexOutOfBoundsException.
        // NOTE(review): assumes every match carries a TextSegment - confirm.
        int comparable = Math.min(sourceMatches.size(), destMatches.size());
        for (int i = 0; i < comparable; i++) {
            String sourceText = sourceMatches.get(i).embedded().text();
            String destText = destMatches.get(i).embedded().text();
            if (!sourceText.equals(destText)) {
                System.err.println("Content mismatch at index " + i);
            }
        }
    }
    System.out.println("Validation complete");
}
}public class DualWriteStore {
private final ChromaEmbeddingStore v1Store;
private final ChromaEmbeddingStore v2Store;
/**
 * Writes the entry to both stores, V1 first and then V2.
 *
 * @param embedding the vector to store
 * @param segment   the text segment stored alongside the vector
 * @return the id returned by the V2 store (primary); the V1 id is discarded
 */
public String add(Embedding embedding, TextSegment segment) {
    // Mirror write; its id is not used
    v1Store.add(embedding, segment);
    // Primary write; callers get this id
    return v2Store.add(embedding, segment);
}
/**
 * Serves reads from the V2 store only; the V1 store is never queried here.
 *
 * @param request the search to run
 * @return the V2 store's result for {@code request}
 */
public EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request) {
    EmbeddingSearchResult<TextSegment> primaryResult = v2Store.search(request);
    return primaryResult;
}
}public class GradualCutover {
private double v2TrafficPercentage = 0.0;
/**
 * Routes the search to the V2 store when a random draw falls below
 * {@code v2TrafficPercentage}, and to the V1 store otherwise.
 *
 * @param request the search to run
 * @return the result from whichever store was chosen
 */
public EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request) {
    boolean routeToV2 = Math.random() < v2TrafficPercentage;
    if (routeToV2) {
        return v2Store.search(request);
    }
    return v1Store.search(request);
}
/**
 * Sets the share of traffic routed to the V2 store.
 *
 * <p>Despite the name, the value is a fraction rather than a 0-100
 * percentage. It is clamped to the range [0.0, 1.0], so a negative argument
 * stores 0.0 and anything above 1.0 stores 1.0.
 *
 * @param percentage desired V2 share as a fraction between 0.0 and 1.0
 */
public void increaseV2Traffic(double percentage) {
    // Clamp both ends; the original only capped the upper bound
    this.v2TrafficPercentage = Math.max(0.0, Math.min(1.0, percentage));
}
}// Ensure correct tenant/database
ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
.apiVersion(ChromaApiVersion.V2)
.baseUrl("http://localhost:8000")
.tenantName("default") // Check this matches
.databaseName("default") // Check this matches
.collectionName("my-collection")
.build();
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma