LangChain4j integration for Chroma embedding store enabling storage, retrieval, and similarity search of vector embeddings with metadata filtering support for both API V1 and V2.
Comprehensive guide to using metadata with ChromaEmbeddingStore.
Metadata allows attaching key-value pairs to embeddings for:
Metadata metadata = new Metadata()
.put("string_field", "value") // String
.put("int_field", 42) // Integer
.put("long_field", 123456789L) // Long
.put("float_field", 3.14f) // Float
.put("double_field", 3.14159) // Double
.put("uuid_field", UUID.randomUUID()); // UUID

// NOT SUPPORTED - will cause errors
// metadata.put("bool_field", true); // Boolean - NOT supported
// metadata.put("array_field", array); // Arrays - NOT supported
// metadata.put("object_field", object); // Objects - NOT supported

// Option 1: Use string
Metadata meta1 = new Metadata().put("active", "true");
Filter f1 = metadataKey("active").isEqualTo("true");
// Option 2: Use 0/1
Metadata meta2 = new Metadata().put("active", 1);
Filter f2 = metadataKey("active").isEqualTo(1);

Metadata docMetadata = new Metadata()
.put("title", "Document Title")
.put("author", "John Doe")
.put("created_at", System.currentTimeMillis())
.put("modified_at", System.currentTimeMillis())
.put("version", 1)
.put("document_id", UUID.randomUUID());

Metadata categoryMetadata = new Metadata()
.put("category", "technology")
.put("subcategory", "machine-learning")
.put("tags", "ai,ml,embeddings") // Comma-separated
.put("difficulty", "intermediate");

Metadata sourceMetadata = new Metadata()
.put("source", "/docs/guide.pdf")
.put("page", 42)
.put("chapter", 3)
.put("url", "https://example.com/docs/guide.pdf");

long now = System.currentTimeMillis();
long thirtyDaysAgo = now - (30L * 24 * 60 * 60 * 1000);
Metadata timeMetadata = new Metadata()
.put("indexed_at", now)
.put("published_at", thirtyDaysAgo)
.put("year", 2024)
.put("month", 3);

Metadata statusMetadata = new Metadata()
.put("status", "published")
.put("visibility", "public")
.put("archived", 0) // Boolean as int
.put("priority", 5);

Metadata qualityMetadata = new Metadata()
.put("rating", 4.5)
.put("views", 1250)
.put("likes", 87)
.put("confidence", 0.95);

Metadata meta = new Metadata();

Metadata meta = new Metadata()
.put("key1", "value1")
.put("key2", 42)
.put("key3", 3.14);

Map<String, Object> map = new HashMap<>();
map.put("author", "John Doe");
map.put("year", 2024);
map.put("rating", 4.5);
Metadata meta = new Metadata(map);

/** Factory helpers that assemble the metadata attached to document embeddings. */
public class MetadataBuilder {

    /**
     * Builds the metadata for a document.
     *
     * @param author   value stored under {@code author}
     * @param year     value stored under {@code year}
     * @param category value stored under {@code category}
     * @return metadata holding the three values plus the current time in
     *         milliseconds under {@code indexed_at}
     */
    public static Metadata forDocument(String author, int year, String category) {
        Metadata documentMetadata = new Metadata();
        documentMetadata.put("author", author);
        documentMetadata.put("year", year);
        documentMetadata.put("category", category);
        // Stamp the entry with the moment it was built.
        documentMetadata.put("indexed_at", System.currentTimeMillis());
        return documentMetadata;
    }
}
// Usage
Metadata meta = MetadataBuilder.forDocument("John Doe", 2024, "tech");

Metadata meta = segment.metadata();
String author = meta.getString("author");
Integer year = meta.getInteger("year");
Double rating = meta.getDouble("rating");
UUID id = meta.getUUID("document_id");
Long timestamp = meta.getLong("created_at");
Float score = meta.getFloat("score");

// Check existence first
if (meta.containsKey("year")) {
int year = meta.getInteger("year");
}
// Or handle null
Integer year = meta.getInteger("year");
if (year != null) {
// Use year
} else {
// Use default
year = 2024;
}

Map<String, Object> map = meta.toMap();
// Iterate all values
for (Map.Entry<String, Object> entry : map.entrySet()) {
System.out.println(entry.getKey() + ": " + entry.getValue());
}

Filter authorFilter = metadataKey("author").isEqualTo("John Doe");
Filter categoryFilter = metadataKey("category").isEqualTo("tech");

// Comparison filters only work with numeric metadata
Filter yearFilter = metadataKey("year").isGreaterThanOrEqualTo(2020);
Filter ratingFilter = metadataKey("rating").isGreaterThan(4.0);
Filter priorityFilter = metadataKey("priority").isLessThanOrEqualTo(5);

Filter categoriesFilter = metadataKey("category")
.isIn(Arrays.asList("tech", "science", "engineering"));
Filter excludeStatuses = metadataKey("status")
.isNotIn(Arrays.asList("draft", "archived"));

See: Filtering Guide for complete filter documentation.
Consider these questions:
/** Single place where the metadata layout for indexed documents is defined. */
public class DocumentMetadataSchema {

    /**
     * Assembles the metadata for one document.
     *
     * <p>Besides the supplied values, the result carries the current time in
     * milliseconds under {@code indexed_at}, the fixed status {@code "published"}
     * and the fixed version {@code 1}.
     *
     * @param documentId textual UUID, parsed with {@link UUID#fromString(String)}
     * @param source     value stored under {@code source}
     * @param category   value stored under {@code category}
     * @param author     value stored under {@code author}
     * @param year       value stored under {@code year}
     * @param rating     value stored under {@code rating}
     * @return the populated metadata
     */
    public static Metadata create(
            String documentId,
            String source,
            String category,
            String author,
            int year,
            double rating) {
        Metadata schema = new Metadata();

        // Identity
        schema.put("document_id", UUID.fromString(documentId));
        schema.put("source", source);

        // Classification
        schema.put("category", category);
        schema.put("author", author);

        // Temporal
        schema.put("year", year);
        schema.put("indexed_at", System.currentTimeMillis());

        // Quality
        schema.put("rating", rating);

        // Status
        schema.put("status", "published");
        schema.put("version", 1);

        return schema;
    }
}

// GOOD: Consistent naming conventions
.put("created_at", timestamp)
.put("updated_at", timestamp)
.put("indexed_at", timestamp)
// BAD: Inconsistent naming
.put("createdAt", timestamp) // camelCase
.put("updated_time", timestamp) // Different suffix
.put("indexTimestamp", timestamp) // Different formatpublic interface StandardMetadataFields {
String ID = "id";
String SOURCE = "source";
String CATEGORY = "category";
String AUTHOR = "author";
String CREATED_AT = "created_at";
String UPDATED_AT = "updated_at";
String STATUS = "status";
String VERSION = "version";
}
// Usage
Metadata meta = new Metadata()
.put(StandardMetadataFields.ID, id)
.put(StandardMetadataFields.SOURCE, source)
.put(StandardMetadataFields.CATEGORY, category);

// Use multiple fields for hierarchy
Metadata hierarchical = new Metadata()
.put("category_l1", "technology")
.put("category_l2", "artificial-intelligence")
.put("category_l3", "machine-learning");
// Filter at any level
Filter l1Filter = metadataKey("category_l1").isEqualTo("technology");
Filter l2Filter = metadataKey("category_l2").isEqualTo("artificial-intelligence");

Metadata versionedMetadata = new Metadata()
.put("document_id", documentId)
.put("version", 2)
.put("created_at", originalTimestamp)
.put("updated_at", System.currentTimeMillis())
.put("changelog", "Fixed typos, updated examples");
// Query latest version
Filter latestVersion = metadataKey("document_id").isEqualTo(documentId);
// Then sort by version in application code

Since arrays aren't supported, use comma-separated strings:
// Store
Metadata meta = new Metadata()
.put("tags", "ai,ml,embeddings,vector-search");
// Parse when needed
String tags = meta.getString("tags");
List<String> tagList = Arrays.asList(tags.split(","));
// Filter (limited - can only match exact string)
Filter tagFilter = metadataKey("tags").isEqualTo("ai,ml,embeddings,vector-search");
// Better: Use separate fields for important multi-values
Metadata betterMeta = new Metadata()
.put("primary_tag", "ai")
.put("secondary_tag", "ml")
.put("tertiary_tag", "embeddings");

/** Matches a standalone four-digit number starting with 19 or 20. Compiled once and reused. */
private static final Pattern YEAR_PATTERN = Pattern.compile("\\b(19|20)\\d{2}\\b");

/**
 * Derives metadata from a piece of text.
 *
 * <p>The result contains {@code source}, {@code indexed_at} (current time in
 * milliseconds), {@code word_count}, {@code length_category}
 * ({@code "short"} below 100 words, {@code "medium"} below 500, otherwise
 * {@code "long"}) and, when the text contains one, the first matching year
 * under {@code extracted_year}.
 *
 * @param text   the text to analyse
 * @param source value stored under {@code source}
 * @return the computed metadata
 */
public Metadata computeMetadata(String text, String source) {
    Metadata meta = new Metadata()
            .put("source", source)
            .put("indexed_at", System.currentTimeMillis());

    // Compute values.
    // Trim before splitting: String.split keeps a leading empty token, so an
    // empty string or text starting with whitespace would be over-counted by one.
    String trimmed = text.trim();
    int wordCount = trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
    meta.put("word_count", wordCount);

    // Categorize by length
    String lengthCategory = wordCount < 100 ? "short" :
            wordCount < 500 ? "medium" : "long";
    meta.put("length_category", lengthCategory);

    // Extract year from text if possible (first match only)
    Matcher matcher = YEAR_PATTERN.matcher(text);
    if (matcher.find()) {
        meta.put("extracted_year", Integer.parseInt(matcher.group()));
    }

    return meta;
}

// GOOD: Compact metadata
Metadata compact = new Metadata()
.put("id", id)
.put("category", category)
.put("year", year);
// BAD: Storing large text in metadata
Metadata bloated = new Metadata()
.put("id", id)
.put("full_text", longDocument); // Store in TextSegment instead

Choose metadata fields that you'll filter on:
// Fields you'll filter on
Metadata filterable = new Metadata()
.put("category", "tech") // Will filter
.put("year", 2024) // Will filter
.put("status", "published"); // Will filter
// Metadata that doesn't need filtering can still be stored
filterable.put("notes", "Internal notes"); // Won't filter, just metadata

/** Checks metadata against the rules expected before it is stored. */
public class MetadataValidator {

    /**
     * Validates the given metadata.
     *
     * @param meta the metadata to check
     * @throws IllegalArgumentException if {@code category} is absent, if
     *         {@code year} is present but outside 1900-2100, or if
     *         {@code status} is present but not one of {@code draft},
     *         {@code published}, {@code archived}
     */
    public void validate(Metadata meta) {
        // Required fields
        if (!meta.containsKey("category")) {
            throw new IllegalArgumentException("category is required");
        }

        // Range validation (year is optional; only checked when present)
        Integer year = meta.getInteger("year");
        if (year != null && (year < 1900 || year > 2100)) {
            throw new IllegalArgumentException("invalid year: " + year);
        }

        // Value validation (status is optional; only checked when present)
        String status = meta.getString("status");
        if (status != null &&
                !Arrays.asList("draft", "published", "archived").contains(status)) {
            throw new IllegalArgumentException("invalid status: " + status);
        }
    }
}

See:
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma@1.11.0