Quarkus extension for integrating LangChain4j with PostgreSQL pgvector for embedding storage
Quarkus extension for integrating LangChain4j's pgvector embedding store with Quarkus applications. Provides a managed EmbeddingStore implementation backed by PostgreSQL with the pgvector extension for efficient vector similarity search in Retrieval-Augmented Generation (RAG) applications.
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector</artifactId>
<version>1.7.4</version>
</dependency>

For proper dependency management, add the Quarkus platform BOM to your dependencyManagement section:
<dependencyManagement>
<dependencies>
<dependency>
<groupId>io.quarkus.platform</groupId>
<artifactId>quarkus-bom</artifactId>
<version>3.17.4</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>

Then add the required dependencies:
<dependencies>
<!-- PGVector extension (Quarkiverse extension - requires explicit version) -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector</artifactId>
<version>1.7.4</version>
</dependency>
<!-- PostgreSQL JDBC driver (version managed by Quarkus BOM) -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-jdbc-postgresql</artifactId>
</dependency>
<!-- Example embedding model (optional) -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>1.0.0-beta5</version>
</dependency>
</dependencies>

Note: The langchain4j-embeddings-all-minilm-l6-v2-q model produces 384-dimension vectors, which matches the examples in this documentation.
Requires a configured PostgreSQL datasource (quarkus-jdbc-postgresql extension).

import io.quarkiverse.langchain4j.pgvector.PgVectorEmbeddingStore;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.filter.Filter;
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
import jakarta.inject.Inject;import io.quarkiverse.langchain4j.pgvector.PgVectorEmbeddingStore;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Example RAG service that stores documents in, and retrieves similar text
 * from, the injected pgvector-backed embedding store.
 *
 * Declared {@code @ApplicationScoped} so that it is a CDI bean and its
 * {@code @Inject} fields are populated, matching the other service examples.
 */
@ApplicationScoped
public class MyRAGService {

    @Inject
    PgVectorEmbeddingStore embeddingStore;

    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Embeds the given text and stores it together with fixed example metadata.
     *
     * @param text the document text to embed and store
     */
    public void storeDocument(String text) {
        // Create text segment with metadata
        Metadata metadata = Metadata.from("source", "documentation")
                .put("category", "api");
        TextSegment segment = TextSegment.from(text, metadata);

        // Generate embedding
        Embedding embedding = embeddingModel.embed(segment).content();

        // Store in pgvector; add(...) returns the generated ID, which this example does not need
        embeddingStore.add(embedding, segment);
    }

    /**
     * Finds stored text segments similar to the query.
     *
     * @param query      the text to search for
     * @param maxResults the maximum number of matches to return
     * @return the matched text segments, limited to matches scoring at least 0.7
     */
    public List<TextSegment> searchSimilar(String query, int maxResults) {
        // Generate query embedding
        Embedding queryEmbedding = embeddingModel.embed(query).content();

        // Search for similar embeddings
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryEmbedding)
                .maxResults(maxResults)
                .minScore(0.7)
                .build();
        EmbeddingSearchResult<TextSegment> result = embeddingStore.search(request);

        return result.matches().stream()
                .map(match -> match.embedded())
                .collect(Collectors.toList());
    }
}

Minimal Configuration (application.properties):
# Required: Set vector dimension to match your embedding model
quarkus.langchain4j.pgvector.dimension=384
# Configure PostgreSQL datasource
quarkus.datasource.db-kind=postgresql
quarkus.datasource.jdbc.url=jdbc:postgresql://localhost:5432/mydb
quarkus.datasource.username=user
quarkus.datasource.password=password

The main embedding store implementation that integrates Quarkus datasources with LangChain4j's pgvector functionality.
/**
* Quarkus PGVector EmbeddingStore Implementation
*
* Extends the LangChain4j PgVectorEmbeddingStore with Quarkus-specific
* datasource integration. Automatically configures pgvector extension
* on database connections.
*/
public class PgVectorEmbeddingStore
extends dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore
implements dev.langchain4j.store.embedding.EmbeddingStore<TextSegment>

CDI Injection:
@Inject
PgVectorEmbeddingStore embeddingStore;

The PgVectorEmbeddingStore is automatically registered as a CDI bean and can be injected into any Quarkus component.
Constructor:
/**
* Constructor for programmatic instantiation with explicit configuration
* Typically not needed as the store is automatically configured via CDI
*
* @param datasource The Quarkus datasource
* @param table The database table name for storing embeddings
* @param dimension The vector dimension (must match your embedding model)
* @param useIndex Whether to use IVFFlat index for vector search
* @param indexListSize The IVFFlat number of lists for index tuning
* @param createTable Whether to create the table if it doesn't exist
* @param dropTableFirst Whether to drop the table before creating (for testing)
* @param metadataConfig The metadata storage configuration
*/
public PgVectorEmbeddingStore(
DataSource datasource,
String table,
Integer dimension,
Boolean useIndex,
Integer indexListSize,
Boolean createTable,
Boolean dropTableFirst,
MetadataConfig metadataConfig
);

Note: In most cases, you should use CDI injection rather than the constructor directly. The constructor is provided for advanced use cases where programmatic configuration is required.
Advanced Method:
/**
* Get a database connection with pgvector settings already configured
* Useful for advanced use cases requiring direct database access
* The connection is obtained from the configured datasource and has
* pgvector type registration already applied
*
* @return Connection with pgvector configured
* @throws SQLException if connection cannot be obtained
*/
protected Connection getConnection() throws SQLException;

This method is inherited from the parent LangChain4j class but is available for advanced users who need direct database access with pgvector already configured on the connection.
Store embeddings with or without associated text segments and metadata.
/**
* Add a single embedding to the store
* @param embedding The vector embedding to store
* @return Unique identifier for the stored embedding
*/
String add(Embedding embedding);
/**
* Add an embedding with associated text segment
* @param embedding The vector embedding to store
* @param textSegment The text segment with optional metadata
* @return Unique identifier for the stored embedding
*/
String add(Embedding embedding, TextSegment textSegment);
/**
* Add multiple embeddings to the store
* @param embeddings List of embeddings to store
* @return List of unique identifiers for the stored embeddings
*/
List<String> addAll(List<Embedding> embeddings);
/**
* Add multiple embeddings with associated text segments
* @param embeddings List of embeddings to store
* @param textSegments List of text segments with optional metadata
* @return List of unique identifiers for the stored embeddings
*/
List<String> addAll(List<Embedding> embeddings, List<TextSegment> textSegments);

Examples:
// Add embedding only
Embedding embedding = embeddingModel.embed("Hello world").content();
String id = embeddingStore.add(embedding);
// Add embedding with text and metadata
Metadata metadata = Metadata.from("author", "John Doe");
TextSegment segment = TextSegment.from("Important document content", metadata);
Embedding embedding = embeddingModel.embed(segment).content();
String id = embeddingStore.add(embedding, segment);
// Add multiple embeddings
List<String> texts = List.of("Text 1", "Text 2", "Text 3");
List<TextSegment> segments = texts.stream()
        .map(TextSegment::from)
        .collect(Collectors.toList());
// embedAll takes text segments rather than raw strings, so embed the segments built above
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
List<String> ids = embeddingStore.addAll(embeddings, segments);

Search for similar embeddings using vector similarity with optional metadata filtering.
/**
* Search for similar embeddings based on query embedding
* @param request Search request with query embedding, filters, and constraints
* @return Search results containing matches with similarity scores
*/
EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request);

EmbeddingSearchRequest Builder:
EmbeddingSearchRequest.builder()
.queryEmbedding(Embedding queryEmbedding) // Required: query vector
.maxResults(int maxResults) // Optional: max number of results (default: 5)
.minScore(double minScore) // Optional: minimum similarity score (0.0-1.0)
.filter(Filter filter) // Optional: metadata filter
.build()

Examples:
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
// Basic similarity search
Embedding queryEmbedding = embeddingModel.embed("search query").content();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.build();
EmbeddingSearchResult<TextSegment> result = embeddingStore.search(request);
// Get results with scores
result.matches().forEach(match -> {
TextSegment segment = match.embedded();
double score = match.score(); // Similarity score (0.0-1.0)
String text = segment.text();
System.out.println("Score: " + score + ", Text: " + text);
});
// Search with minimum score threshold
EmbeddingSearchRequest filteredRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(5)
.minScore(0.8) // Only return results with score >= 0.8
.build();
// Search with metadata filtering
EmbeddingSearchRequest metadataRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.filter(MetadataFilterBuilder.metadataKey("category").isEqualTo("api"))
.maxResults(10)
.build();
// Complex metadata filter (using instance methods on Filter)
Filter categoryFilter = MetadataFilterBuilder.metadataKey("category").isEqualTo("api");
Filter versionFilter = MetadataFilterBuilder.metadataKey("version").isGreaterThan("2.0");
Filter complexFilter = categoryFilter.and(versionFilter);
EmbeddingSearchRequest complexRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.filter(complexFilter)
.maxResults(10)
.build();

Remove embeddings from the store by ID or remove all embeddings.
/**
* Remove a single embedding by ID
* @param id The unique identifier of the embedding to remove
*/
void remove(String id);
/**
* Remove multiple embeddings by IDs
* @param ids Collection of unique identifiers of embeddings to remove
*/
void removeAll(Collection<String> ids);
/**
* Remove all embeddings from the store
*/
void removeAll();

Examples:
// Remove single embedding
String id = embeddingStore.add(embedding, segment);
embeddingStore.remove(id);
// Remove multiple embeddings
List<String> ids = embeddingStore.addAll(embeddings, segments);
embeddingStore.removeAll(ids);
// Clear the entire store (use with caution)
embeddingStore.removeAll();

All configuration uses the quarkus.langchain4j.pgvector prefix.
/**
* Table name for storing embeddings
* Property: quarkus.langchain4j.pgvector.table
* Default: "embeddings"
*/
String table();
/**
* Dimension of the embedding vectors
* Must match the dimension of vectors produced by your embedding model
* Property: quarkus.langchain4j.pgvector.dimension
* Required: Yes
* Examples:
* - 384 for AllMiniLmL6V2QuantizedEmbeddingModel
* - 1536 for OpenAI's text-embedding-ada-002
*/
Integer dimension();
/**
* Enable IVFFlat index for faster similarity search
* Property: quarkus.langchain4j.pgvector.use-index
* Default: false
*/
Boolean useIndex();
/**
* IVFFlat index list size (number of clusters)
* Property: quarkus.langchain4j.pgvector.index-list-size
* Default: 0
*/
Integer indexListSize();
/**
* Whether to create the table if it doesn't exist
* Property: quarkus.langchain4j.pgvector.create-table
* Default: true
*/
Boolean createTable();
/**
* Whether to drop the table before creating it (useful for testing)
* Property: quarkus.langchain4j.pgvector.drop-table-first
* Default: false
*/
Boolean dropTableFirst();
/**
* Whether to create the pgvector extension on startup
* Property: quarkus.langchain4j.pgvector.register-vector-pg-extension
* Default: false (automatically set to true in dev/test mode)
*/
Boolean registerVectorPGExtension();

Example Configuration:
# Required
quarkus.langchain4j.pgvector.dimension=384
# Optional - Table configuration
quarkus.langchain4j.pgvector.table=my_embeddings
quarkus.langchain4j.pgvector.create-table=true
quarkus.langchain4j.pgvector.drop-table-first=false
# Optional - Performance optimization
quarkus.langchain4j.pgvector.use-index=true
quarkus.langchain4j.pgvector.index-list-size=100
# Optional - pgvector extension (auto-enabled in dev/test)
quarkus.langchain4j.pgvector.register-vector-pg-extension=false

/**
* Name of the configured Postgres datasource to use
* If not set, uses the default datasource from the Agroal extension
* Property: quarkus.langchain4j.pgvector.datasource
* Build-time property (cannot be changed at runtime)
*/
Optional<String> datasource();

Example with Named Datasource:
# Configure a named datasource
quarkus.datasource.pgvector-db.db-kind=postgresql
quarkus.datasource.pgvector-db.jdbc.url=jdbc:postgresql://localhost:5432/vectordb
quarkus.datasource.pgvector-db.username=user
quarkus.datasource.pgvector-db.password=password
# Use the named datasource
quarkus.langchain4j.pgvector.datasource=pgvector-db
quarkus.langchain4j.pgvector.dimension=384

Configure how metadata is stored in the database. Three storage modes are supported.
/**
* Metadata storage mode
* Property: quarkus.langchain4j.pgvector.metadata.storage-mode
* Values:
* - COLUMN_PER_KEY: Static metadata with predefined columns
* - COMBINED_JSON: Dynamic metadata stored as JSON
* - COMBINED_JSONB: Dynamic metadata stored as binary JSON (optimized for queries)
* Default: COMBINED_JSON
*/
MetadataStorageMode storageMode();
/**
* SQL definition of metadata field(s)
* Property: quarkus.langchain4j.pgvector.metadata.column-definitions
* Default: ["metadata JSON NULL"]
* Examples:
* - JSONB: ["metadata JSONB NULL"]
* - COLUMNS: ["condominium_id uuid null", "user uuid null"]
*/
List<String> columnDefinitions();
/**
* List of fields to use as indexes
* Property: quarkus.langchain4j.pgvector.metadata.indexes
* Note: Indexes are not allowed with JSON storage mode (use JSONB or COLUMNS)
* Examples:
* - JSONB: ["(metadata->'key')", "(metadata->'name')"]
* - COLUMNS: ["key", "name", "age"]
*/
Optional<List<String>> indexes();
/**
* PostgreSQL index type
* Property: quarkus.langchain4j.pgvector.metadata.index-type
* Values: BTREE, GIN, or other PostgreSQL index types
* Default: BTREE
*/
String indexType();

For dynamic metadata when you don't know the metadata fields in advance.
quarkus.langchain4j.pgvector.dimension=384
quarkus.langchain4j.pgvector.metadata.storage-mode=COMBINED_JSON
quarkus.langchain4j.pgvector.metadata.column-definitions=metadata JSON NULL

Usage:
// Store any metadata dynamically
Metadata metadata = Metadata.from("author", "Alice")
.put("date", "2024-01-15")
.put("version", "1.0");
TextSegment segment = TextSegment.from("Document text", metadata);
embeddingStore.add(embedding, segment);

Note: JSON storage mode does not support indexes on metadata fields.
For dynamic metadata with better query performance on large datasets.
quarkus.langchain4j.pgvector.dimension=384
quarkus.langchain4j.pgvector.metadata.storage-mode=COMBINED_JSONB
quarkus.langchain4j.pgvector.metadata.column-definitions=metadata JSONB NULL
# Optional: Create indexes for frequently queried fields
quarkus.langchain4j.pgvector.metadata.indexes=(metadata->'category'),(metadata->'status')
quarkus.langchain4j.pgvector.metadata.index-type=GIN

Usage:
// Same API as JSON, but with better query performance
Metadata metadata = Metadata.from("category", "documentation")
.put("status", "published");
TextSegment segment = TextSegment.from("Document text", metadata);
embeddingStore.add(embedding, segment);
// Efficient metadata filtering
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.filter(MetadataFilterBuilder.metadataKey("category").isEqualTo("documentation"))
.maxResults(10)
.build();

For static metadata when you know all metadata fields in advance.
quarkus.langchain4j.pgvector.dimension=384
quarkus.langchain4j.pgvector.metadata.storage-mode=COLUMN_PER_KEY
# Define specific columns for metadata
quarkus.langchain4j.pgvector.metadata.column-definitions=\
doc_type varchar(50) null,\
author varchar(100) null,\
created_at timestamp null,\
project_id uuid null
# Optional: Create indexes on specific columns
quarkus.langchain4j.pgvector.metadata.indexes=doc_type,project_id
quarkus.langchain4j.pgvector.metadata.index-type=BTREE

Usage:
// Use predefined metadata fields
Metadata metadata = Metadata.from("doc_type", "api-reference")
.put("author", "Bob")
.put("created_at", "2024-01-15T10:00:00")
.put("project_id", "550e8400-e29b-41d4-a716-446655440000");
TextSegment segment = TextSegment.from("API documentation", metadata);
embeddingStore.add(embedding, segment);
// Efficient metadata filtering with column-based indexes
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.filter(MetadataFilterBuilder.metadataKey("doc_type").isEqualTo("api-reference"))
.maxResults(10)
.build();

For large datasets, enable IVFFlat indexing to improve search performance.
quarkus.langchain4j.pgvector.dimension=384
quarkus.langchain4j.pgvector.use-index=true
quarkus.langchain4j.pgvector.index-list-size=100

Guidelines:
- Set index-list-size to approximately the square root of your row count.
- Example: for 100,000 rows, use index-list-size=316 (√100,000 ≈ 316).

Choose the appropriate metadata storage mode for your use case:
In development mode, Quarkus automatically starts a PostgreSQL container with pgvector extension using Docker.
# No datasource configuration needed in dev mode
quarkus.langchain4j.pgvector.dimension=384

The extension automatically provides the pgvector/pgvector:pg17 Docker image.
The extension is fully compatible with Quarkus native image builds.
mvn package -Pnative

import io.quarkiverse.langchain4j.pgvector.PgVectorEmbeddingStore;
import dev.langchain4j.model.embedding.EmbeddingModel;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
/**
 * Application-scoped example bean that indexes plain text into the
 * injected pgvector embedding store.
 */
@ApplicationScoped
public class DocumentService {

    @Inject
    PgVectorEmbeddingStore embeddingStore;

    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Embeds the text and stores the vector alongside a text segment built from it.
     *
     * @param text the raw document text to index
     */
    public void indexDocument(String text) {
        Embedding vector = embeddingModel.embed(text).content();
        TextSegment textSegment = TextSegment.from(text);
        embeddingStore.add(vector, textSegment);
    }
}

The extension uses PgVectorAgroalPoolInterceptor to automatically configure pgvector on database connections.
/**
* Connection pool interceptor for pgvector setup
* From: io.quarkiverse.langchain4j.pgvector.PgVectorAgroalPoolInterceptor
*/
public class PgVectorAgroalPoolInterceptor implements AgroalPoolInterceptor {
/**
* Constructor for the pool interceptor
* @param config The PgVectorEmbeddingStoreConfig configuration
*/
public PgVectorAgroalPoolInterceptor(PgVectorEmbeddingStoreConfig config);
/**
* Intercepts connection creation to add pgvector settings
* Automatically creates the vector extension in dev/test profiles
* Registers pgvector datatype on the connection
* @param connection The database connection being created
*/
public void onConnectionCreate(Connection connection);
}

How it works:
- When the connection pool creates a new connection, onConnectionCreate is invoked.
- If extension registration is enabled (register-vector-pg-extension=true), it executes CREATE EXTENSION IF NOT EXISTS vector.

Note: This is handled automatically by the extension. Users typically don't need to interact with this class directly unless implementing custom connection pool behaviors.
/**
* Represents a vector embedding
* From: dev.langchain4j.data.embedding.Embedding
*/
public class Embedding {
/**
* Get the vector as a float array
*/
public float[] vector();
/**
* Get the dimension of the vector
*/
public int dimension();
}/**
* Represents a text segment with optional metadata
* From: dev.langchain4j.data.segment.TextSegment
*/
public class TextSegment {
/**
* Create a text segment from string
*/
public static TextSegment from(String text);
/**
* Create a text segment with metadata
*/
public static TextSegment from(String text, Metadata metadata);
/**
* Get the text content
*/
public String text();
/**
* Get the metadata
*/
public Metadata metadata();
}/**
* Key-value metadata associated with text segments
* From: dev.langchain4j.data.document.Metadata
*/
public class Metadata {
/**
* Create metadata from a single key-value pair
*/
public static Metadata from(String key, String value);
/**
* Create metadata from a map
*/
public static Metadata from(Map<String, String> metadata);
/**
* Add a key-value pair
*/
public Metadata put(String key, String value);
/**
* Get a value by key
*/
public String get(String key);
}/**
* Builder-based request object for searching embeddings
* From: dev.langchain4j.store.embedding.EmbeddingSearchRequest
*/
public class EmbeddingSearchRequest {
public static Builder builder();
public static class Builder {
/**
* Set the query embedding (required)
*/
public Builder queryEmbedding(Embedding queryEmbedding);
/**
* Set maximum number of results to return
*/
public Builder maxResults(int maxResults);
/**
* Set minimum similarity score threshold (0.0-1.0)
*/
public Builder minScore(double minScore);
/**
* Set metadata filter
*/
public Builder filter(Filter filter);
/**
* Build the request
*/
public EmbeddingSearchRequest build();
}
}/**
* Search results containing matches with similarity scores
* From: dev.langchain4j.store.embedding.EmbeddingSearchResult
*/
public class EmbeddingSearchResult<Embedded> {
/**
* Get list of embedding matches
*/
public List<EmbeddingMatch<Embedded>> matches();
}/**
* A single embedding match with similarity score
* From: dev.langchain4j.store.embedding.EmbeddingMatch
*/
public class EmbeddingMatch<Embedded> {
/**
* Get similarity score (0.0-1.0, higher is more similar)
*/
public double score();
/**
* Get the unique identifier of the embedding
*/
public String embeddingId();
/**
* Get the embedding vector
*/
public Embedding embedding();
/**
* Get the embedded content (e.g., TextSegment)
*/
public Embedded embedded();
}/**
* Metadata storage mode enumeration
* From: dev.langchain4j.store.embedding.pgvector.MetadataStorageMode
*/
public enum MetadataStorageMode {
/**
* Static metadata with one column per metadata key
*/
COLUMN_PER_KEY,
/**
* Dynamic metadata stored as JSON
*/
COMBINED_JSON,
/**
* Dynamic metadata stored as binary JSON (JSONB)
*/
COMBINED_JSONB
}/**
* Filter interface for metadata-based filtering in embedding searches
* From: dev.langchain4j.store.embedding.filter.Filter
*
* Provides methods for combining filters with logical operators.
*/
public interface Filter {
/**
* Combine this filter with another using AND logic
* @param another The filter to AND with this one
* @return Combined filter that matches when both filters match
*/
Filter and(Filter another);
/**
* Combine this filter with another using OR logic
* @param another The filter to OR with this one
* @return Combined filter that matches when either filter matches
*/
Filter or(Filter another);
}/**
* Builder for creating metadata filters
* From: dev.langchain4j.store.embedding.filter.MetadataFilterBuilder
*/
public class MetadataFilterBuilder {
/**
* Start building a filter for a specific metadata key
* @param key The metadata key to filter on
* @return KeyFilterBuilder for building the filter condition
*/
public static KeyFilterBuilder metadataKey(String key);
public static class KeyFilterBuilder {
/**
* Filter for metadata values equal to the specified value
* @param value The value to match
* @return Filter that matches when the metadata key equals the value
*/
public Filter isEqualTo(String value);
/**
* Filter for metadata values not equal to the specified value
* @param value The value to not match
* @return Filter that matches when the metadata key does not equal the value
*/
public Filter isNotEqualTo(String value);
/**
* Filter for metadata values greater than the specified value
* @param value The value to compare against
* @return Filter that matches when the metadata key is greater than the value
*/
public Filter isGreaterThan(Comparable<?> value);
/**
* Filter for metadata values greater than or equal to the specified value
* @param value The value to compare against
* @return Filter that matches when the metadata key is greater than or equal to the value
*/
public Filter isGreaterThanOrEqualTo(Comparable<?> value);
/**
* Filter for metadata values less than the specified value
* @param value The value to compare against
* @return Filter that matches when the metadata key is less than the value
*/
public Filter isLessThan(Comparable<?> value);
/**
* Filter for metadata values less than or equal to the specified value
* @param value The value to compare against
* @return Filter that matches when the metadata key is less than or equal to the value
*/
public Filter isLessThanOrEqualTo(Comparable<?> value);
/**
* Filter for metadata values in the specified list
* @param values The list of values to match
* @return Filter that matches when the metadata key is in the list
*/
public Filter isIn(Collection<?> values);
/**
* Filter for metadata values not in the specified list
* @param values The list of values to not match
* @return Filter that matches when the metadata key is not in the list
*/
public Filter isNotIn(Collection<?> values);
}
}

Usage Example:
import dev.langchain4j.store.embedding.filter.Filter;
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
// Simple equality filter
Filter categoryFilter = MetadataFilterBuilder
.metadataKey("category")
.isEqualTo("documentation");
// Comparison filter
Filter recentFilter = MetadataFilterBuilder
.metadataKey("timestamp")
.isGreaterThan("2024-01-01");
// Combined filter with AND (using instance method)
Filter categoryFilterPart = MetadataFilterBuilder.metadataKey("category").isEqualTo("api");
Filter versionFilterPart = MetadataFilterBuilder.metadataKey("version").isGreaterThan("2.0");
Filter combinedFilter = categoryFilterPart.and(versionFilterPart);
// Combined filter with OR (using instance method)
Filter publishedFilter = MetadataFilterBuilder.metadataKey("status").isEqualTo("published");
Filter reviewedFilter = MetadataFilterBuilder.metadataKey("status").isEqualTo("reviewed");
Filter eitherFilter = publishedFilter.or(reviewedFilter);
// Chaining multiple filters
Filter complexFilter = categoryFilterPart.and(versionFilterPart).or(publishedFilter);
// Use in search
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.filter(combinedFilter)
.maxResults(10)
.build();@ApplicationScoped
/**
 * Example RAG service: indexes documents into the pgvector store and
 * retrieves the most relevant stored text as context for a question.
 */
public class RAGService {

    @Inject
    PgVectorEmbeddingStore embeddingStore;

    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Embeds every document and stores each vector with its text segment.
     *
     * @param documents the raw document texts to index
     */
    public void indexDocuments(List<String> documents) {
        if (documents.isEmpty()) {
            // Nothing to index; avoid sending an empty batch to the embedding model
            return;
        }

        // Build the segments once and reuse them for both embedding and storage
        List<TextSegment> segments = documents.stream()
                .map(TextSegment::from)
                .collect(Collectors.toList());
        List<Embedding> embeddings = embeddingModel.embedAll(segments).content();

        embeddingStore.addAll(embeddings, segments);
    }

    /**
     * Answers a question using up to three stored segments (score >= 0.7) as context.
     *
     * @param question the user's question
     * @return the value produced by generateAnswer for the question and retrieved context
     */
    public String answerQuestion(String question) {
        // Find relevant context
        Embedding queryEmbedding = embeddingModel.embed(question).content();
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryEmbedding)
                .maxResults(3)
                .minScore(0.7)
                .build();
        List<String> context = embeddingStore.search(request)
                .matches()
                .stream()
                .map(match -> match.embedded().text())
                .collect(Collectors.toList());

        // Use context with LLM to generate answer.
        // NOTE(review): generateAnswer is not defined in this snippet; presumably supplied by the application.
        return generateAnswer(question, context);
    }
}
@ApplicationScoped
/**
 * Example bean that stores documents with category/author/timestamp metadata
 * and searches them with a category filter.
 */
public class DocumentManager {

    @Inject
    PgVectorEmbeddingStore embeddingStore;

    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Stores a document together with its category, author and the current timestamp.
     *
     * @param text     the document text
     * @param category value stored under the "category" metadata key
     * @param author   value stored under the "author" metadata key
     * @return the identifier returned by the embedding store
     */
    public String addDocument(String text, String category, String author) {
        Metadata docMetadata = Metadata.from("category", category)
                .put("author", author)
                .put("timestamp", Instant.now().toString());
        TextSegment docSegment = TextSegment.from(text, docMetadata);
        Embedding docVector = embeddingModel.embed(docSegment).content();
        String storedId = embeddingStore.add(docVector, docSegment);
        return storedId;
    }

    /**
     * Searches for up to ten segments similar to the query within one category.
     *
     * @param query    the text to search for
     * @param category the required value of the "category" metadata key
     * @return the matching text segments
     */
    public List<TextSegment> searchByCategory(String query, String category) {
        Embedding queryVector = embeddingModel.embed(query).content();
        EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryVector)
                .filter(MetadataFilterBuilder.metadataKey("category").isEqualTo(category))
                .maxResults(10)
                .build();
        List<TextSegment> hits = embeddingStore.search(searchRequest)
                .matches()
                .stream()
                .map(hit -> hit.embedded())
                .collect(Collectors.toList());
        return hits;
    }
}

# application.properties for tests
%test.quarkus.langchain4j.pgvector.dimension=384
%test.quarkus.langchain4j.pgvector.drop-table-first=true
%test.quarkus.langchain4j.pgvector.register-vector-pg-extension=true

@QuarkusTest
// Skeleton test class showing how to inject the embedding store and reset it between tests.
public class EmbeddingStoreTest {
// Embedding store under test, injected by CDI
@Inject
PgVectorEmbeddingStore embeddingStore;
// Runs before every test method so each test starts from an empty store
@BeforeEach
void setUp() {
embeddingStore.removeAll(); // Clear before each test
}
// Placeholder test; the body is intentionally left for the reader to fill in
@Test
void testAddAndSearch() {
// Test implementation
}
}Install with Tessl CLI
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-pgvector