LangChain4j PGVector integration for PostgreSQL-based vector embedding storage and retrieval
Configure how metadata is stored and indexed in the database with three flexible storage modes.
Defines the storage mode for embedding metadata.
/**
* Metadata storage mode enumeration
*/
enum MetadataStorageMode {
/**
* For static metadata when you know in advance the list of metadata keys
* Each metadata key gets its own database column
* Best for: Known, fixed metadata structure with frequent queries
*/
COLUMN_PER_KEY,
/**
* For dynamic metadata when you don't know the list of metadata keys
* Stores all metadata in a single JSON column
* Best for: Dynamic, flexible metadata structure
*/
COMBINED_JSON,
/**
* Same as COMBINED_JSON, but stored in binary format
* Optimized for querying on large datasets
* Best for: Dynamic metadata with frequent queries on large datasets
*/
COMBINED_JSONB
}
Interface for configuring metadata storage.
/**
* Interface for metadata storage configuration
*/
interface MetadataStorageConfig {
/**
* Returns the metadata storage mode
* @return MetadataStorageMode (COLUMN_PER_KEY, COMBINED_JSON, or COMBINED_JSONB)
*/
MetadataStorageMode storageMode();
/**
* Returns the SQL definition of metadata field(s) list
* @return List of column definitions
*/
List<String> columnDefinitions();
/**
* Returns the list of metadata fields to use as indexes
* @return List of index definitions
*/
List<String> indexes();
/**
* Returns the index type to use
* @return Index type (e.g., "BTREE", "GIN")
*/
String indexType();
}
Get the default metadata storage configuration.
/**
* Returns the default metadata storage configuration
* Uses COMBINED_JSON mode with a single JSON column
* @return Default MetadataStorageConfig
*/
static MetadataStorageConfig defaultConfig();
Usage Example:
import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig;
import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig;
// Get default configuration
MetadataStorageConfig config = DefaultMetadataStorageConfig.defaultConfig();
// Use with builder
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config) // Uses COMBINED_JSON mode
.build();
Create custom metadata storage configurations.
/**
* Creates a builder for DefaultMetadataStorageConfig
* @return Builder for creating custom metadata storage configuration
*/
static DefaultMetadataStorageConfigBuilder builder();
/**
* Creates a default metadata storage configuration
* Uses COMBINED_JSON mode with default settings
*/
public DefaultMetadataStorageConfig();
/**
* Creates a custom metadata storage configuration
* @param storageMode The metadata storage mode
* @param columnDefinitions List of SQL column definitions
* @param indexes List of index definitions
* @param indexType PostgreSQL index type (e.g., "BTREE", "GIN")
*/
public DefaultMetadataStorageConfig(
MetadataStorageMode storageMode,
List<String> columnDefinitions,
List<String> indexes,
String indexType
);
Utility class for parsing and working with SQL column definitions.
/**
* Parses SQL column definition strings into structured format
* Used internally but available for custom metadata configuration
*/
class MetadataColumDefinition {
/**
* Parses a SQL column definition string
* @param sqlDefinition SQL definition like "metadata JSON NULL" or "source VARCHAR(255) NULL"
* @return Parsed MetadataColumDefinition
*/
public static MetadataColumDefinition from(String sqlDefinition);
/**
* Returns the full SQL definition
* @return Complete SQL column definition string
*/
public String getFullDefinition();
/**
* Returns the column name
* @return Column name extracted from definition
*/
public String getName();
/**
* Returns the column type
* @return SQL type extracted from definition
*/
public String getType();
}
Builder class for creating custom metadata storage configurations.
/**
* Builder for DefaultMetadataStorageConfig
*/
class DefaultMetadataStorageConfigBuilder {
/**
* Sets the metadata storage mode
* @param storageMode The storage mode to use
* @return Builder instance for chaining
*/
DefaultMetadataStorageConfigBuilder storageMode(MetadataStorageMode storageMode);
/**
* Sets the column definitions
* Format depends on storage mode:
* - COMBINED_JSON: List with one entry like "metadata JSON NULL"
* - COMBINED_JSONB: List with one entry like "metadata JSONB NULL"
* - COLUMN_PER_KEY: List with entries like "source VARCHAR(255) NULL", "page INTEGER NULL"
* @param columnDefinitions List of SQL column definitions
* @return Builder instance for chaining
*/
DefaultMetadataStorageConfigBuilder columnDefinitions(List<String> columnDefinitions);
/**
* Sets the list of fields to index
* Format depends on storage mode:
* - COMBINED_JSON/JSONB: List with entries like "(metadata->'key')", "(metadata->'name')"
* - COLUMN_PER_KEY: List with entries like "source", "page"
* @param indexes List of index definitions
* @return Builder instance for chaining
*/
DefaultMetadataStorageConfigBuilder indexes(List<String> indexes);
/**
* Sets the index type
* Common values: "BTREE" (default), "GIN", "GIST"
* @param indexType PostgreSQL index type
* @return Builder instance for chaining
*/
DefaultMetadataStorageConfigBuilder indexType(String indexType);
/**
* Builds the metadata storage configuration
* @return Configured DefaultMetadataStorageConfig
*/
DefaultMetadataStorageConfig build();
}
COLUMN_PER_KEY mode: best for static metadata with known keys.
Advantages:
Disadvantages:
Usage Example:
import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig;
import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode;
import java.util.Arrays;
MetadataStorageConfig config = DefaultMetadataStorageConfig.builder()
.storageMode(MetadataStorageMode.COLUMN_PER_KEY)
.columnDefinitions(Arrays.asList(
"source VARCHAR(255) NULL",
"page INTEGER NULL",
"section VARCHAR(255) NULL",
"created_date DATE NULL"
))
.indexes(Arrays.asList("source", "page", "created_date"))
.indexType("BTREE")
.build();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.build();
Metadata Usage:
import dev.langchain4j.data.document.Metadata;
Metadata metadata = new Metadata();
metadata.put("source", "documentation");
metadata.put("page", 42);
metadata.put("section", "installation");
// Only these predefined keys can be used
COMBINED_JSON mode: best for dynamic, flexible metadata structure.
Advantages:
Disadvantages:
Usage Example:
import java.util.Collections;
MetadataStorageConfig config = DefaultMetadataStorageConfig.builder()
.storageMode(MetadataStorageMode.COMBINED_JSON)
.columnDefinitions(Collections.singletonList("metadata JSON NULL"))
.indexes(Arrays.asList(
"(metadata->'source')",
"(metadata->'page')"
))
.indexType("BTREE")
.build();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.build();
Metadata Usage:
Metadata metadata = new Metadata();
metadata.put("source", "documentation");
metadata.put("page", 42);
metadata.put("author", "John Doe");
metadata.put("custom_field", "any value");
// Any metadata keys can be used dynamically
COMBINED_JSONB mode: best for dynamic metadata with frequent queries on large datasets.
Advantages:
Disadvantages:
Usage Example:
import java.util.Collections;
MetadataStorageConfig config = DefaultMetadataStorageConfig.builder()
.storageMode(MetadataStorageMode.COMBINED_JSONB)
.columnDefinitions(Collections.singletonList("metadata JSONB NULL"))
.indexes(Arrays.asList(
"(metadata->'source')",
"(metadata->'page')"
))
.indexType("GIN") // GIN index works well with JSONB
.build();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.build();
If no configuration is specified, the default is used:
// These are equivalent:
// Explicit default
MetadataStorageConfig config = DefaultMetadataStorageConfig.defaultConfig();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.build();
// Implicit default (no metadataStorageConfig specified)
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.build();
Recommended for production with dynamic metadata:
import dev.langchain4j.store.embedding.pgvector.DefaultMetadataStorageConfig;
import dev.langchain4j.store.embedding.pgvector.MetadataStorageMode;
import java.util.Collections;
import java.util.Arrays;
MetadataStorageConfig config = DefaultMetadataStorageConfig.builder()
.storageMode(MetadataStorageMode.COMBINED_JSONB)
.columnDefinitions(Collections.singletonList("metadata JSONB NULL"))
.indexes(Arrays.asList(
"(metadata->'source')",
"(metadata->'created_date')",
"(metadata->'document_id')"
))
.indexType("GIN")
.build();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.useIndex(true)
.indexListSize(100)
.build();
For applications with fixed metadata structure:
MetadataStorageConfig config = DefaultMetadataStorageConfig.builder()
.storageMode(MetadataStorageMode.COLUMN_PER_KEY)
.columnDefinitions(Arrays.asList(
"document_id UUID NULL",
"source VARCHAR(255) NOT NULL",
"page INTEGER NULL",
"chapter VARCHAR(255) NULL",
"created_date TIMESTAMP NULL",
"language VARCHAR(10) NULL"
))
.indexes(Arrays.asList(
"document_id",
"source",
"created_date"
))
.indexType("BTREE")
.build();
PgVectorEmbeddingStore store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("postgres")
.user("my_user")
.password("my_password")
.table("embeddings")
.dimension(384)
.metadataStorageConfig(config)
.build();
Metadata configuration affects how filters are executed, not how they are written:
import dev.langchain4j.store.embedding.filter.Filter;
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
// Filtering works the same regardless of storage mode
Filter filter = MetadataFilterBuilder.metadataKey("source").isEqualTo("documentation");
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(5)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> result = embeddingStore.search(request);
However, performance varies by storage mode:
.indexType("BTREE")
.indexType("GIN")
createTable is true
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-pgvector@1.11.0