PostgreSQL pgvector extension for vector similarity search and operations, enabling AI/ML applications with embedding storage and similarity queries.
import { vector } from "@electric-sql/pglite/vector";

import { PGlite } from "@electric-sql/pglite";
import { vector } from "@electric-sql/pglite/vector";
const db = await PGlite.create({
extensions: {
vector,
},
});
// Enable the pgvector extension
await db.exec("CREATE EXTENSION IF NOT EXISTS vector;");

Extension object that adds pgvector support to PGlite.
/**
* PostgreSQL pgvector extension for vector operations
* Adds support for vector data types and similarity functions
*/
const vector: Extension;

PostgreSQL vector data type for storing embeddings and performing similarity operations.
-- Vector type with specified dimensions
CREATE TABLE embeddings (
id SERIAL PRIMARY KEY,
content TEXT,
embedding VECTOR(1536) -- 1536-dimensional vector
);
-- Vector operations
SELECT * FROM embeddings
WHERE embedding <-> $1 < 0.5 -- L2 distance
ORDER BY embedding <-> $1 -- Order by similarity
LIMIT 10;

Vector similarity and distance operators provided by pgvector.
-- L2 distance (Euclidean)
SELECT embedding <-> '[1,2,3]'::vector FROM embeddings;
-- Negative inner product (<#> returns the dot product multiplied by -1, so smaller means more similar)
SELECT embedding <#> '[1,2,3]'::vector FROM embeddings;
-- Cosine distance
SELECT embedding <=> '[1,2,3]'::vector FROM embeddings;

Built-in functions for vector operations.
-- Vector dimension
SELECT vector_dims(embedding) FROM embeddings;
-- Vector norm
SELECT vector_norm(embedding) FROM embeddings;
-- Vector addition/subtraction
SELECT '[1,2,3]'::vector + '[4,5,6]'::vector;
SELECT '[1,2,3]'::vector - '[4,5,6]'::vector;

Index types for efficient vector similarity search.
-- IVFFlat index for approximate similarity search
CREATE INDEX ON embeddings USING ivfflat (embedding vector_l2_ops)
WITH (lists = 100);
-- HNSW index for approximate similarity search (if available)
CREATE INDEX ON embeddings USING hnsw (embedding vector_l2_ops);
-- Different distance metrics
CREATE INDEX ON embeddings USING ivfflat (embedding vector_ip_ops); -- Inner product
CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops); -- Cosine

/** Vector extension type */
type VectorExtension = Extension;

Usage Examples:
import { PGlite } from "@electric-sql/pglite";
import { vector } from "@electric-sql/pglite/vector";
// Start a PGlite instance with the pgvector extension registered.
const db = await PGlite.create({ extensions: { vector } });

// Enable pgvector inside the database.
await db.exec("CREATE EXTENSION IF NOT EXISTS vector;");

// Table holding each document next to its 1536-dimensional embedding.
await db.exec(`
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
embedding VECTOR(1536)
);
`);

// Random placeholder embeddings standing in for real model output.
const embedding1 = Array.from({ length: 1536 }, () => Math.random());
const embedding2 = Array.from({ length: 1536 }, () => Math.random());

// Both rows go through the same parameterized statement; the embedding is
// passed in pgvector's text form: "[v1,v2,...]".
const insertDocumentSql = `
INSERT INTO documents (title, content, embedding)
VALUES ($1, $2, $3)
`;
const seedDocuments = [
  { title: "Document 1", content: "Content about AI", embedding: embedding1 },
  { title: "Document 2", content: "Content about ML", embedding: embedding2 },
];
for (const doc of seedDocuments) {
  await db.query(insertDocumentSql, [
    doc.title,
    doc.content,
    `[${doc.embedding.join(",")}]`,
  ]);
}

// Rank documents by L2 distance to a random query embedding (nearest first).
const queryEmbedding = Array.from({ length: 1536 }, () => Math.random());
const nearestByL2Sql = `
SELECT title, content, embedding <-> $1 as distance
FROM documents
ORDER BY embedding <-> $1
LIMIT 5
`;
const similarDocs = await db.query(nearestByL2Sql, [
  `[${queryEmbedding.join(",")}]`,
]);
console.log("Similar documents:", similarDocs.rows);
// Create index for faster similarity search
// NOTE(review): pgvector recommends building IVFFlat indexes once the table
// holds representative data and sizing `lists` to the row count; `lists = 100`
// on a small demo table is illustrative only and can reduce recall.
await db.exec(`
CREATE INDEX ON documents
USING ivfflat (embedding vector_l2_ops)
WITH (lists = 100);
`);

// The query embedding in pgvector's text form, built once and reused below.
const queryVectorLiteral = `[${queryEmbedding.join(",")}]`;

// Similarity search with threshold: keep only rows whose L2 distance to the
// query is below `threshold`, nearest first.
const threshold = 0.5;
const results = await db.query(`
SELECT title, content, embedding <-> $1 as distance
FROM documents
WHERE embedding <-> $1 < $2
ORDER BY embedding <-> $1
`, [queryVectorLiteral, threshold]);

// Cosine similarity search: `<=>` is cosine *distance*, so similarity is
// 1 - distance and ascending distance order yields the most similar rows first.
const cosineResults = await db.query(`
SELECT title, content, 1 - (embedding <=> $1) as similarity
FROM documents
ORDER BY embedding <=> $1
LIMIT 10
`, [queryVectorLiteral]);

// Inner product (dot product) search: `<#>` returns the NEGATIVE inner
// product, so the score is negated back and the ordering must be ascending
// to return the highest dot products first (DESC would return the lowest).
const dotProductResults = await db.query(`
SELECT title, content, -(embedding <#> $1) as score
FROM documents
ORDER BY embedding <#> $1
LIMIT 10
`, [queryVectorLiteral]);
// Vector statistics
// Aggregate figures over all stored embeddings: row count, average
// dimensionality, and the min / max / average vector norm.
const vectorStatsSql = `
SELECT
COUNT(*) as doc_count,
AVG(vector_dims(embedding)) as avg_dimensions,
MIN(vector_norm(embedding)) as min_norm,
MAX(vector_norm(embedding)) as max_norm,
AVG(vector_norm(embedding)) as avg_norm
FROM documents
`;
const stats = await db.query(vectorStatsSql);

// Find documents with similar embeddings to each other. `a.id < b.id` keeps
// each unordered pair once and drops self-pairs; only pairs closer than 0.3
// (L2 distance) are returned, closest first.
const closePairsSql = `
SELECT
a.title as doc1,
b.title as doc2,
a.embedding <-> b.embedding as distance
FROM documents a
CROSS JOIN documents b
WHERE a.id < b.id
AND a.embedding <-> b.embedding < 0.3
ORDER BY distance
`;
const clusters = await db.query(closePairsSql);

// Vector operations: materialize a few arithmetic results into a table,
// then read them back.
const createVectorMathSql = `
CREATE TABLE vector_math AS
SELECT
'[1,2,3]'::vector + '[4,5,6]'::vector as addition,
'[1,2,3]'::vector - '[4,5,6]'::vector as subtraction,
vector_dims('[1,2,3,4,5]'::vector) as dimensions,
vector_norm('[3,4]'::vector) as norm
`;
await db.exec(createVectorMathSql);
const mathResults = await db.query("SELECT * FROM vector_math");
console.log("Vector math:", mathResults.rows);
// Full semantic search implementation
/**
 * Small semantic-search store on top of PGlite + pgvector.
 *
 * Documents are kept in the `semantic_documents` table together with a
 * 384-dimensional embedding and are ranked by cosine similarity.
 */
class SemanticSearch {
  /**
   * @param db PGlite instance; it must have been created with the `vector`
   *   extension registered, otherwise `initialize()` cannot enable pgvector.
   */
  constructor(private readonly db: PGlite) {}

  /** Serializes an embedding into pgvector's text form, e.g. `[0.1,0.2]`. */
  private static toVectorLiteral(embedding: readonly number[]): string {
    return `[${embedding.join(",")}]`;
  }

  /**
   * Enables pgvector and creates the table and its similarity index if they
   * do not exist yet. Safe to call repeatedly.
   */
  async initialize(): Promise<void> {
    await this.db.exec("CREATE EXTENSION IF NOT EXISTS vector;");
    await this.db.exec(`
CREATE TABLE IF NOT EXISTS semantic_documents (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
content TEXT NOT NULL,
embedding VECTOR(384), -- Example: sentence-transformers embedding size
created_at TIMESTAMP DEFAULT NOW()
);
`);
    // The index is created right after the (possibly empty) table. IVFFlat
    // derives its lists from the rows present at build time, so building it
    // on an empty table can make later searches miss rows. HNSW has no
    // training step and can be created before any data is inserted.
    // An index already created under this name is left untouched by
    // IF NOT EXISTS.
    await this.db.exec(`
CREATE INDEX IF NOT EXISTS semantic_documents_embedding_idx
ON semantic_documents
USING hnsw (embedding vector_cosine_ops);
`);
  }

  /**
   * Stores one document.
   *
   * @param title Document title.
   * @param content Document body.
   * @param embedding Embedding values; the column is declared VECTOR(384),
   *   so the database rejects vectors of any other length.
   */
  async addDocument(title: string, content: string, embedding: number[]): Promise<void> {
    await this.db.query(`
INSERT INTO semantic_documents (title, content, embedding)
VALUES ($1, $2, $3)
`, [title, content, SemanticSearch.toVectorLiteral(embedding)]);
  }

  /**
   * Returns the documents most similar to `queryEmbedding`.
   *
   * @param queryEmbedding Embedding to compare against the stored ones.
   * @param limit Maximum number of rows to return.
   * @param threshold Minimum cosine similarity (exclusive) a row must reach.
   * @returns Rows with `id`, `title`, `content` and `similarity`
   *   (1 - cosine distance), most similar first.
   */
  async search(queryEmbedding: number[], limit: number = 10, threshold: number = 0.7) {
    const results = await this.db.query(`
SELECT
id,
title,
content,
1 - (embedding <=> $1) as similarity
FROM semantic_documents
WHERE 1 - (embedding <=> $1) > $2
ORDER BY embedding <=> $1
LIMIT $3
`, [SemanticSearch.toVectorLiteral(queryEmbedding), threshold, limit]);
    return results.rows;
  }
}
// Usage: build the store on the existing PGlite instance and make sure the
// extension, table and index exist.
const search = new SemanticSearch(db);
await search.initialize();

// Add documents (embeddings would come from your ML model); a random
// 384-value vector stands in for a real embedding here.
const sampleDocumentEmbedding = Array.from({ length: 384 }, () => Math.random());
await search.addDocument(
  "Machine Learning Basics",
  "Introduction to ML concepts and algorithms",
  sampleDocumentEmbedding,
);

// Search for similar documents: at most 5 rows with similarity above 0.8.
const sampleQueryEmbedding = Array.from({ length: 384 }, () => Math.random());
const maxResults = 5;
const minSimilarity = 0.8;
const searchResults = await search.search(
  sampleQueryEmbedding,
  maxResults,
  minSimilarity,
);
console.log("Search results:", searchResults);