PostgreSQL pgvector extension support for Python with vector operations and similarity search across multiple database libraries
—
Direct database driver integration for applications using raw PostgreSQL connections, providing vector type registration and serialization across all major Python PostgreSQL drivers.
Psycopg 3: a modern PostgreSQL adapter with both synchronous and asynchronous vector type registration.
def register_vector(context):
"""
Register vector types with Psycopg 3 connection.
Args:
context: Psycopg 3 connection or cursor context
Registers:
- vector type (VECTOR)
- bit type (BIT)
- halfvec type (HALFVEC, if available)
- sparsevec type (SPARSEVEC, if available)
"""
def register_vector_async(context):
"""
Asynchronously register vector types with Psycopg 3 connection.
Args:
context: Psycopg 3 async connection or cursor context
Returns:
Awaitable that registers all available vector types
"""

Usage Examples:
import psycopg
from pgvector.psycopg import register_vector, register_vector_async
from pgvector import Vector, HalfVector, SparseVector, Bit
# Synchronous connection
conn = psycopg.connect("postgresql://user:password@localhost/dbname")
# Register the pgvector types on this connection before using them.
register_vector(conn)

# Insert and query vectors
with conn.cursor() as cur:
    # Create table
    cur.execute("""
        CREATE TABLE IF NOT EXISTS documents (
            id SERIAL PRIMARY KEY,
            content TEXT,
            embedding VECTOR(384),
            sparse_features SPARSEVEC(1000),
            binary_hash BIT(64)
        )
    """)

    # Insert vector data
    embedding = Vector([0.1, 0.2] * 192)  # 384 dimensions
    sparse_vec = SparseVector({0: 1.0, 100: 2.5}, 1000)
    binary_vec = Bit("1010" * 16)  # 64 bits
    cur.execute(
        "INSERT INTO documents (content, embedding, sparse_features, binary_hash) VALUES (%s, %s, %s, %s)",
        ("Sample document", embedding, sparse_vec, binary_vec)
    )

    # Query with similarity search
    query_vector = Vector([0.15, 0.25] * 192)
    cur.execute(
        "SELECT content, embedding <-> %s as distance FROM documents ORDER BY distance LIMIT 5",
        (query_vector,)
    )
    results = cur.fetchall()
    for content, distance in results:
        print(f"Content: {content}, Distance: {distance}")

# Persist the table and the inserted row, then release the connection.
conn.commit()
conn.close()
# Asynchronous connection
import asyncio
async def async_vector_operations():
    """Print every document whose embedding is within distance 0.5 of a probe vector.

    Opens an async Psycopg 3 connection, registers the vector types on it,
    runs the filter query against the ``documents`` table and prints the
    ``content`` column of each matching row. The connection and cursor are
    closed by their ``async with`` blocks.
    """
    async with await psycopg.AsyncConnection.connect(
        "postgresql://user:password@localhost/dbname"
    ) as conn:
        # Registration is awaited here because the connection is async.
        await register_vector_async(conn)
        async with conn.cursor() as cur:
            embedding = Vector([0.1, 0.2, 0.3] * 128)  # 384 dimensions
            await cur.execute(
                "SELECT content FROM documents WHERE embedding <-> %s < 0.5",
                (embedding,)
            )
            async for row in cur:
                print(f"Similar document: {row[0]}")
# Run async example
asyncio.run(async_vector_operations())

Psycopg 2: legacy PostgreSQL adapter support for vector operations.
def register_vector(conn_or_curs, globally=False, arrays=True):
"""
Register vector types with Psycopg 2 connection.
Args:
conn_or_curs: Psycopg 2 connection or cursor
globally (bool, optional): Register globally for all connections (default: False)
arrays (bool, optional): Register array types as well (default: True)
"""

Usage Examples:
import psycopg2
from pgvector.psycopg2 import register_vector
from pgvector import Vector, SparseVector
# Connect to database
conn = psycopg2.connect("postgresql://user:password@localhost/dbname")

# Register vector types for this connection
register_vector(conn)
cur = conn.cursor()

# Create table with vector column
cur.execute("""
    CREATE TABLE IF NOT EXISTS embeddings (
        id SERIAL PRIMARY KEY,
        text TEXT,
        embedding VECTOR(512)
    )
""")

# Insert vector data
texts_and_vectors = [
    ("First document", Vector([0.1] * 512)),
    ("Second document", Vector([0.2] * 512)),
    ("Third document", Vector([0.3] * 512))
]
cur.executemany(
    "INSERT INTO embeddings (text, embedding) VALUES (%s, %s)",
    texts_and_vectors
)

# Similarity search
query_vector = Vector([0.15] * 512)
cur.execute(
    """
    SELECT text, embedding <-> %s as distance
    FROM embeddings
    ORDER BY distance
    LIMIT 3
    """,
    (query_vector,)
)
for text, distance in cur.fetchall():
    print(f"Text: {text}, Distance: {distance}")

# Persist the table and the inserted rows.
conn.commit()
conn.close()

asyncpg: high-performance asynchronous PostgreSQL driver integration.
def register_vector(connection):
"""
Register vector types with asyncpg connection.
Args:
connection: asyncpg connection instance
Returns:
Awaitable that registers all available vector types
"""

Usage Examples:
import asyncio
import asyncpg
from pgvector.asyncpg import register_vector
from pgvector import Vector, HalfVector
async def asyncpg_example():
    """Create, populate and query a ``products`` table through asyncpg.

    Connects, registers the vector types on the connection, creates the
    table if needed, inserts three sample products, prints the five nearest
    products to a probe vector, and finally counts the products within
    distance 0.5 using a prepared statement. The connection is closed even
    when one of the statements fails.
    """
    # Connect to database
    conn = await asyncpg.connect("postgresql://user:password@localhost/dbname")
    try:
        # Register vector types
        await register_vector(conn)

        # Create table
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS products (
                id SERIAL PRIMARY KEY,
                name TEXT,
                features VECTOR(256),
                description_embedding HALFVEC(128)
            )
        """)

        # Insert data with vectors
        products = [
            ("Laptop", Vector([0.1] * 256), HalfVector([0.2] * 128)),
            ("Phone", Vector([0.3] * 256), HalfVector([0.4] * 128)),
            ("Tablet", Vector([0.5] * 256), HalfVector([0.6] * 128))
        ]
        await conn.executemany(
            "INSERT INTO products (name, features, description_embedding) VALUES ($1, $2, $3)",
            products
        )

        # Vector similarity search
        query_features = Vector([0.2] * 256)
        results = await conn.fetch(
            """
            SELECT name, features <-> $1 as similarity
            FROM products
            ORDER BY similarity
            LIMIT 5
            """,
            query_features
        )
        for row in results:
            print(f"Product: {row['name']}, Similarity: {row['similarity']}")

        # Batch operations with prepared statements
        stmt = await conn.prepare(
            "SELECT name FROM products WHERE features <-> $1 < $2"
        )
        similar_products = await stmt.fetch(query_features, 0.5)
        print(f"Found {len(similar_products)} similar products")
    finally:
        # Release the connection even if a statement above raised.
        await conn.close()
# Run async example
asyncio.run(asyncpg_example())

pg8000: pure Python PostgreSQL driver integration.
def register_vector(context):
"""
Register vector types with pg8000 connection.
Args:
context: pg8000 connection instance
"""

Usage Examples:
import pg8000.native
from pgvector.pg8000 import register_vector
from pgvector import Vector, Bit
# Connect to database
conn = pg8000.native.Connection(
    user="user",
    password="password",
    host="localhost",
    database="dbname"
)

# Register vector types
register_vector(conn)

# Create table with vector and bit columns
conn.run("""
    CREATE TABLE IF NOT EXISTS items (
        id SERIAL PRIMARY KEY,
        name TEXT,
        embedding VECTOR(128),
        tags BIT(32)
    )
""")

# Insert data
items = [
    ("Item 1", Vector([0.1] * 128), Bit("1010" * 8)),
    ("Item 2", Vector([0.2] * 128), Bit("0101" * 8)),
    ("Item 3", Vector([0.3] * 128), Bit("1100" * 8))
]
for name, embedding, tags in items:
    # Named placeholders (:name) are filled from the keyword arguments.
    conn.run(
        "INSERT INTO items (name, embedding, tags) VALUES (:name, :embedding, :tags)",
        name=name,
        embedding=embedding,
        tags=tags
    )

# Query with vector similarity
query_vector = Vector([0.15] * 128)
results = conn.run(
    """
    SELECT name, embedding <-> :query as distance
    FROM items
    ORDER BY distance
    LIMIT 3
    """,
    query=query_vector
)
for row in results:
    print(f"Item: {row[0]}, Distance: {row[1]}")

# Hamming distance for bit vectors
query_bits = Bit("1010" * 8)
bit_results = conn.run(
    """
    SELECT name, tags <~> :query_bits as hamming_distance
    FROM items
    ORDER BY hamming_distance
    LIMIT 3
    """,
    query_bits=query_bits
)
for row in bit_results:
    print(f"Item: {row[0]}, Hamming Distance: {row[1]}")
conn.close()

| Driver | Sync | Async | Performance | Pure Python | Vector Types |
|---|---|---|---|---|---|
| Psycopg 3 | ✅ | ✅ | High | No | All |
| Psycopg 2 | ✅ | ❌ | High | No | All |
| asyncpg | ❌ | ✅ | Very High | No | All |
| pg8000 | ✅ | ❌ | Medium | Yes | All |
All drivers surface failed vector operations as their normal database errors.
Register vector types immediately after connection establishment and before any vector operations.
Install with Tessl CLI
npx tessl i tessl/pypi-pgvector