CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pgvector

PostgreSQL pgvector extension support for Python with vector operations and similarity search across multiple database libraries

Pending
Overview
Eval results
Files

docs/database-drivers.md

Database Driver Support

Direct database driver integration for applications using raw PostgreSQL connections, providing vector type registration and serialization across all major Python PostgreSQL drivers.

Capabilities

Psycopg 3 Support

Modern PostgreSQL adapter with both synchronous and asynchronous vector type registration.

def register_vector(context):
    """
    Enable pgvector type handling on a Psycopg 3 context.

    Args:
        context: The Psycopg 3 connection or cursor to register on.

    Types registered:
        - vector (VECTOR)
        - bit (BIT)
        - halfvec (HALFVEC), when available
        - sparsevec (SPARSEVEC), when available
    """

def register_vector_async(context):
    """
    Async counterpart of vector type registration for Psycopg 3.

    Args:
        context: The Psycopg 3 async connection or cursor to register on.

    Returns:
        An awaitable; awaiting it registers every available vector type.
    """

Usage Examples:

import psycopg
from pgvector.psycopg import register_vector, register_vector_async
from pgvector import Vector, HalfVector, SparseVector, Bit

# Synchronous connection.
# Used as a context manager, a Psycopg 3 connection commits when the block
# succeeds, rolls back when it raises, and is closed in both cases, so no
# explicit commit()/close() is needed and nothing leaks on error.
with psycopg.connect("postgresql://user:password@localhost/dbname") as conn:
    # The extension must be enabled before its types can be registered;
    # registration fails if the vector type does not exist in the database.
    conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    register_vector(conn)

    # Insert and query vectors
    with conn.cursor() as cur:
        # Create table
        cur.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id SERIAL PRIMARY KEY,
                content TEXT,
                embedding VECTOR(384),
                sparse_features SPARSEVEC(1000),
                binary_hash BIT(64)
            )
        """)

        # Insert vector data; sizes must match the column declarations above
        embedding = Vector([0.1, 0.2] * 192)  # 384 dimensions
        sparse_vec = SparseVector({0: 1.0, 100: 2.5}, 1000)
        binary_vec = Bit("1010" * 16)  # 64 bits

        cur.execute(
            "INSERT INTO documents (content, embedding, sparse_features, binary_hash) VALUES (%s, %s, %s, %s)",
            ("Sample document", embedding, sparse_vec, binary_vec),
        )

        # Query with similarity search (ascending distance = closest first)
        query_vector = Vector([0.15, 0.25] * 192)
        cur.execute(
            "SELECT content, embedding <-> %s as distance FROM documents ORDER BY distance LIMIT 5",
            (query_vector,),
        )

        for content, distance in cur.fetchall():
            print(f"Content: {content}, Distance: {distance}")

# Asynchronous connection
import asyncio

async def async_vector_operations():
    """
    Print every document whose embedding lies within distance 0.5 of a probe.

    Opens an async Psycopg 3 connection, registers the vector types on it,
    runs the filter query and prints the content of each matching row.
    """
    dsn = "postgresql://user:password@localhost/dbname"
    nearby_sql = "SELECT content FROM documents WHERE embedding <-> %s < 0.5"
    probe = Vector([0.1, 0.2, 0.3] * 128)

    connection = await psycopg.AsyncConnection.connect(dsn)
    async with connection:
        await register_vector_async(connection)

        async with connection.cursor() as cursor:
            await cursor.execute(nearby_sql, (probe,))

            # Each row holds a single column: the document content.
            async for (content,) in cursor:
                print(f"Similar document: {content}")


# Run async example
asyncio.run(async_vector_operations())

Psycopg 2 Support

Legacy PostgreSQL adapter support for vector operations.

def register_vector(conn_or_curs, globally=False, arrays=True):
    """
    Enable pgvector type handling for Psycopg 2.

    Args:
        conn_or_curs: The Psycopg 2 connection or cursor to register on.
        globally (bool, optional): When True, the registration applies to
            all connections rather than only this one. Defaults to False.
        arrays (bool, optional): When True, the array types are registered
            too. Defaults to True.
    """

Usage Examples:

import psycopg2
from pgvector.psycopg2 import register_vector
from pgvector import Vector, SparseVector

# Connect to database
conn = psycopg2.connect("postgresql://user:password@localhost/dbname")

try:
    # In Psycopg 2, `with conn` only scopes a transaction (commit on success,
    # rollback on error) and does NOT close the connection, hence the
    # surrounding try/finally. The cursor context closes the cursor.
    with conn, conn.cursor() as cur:
        # The extension must be enabled before its types can be registered.
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector")

        # Register vector types for this connection
        register_vector(conn)

        # Create table with vector column
        cur.execute("""
            CREATE TABLE IF NOT EXISTS embeddings (
                id SERIAL PRIMARY KEY,
                text TEXT,
                embedding VECTOR(512)
            )
        """)

        # Insert vector data
        texts_and_vectors = [
            ("First document", Vector([0.1] * 512)),
            ("Second document", Vector([0.2] * 512)),
            ("Third document", Vector([0.3] * 512)),
        ]

        cur.executemany(
            "INSERT INTO embeddings (text, embedding) VALUES (%s, %s)",
            texts_and_vectors,
        )

        # Similarity search (ascending distance = closest first)
        query_vector = Vector([0.15] * 512)
        cur.execute(
            """
    SELECT text, embedding <-> %s as distance 
    FROM embeddings 
    ORDER BY distance 
    LIMIT 3
    """,
            (query_vector,),
        )

        for text, distance in cur.fetchall():
            print(f"Text: {text}, Distance: {distance}")
finally:
    # Always release the connection, even when a statement above failed.
    conn.close()

asyncpg Support

High-performance asynchronous PostgreSQL driver integration.

def register_vector(connection):
    """
    Enable pgvector type handling on an asyncpg connection.

    Args:
        connection: The asyncpg connection instance to register on.

    Returns:
        An awaitable; awaiting it registers every available vector type.
    """

Usage Examples:

import asyncio
import asyncpg
from pgvector.asyncpg import register_vector
from pgvector import Vector, HalfVector

async def asyncpg_example():
    """
    Walk through pgvector usage with asyncpg.

    Creates the `products` table, inserts three rows with VECTOR and HALFVEC
    values, prints the rows nearest to a query vector, then counts the rows
    within a distance threshold using a prepared statement. The connection
    is closed even if one of the steps raises.
    """
    # Connect to database
    conn = await asyncpg.connect("postgresql://user:password@localhost/dbname")

    try:
        # The extension must be enabled before its types can be registered.
        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")

        # Register vector types
        await register_vector(conn)

        # Create table
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS products (
                id SERIAL PRIMARY KEY,
                name TEXT,
                features VECTOR(256),
                description_embedding HALFVEC(128)
            )
        """)

        # Insert data with vectors
        products = [
            ("Laptop", Vector([0.1] * 256), HalfVector([0.2] * 128)),
            ("Phone", Vector([0.3] * 256), HalfVector([0.4] * 128)),
            ("Tablet", Vector([0.5] * 256), HalfVector([0.6] * 128)),
        ]

        await conn.executemany(
            "INSERT INTO products (name, features, description_embedding) VALUES ($1, $2, $3)",
            products,
        )

        # Vector similarity search.
        # The `<->` result is a distance, so although the column is aliased
        # "similarity", ascending order puts the closest rows first.
        query_features = Vector([0.2] * 256)

        results = await conn.fetch(
            """
        SELECT name, features <-> $1 as similarity
        FROM products 
        ORDER BY similarity
        LIMIT 5
        """,
            query_features,
        )

        for row in results:
            print(f"Product: {row['name']}, Similarity: {row['similarity']}")

        # Threshold filter through a prepared statement
        stmt = await conn.prepare(
            "SELECT name FROM products WHERE features <-> $1 < $2"
        )

        similar_products = await stmt.fetch(query_features, 0.5)
        print(f"Found {len(similar_products)} similar products")
    finally:
        # Always release the connection, even when a step above failed.
        await conn.close()


# Run async example
asyncio.run(asyncpg_example())

pg8000 Support

Pure Python PostgreSQL driver integration.

def register_vector(context):
    """
    Enable pgvector type handling on a pg8000 connection.

    Args:
        context: The pg8000 connection instance to register on.
    """

Usage Examples:

import pg8000.native
from pgvector.pg8000 import register_vector
from pgvector import Vector, Bit

# Connect to database
conn = pg8000.native.Connection(
    user="user",
    password="password",
    host="localhost",
    database="dbname",
)

try:
    # The extension must be enabled before its types can be registered.
    conn.run("CREATE EXTENSION IF NOT EXISTS vector")

    # Register vector types
    register_vector(conn)

    # Create table with vector and bit columns
    conn.run("""
    CREATE TABLE IF NOT EXISTS items (
        id SERIAL PRIMARY KEY,
        name TEXT,
        embedding VECTOR(128),
        tags BIT(32)
    )
""")

    # Insert data
    # NOTE(review): confirm that the pg8000 integration adapts `Bit`
    # parameters; if it does not, pass the bit string as plain text instead.
    items = [
        ("Item 1", Vector([0.1] * 128), Bit("1010" * 8)),
        ("Item 2", Vector([0.2] * 128), Bit("0101" * 8)),
        ("Item 3", Vector([0.3] * 128), Bit("1100" * 8)),
    ]

    for name, embedding, tags in items:
        conn.run(
            "INSERT INTO items (name, embedding, tags) VALUES (:name, :embedding, :tags)",
            name=name,
            embedding=embedding,
            tags=tags,
        )

    # Query with vector similarity (ascending distance = closest first)
    query_vector = Vector([0.15] * 128)
    results = conn.run(
        """
    SELECT name, embedding <-> :query as distance
    FROM items
    ORDER BY distance
    LIMIT 3
    """,
        query=query_vector,
    )

    for row in results:
        print(f"Item: {row[0]}, Distance: {row[1]}")

    # Hamming distance for bit vectors
    query_bits = Bit("1010" * 8)
    bit_results = conn.run(
        """
    SELECT name, tags <~> :query_bits as hamming_distance
    FROM items
    ORDER BY hamming_distance
    LIMIT 3
    """,
        query_bits=query_bits,
    )

    for row in bit_results:
        print(f"Item: {row[0]}, Hamming Distance: {row[1]}")
finally:
    # Always release the connection, even when a statement above failed.
    conn.close()

Driver Comparison

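| Driver | Sync | Async | Performance | Pure Python | Vector Types |
|---|---|---|---|---|---|
| Psycopg 3 | Yes | Yes | High | No | All |
| Psycopg 2 | Yes | No | High | No | All |
| asyncpg | No | Yes | Very High | No | All |
| pg8000 | Yes | No | Medium | Yes | All |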

Error Handling

All drivers will raise appropriate database errors for:

  • Invalid vector dimensions
  • Unsupported vector operations
  • Connection failures
  • PostgreSQL extension not installed
  • Type registration failures

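Enable the extension first (CREATE EXTENSION IF NOT EXISTS vector), then register the vector types on each new connection immediately after it is established and before running any vector operations. Registration fails if the extension's types do not yet exist in the database.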

Install with Tessl CLI

npx tessl i tessl/pypi-pgvector

docs

core-vectors.md

database-drivers.md

django-integration.md

index.md

peewee-integration.md

sqlalchemy-integration.md

tile.json