tessl/pypi-pgvector

PostgreSQL pgvector extension support for Python with vector operations and similarity search across multiple database libraries

—

Pending

Overview

Eval results

Files

Core Vector Types

Name: tessl/pypi-pgvector
Author: tessl

These classes are the foundation of pgvector, providing four distinct vector types optimized for different use cases and memory requirements.

Capabilities

Vector Class (32-bit Float)

Standard precision vectors using 32-bit floating point numbers for full precision vector operations.

class Vector:
    """Standard-precision vector whose elements are 32-bit floats."""

    def __init__(self, value):
        """
        Create a vector from various input types.

        Args:
            value: Array-like input: a list, tuple, or numpy array of numbers.

        Raises:
            ValueError: If input has wrong dimensions or invalid format
        """

    def __repr__(self) -> str:
        """Return the string representation of the vector."""

    def __eq__(self, other) -> bool:
        """
        Compare vectors for equality.

        Args:
            other: Object to compare against.

        Returns:
            True if the vectors are equal, False otherwise.
        """

    def dimensions(self) -> int:
        """Return the number of dimensions (elements) in the vector."""

    def to_list(self) -> list:
        """Return the vector's elements as a Python list."""

    def to_numpy(self) -> np.ndarray:
        """Return the vector as a numpy array with dtype float32."""

    def to_text(self) -> str:
        """
        Return the PostgreSQL text representation.

        The text form is a bracketed, comma-separated list such as
        '[1,2,3]' (the same form accepted by from_text).
        """

    def to_binary(self) -> bytes:
        """Return the PostgreSQL binary representation as bytes."""

    @classmethod
    def from_text(cls, value: str) -> 'Vector':
        """
        Create vector from PostgreSQL text format.

        Args:
            value: Text representation, e.g. '[1,2,3]'.

        Returns:
            A new Vector.
        """

    @classmethod
    def from_binary(cls, value: bytes) -> 'Vector':
        """
        Create vector from PostgreSQL binary format.

        Args:
            value: Binary representation, such as the output of to_binary().

        Returns:
            A new Vector.
        """

    @classmethod
    def _to_db(cls, value, dim=None):
        """
        Convert value to database text format.

        Internal helper (leading underscore); not part of the public API.

        Args:
            value: Value to convert.
            dim: NOTE(review): presumably the expected number of dimensions
                to validate against -- confirm against the implementation.
        """

    @classmethod
    def _to_db_binary(cls, value):
        """
        Convert value to database binary format.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _from_db(cls, value):
        """
        Convert database value to numpy array.

        Internal helper (leading underscore); note that the result is a
        numpy array rather than a Vector instance.
        """

    @classmethod
    def _from_db_binary(cls, value):
        """
        Convert database binary value to numpy array.

        Internal helper (leading underscore); note that the result is a
        numpy array rather than a Vector instance.
        """

Usage Examples:

from pgvector import Vector
import numpy as np

# Create from a Python list
v1 = Vector([1.0, 2.5, 3.2])

# Create from a numpy array
arr = np.array([1, 2, 3], dtype=np.float32)
v2 = Vector(arr)

# Create from a tuple
v3 = Vector((1.5, 2.5, 3.5))

# Inspect the vector
dimensions = v1.dimensions()  # 3
as_list = v1.to_list()  # approximately [1.0, 2.5, 3.2]; elements are 32-bit floats, so 3.2 may come back rounded (TODO confirm exact output)
as_numpy = v1.to_numpy()  # numpy array with dtype float32

# PostgreSQL serialization
text_repr = v1.to_text()  # bracketed text such as '[1,2.5,3.2]' (exact float formatting may differ -- TODO confirm)
binary_repr = v1.to_binary()  # bytes

# Parse from PostgreSQL formats
v4 = Vector.from_text('[1,2,3]')
v5 = Vector.from_binary(binary_repr)  # round-trips the bytes produced by to_binary() above

HalfVector Class (16-bit Float)

Memory-efficient vectors using 16-bit floating point numbers, ideal for large-scale vector storage with acceptable precision loss.

class HalfVector:
    """
    Half-precision vector whose elements are 16-bit floats.

    Trades some precision for lower memory use compared with 32-bit vectors.
    """

    def __init__(self, value):
        """
        Create a half-precision vector.

        Args:
            value: Array-like input: a list, tuple, or numpy array of numbers.

        Raises:
            ValueError: If input has wrong dimensions or invalid format
        """

    def __repr__(self) -> str:
        """Return the string representation of the half vector."""

    def __eq__(self, other) -> bool:
        """
        Compare half vectors for equality.

        Args:
            other: Object to compare against.

        Returns:
            True if the vectors are equal, False otherwise.
        """

    def dimensions(self) -> int:
        """Return the number of dimensions (elements) in the vector."""

    def to_list(self) -> list:
        """Return the vector's elements as a Python list."""

    def to_numpy(self) -> np.ndarray:
        """Return the vector as a numpy array with dtype float16."""

    def to_text(self) -> str:
        """Return the PostgreSQL text representation."""

    def to_binary(self) -> bytes:
        """Return the PostgreSQL binary representation as bytes."""

    @classmethod
    def from_text(cls, value: str) -> 'HalfVector':
        """
        Create half vector from PostgreSQL text format.

        Args:
            value: Text representation of the vector.

        Returns:
            A new HalfVector.
        """

    @classmethod
    def from_binary(cls, value: bytes) -> 'HalfVector':
        """
        Create half vector from PostgreSQL binary format.

        Args:
            value: Binary representation, such as the output of to_binary().

        Returns:
            A new HalfVector.
        """

    @classmethod
    def _to_db(cls, value, dim=None):
        """
        Convert value to database text format.

        Internal helper (leading underscore); not part of the public API.

        Args:
            value: Value to convert.
            dim: NOTE(review): presumably the expected number of dimensions
                to validate against -- confirm against the implementation.
        """

    @classmethod
    def _to_db_binary(cls, value):
        """
        Convert value to database binary format.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _from_db(cls, value):
        """
        Convert database value to HalfVector.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _from_db_binary(cls, value):
        """
        Convert database binary value to HalfVector.

        Internal helper (leading underscore); not part of the public API.
        """

Usage Examples:

from pgvector import HalfVector

# Create a half-precision vector (16-bit elements, so roughly half the memory of a 32-bit Vector)
hv = HalfVector([1.5, 2.0, 3.5])

# The inspection and serialization methods mirror the Vector class
dimensions = hv.dimensions()
as_list = hv.to_list()
text_format = hv.to_text()

# Half precision is a memory-efficient choice for large vectors and datasets
large_half_vector = HalfVector(list(range(1000)))

SparseVector Class

Efficient storage for high-dimensional vectors with many zero values, storing only non-zero elements with their indices.

class SparseVector:
    """
    Sparse vector that stores only non-zero elements with their indices.

    Suited to high-dimensional vectors with many zero values.
    """

    def __init__(self, value, dimensions=None, /):
        """
        Create a sparse vector from various input formats.

        Both parameters are positional-only (note the ``/`` in the signature).

        Args:
            value: dict mapping index to value, scipy sparse array, or dense
                array-like
            dimensions: Total number of dimensions. Required when value is a
                dict; must be omitted for every other input type.

        Raises:
            ValueError: If dimensions missing for dict input or extra for others
        """

    def __repr__(self) -> str:
        """Return the string representation of the sparse vector."""

    def __eq__(self, other) -> bool:
        """
        Compare sparse vectors for equality.

        Args:
            other: Object to compare against.

        Returns:
            True if the vectors are equal, False otherwise.
        """

    def dimensions(self) -> int:
        """Return the total number of dimensions, zeros included."""

    def indices(self) -> list:
        """Return the indices of the non-zero elements."""

    def values(self) -> list:
        """Return the non-zero values."""

    def to_coo(self):
        """
        Convert to scipy COO sparse array.

        NOTE(review): presumably requires scipy to be installed -- confirm.
        """

    def to_list(self) -> list:
        """Return a dense Python list, with zeros filled in."""

    def to_numpy(self) -> np.ndarray:
        """Return a dense numpy array, with zeros filled in."""

    def to_text(self) -> str:
        """Return the PostgreSQL text representation."""

    def to_binary(self) -> bytes:
        """Return the PostgreSQL binary representation as bytes."""

    @classmethod
    def from_text(cls, value: str) -> 'SparseVector':
        """
        Create sparse vector from PostgreSQL text format.

        Args:
            value: Text representation of the sparse vector.

        Returns:
            A new SparseVector.
        """

    @classmethod
    def from_binary(cls, value: bytes) -> 'SparseVector':
        """
        Create sparse vector from PostgreSQL binary format.

        Args:
            value: Binary representation, such as the output of to_binary().

        Returns:
            A new SparseVector.
        """

    @classmethod
    def _from_parts(cls, dim: int, indices: list, values: list) -> 'SparseVector':
        """
        Create sparse vector from dimensions, indices, and values.

        Internal helper (leading underscore); not part of the public API.

        Args:
            dim: Total number of dimensions.
            indices: Indices of the non-zero elements.
            values: Non-zero values.
                NOTE(review): presumably paired positionally with indices --
                confirm against the implementation.

        Returns:
            A new SparseVector.
        """

    @classmethod
    def _to_db(cls, value, dim=None):
        """
        Convert value to database text format.

        Internal helper (leading underscore); not part of the public API.

        Args:
            value: Value to convert.
            dim: NOTE(review): presumably the expected number of dimensions
                to validate against -- confirm against the implementation.
        """

    @classmethod
    def _to_db_binary(cls, value):
        """
        Convert value to database binary format.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _from_db(cls, value):
        """
        Convert database value to SparseVector.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _from_db_binary(cls, value):
        """
        Convert database binary value to SparseVector.

        Internal helper (leading underscore); not part of the public API.
        """

Usage Examples:

from pgvector import SparseVector
import numpy as np

# Create from a dictionary of {index: value}; the total dimension count (1000)
# is required for dict input and is passed positionally
sv1 = SparseVector({0: 1.0, 10: 2.5, 100: 3.0}, 1000)

# Create from a dense array (only the non-zero entries are stored);
# no dimensions argument is given for non-dict input
dense = [1.0, 0.0, 0.0, 2.5, 0.0]
sv2 = SparseVector(dense)

# Create from a scipy sparse array (scipy is optional, hence the guarded import)
try:
    from scipy.sparse import coo_array
    coords = ([0, 0, 0], [0, 2, 4])  # (row indices, column indices) of the non-zero entries
    data = [1.0, 2.5, 3.0]
    sparse_array = coo_array((data, coords), shape=(1, 5))  # a single row with 5 columns
    sv3 = SparseVector(sparse_array)
except ImportError:
    pass  # scipy not available; skip this example

# Access the sparse structure
dimensions = sv1.dimensions()  # 1000
indices = sv1.indices()  # [0, 10, 100]
values = sv1.values()  # [1.0, 2.5, 3.0]

# Convert to dense formats
dense_list = sv1.to_list()  # Full 1000-element list with zeros
dense_numpy = sv1.to_numpy()  # Dense numpy array

# Convert back to a scipy sparse array
sparse_coo = sv1.to_coo()  # COO sparse array

Bit Class

Binary vectors for bit operations supporting Hamming and Jaccard distance calculations.

class Bit:
    """Binary (bit) vector, used for Hamming and Jaccard distance calculations."""

    def __init__(self, value):
        """
        Create a bit vector from various input formats.

        Args:
            value: bytes, binary string (e.g. "1010"), or boolean array-like
                (list or numpy array of booleans)

        Raises:
            ValueError: If input format is invalid
        """

    def __repr__(self) -> str:
        """Return the string representation of the bit vector."""

    def __eq__(self, other) -> bool:
        """
        Compare bit vectors for equality.

        Args:
            other: Object to compare against.

        Returns:
            True if the bit vectors are equal, False otherwise.
        """

    def to_list(self) -> list:
        """Return the bits as a list of booleans."""

    def to_numpy(self) -> np.ndarray:
        """Return the bits as a boolean numpy array."""

    def to_text(self) -> str:
        """Return the bits as a binary string such as "1010"."""

    def to_binary(self) -> bytes:
        """Return the PostgreSQL binary representation as bytes."""

    @classmethod
    def from_text(cls, value: str) -> 'Bit':
        """
        Create bit vector from binary string.

        Args:
            value: Binary string, e.g. "1010".

        Returns:
            A new Bit.
        """

    @classmethod
    def from_binary(cls, value: bytes) -> 'Bit':
        """
        Create bit vector from PostgreSQL binary format.

        Args:
            value: Binary representation, such as the output of to_binary().

        Returns:
            A new Bit.
        """

    @classmethod
    def _to_db(cls, value):
        """
        Convert value to database text format.

        Internal helper (leading underscore); not part of the public API.
        """

    @classmethod
    def _to_db_binary(cls, value):
        """
        Convert value to database binary format.

        Internal helper (leading underscore); not part of the public API.
        """

Usage Examples:

from pgvector import Bit
import numpy as np

# Create from a binary string of '0'/'1' characters
b1 = Bit("1010")

# Create from a list of booleans
b2 = Bit([True, False, True, False])

# Create from a boolean numpy array
bool_array = np.array([True, False, True, True], dtype=bool)
b3 = Bit(bool_array)

# Create from raw bytes
b4 = Bit(b'\x0f')  # Represents "00001111"

# Convert to different formats
as_list = b1.to_list()  # [True, False, True, False]
as_numpy = b1.to_numpy()  # boolean numpy array
as_text = b1.to_text()  # "1010"

# PostgreSQL serialization round trip
binary_format = b1.to_binary()
b5 = Bit.from_binary(binary_format)