PostgreSQL pgvector extension support for Python with vector operations and similarity search across multiple database libraries
—
The foundation of pgvector: four distinct vector types, each optimized for different use cases and memory requirements.
Standard precision vectors using 32-bit floating point numbers for full precision vector operations.
class Vector:
def __init__(self, value):
"""
Create a vector from various input types.
Args:
value: Array-like input (list, tuple, numpy array)
Raises:
ValueError: If input has wrong dimensions or invalid format
"""
def __repr__(self) -> str:
"""String representation of the vector."""
def __eq__(self, other) -> bool:
"""Compare vectors for equality."""
def dimensions(self) -> int:
"""Get the number of dimensions in the vector."""
def to_list(self) -> list:
"""Convert vector to Python list."""
def to_numpy(self) -> np.ndarray:
"""Convert vector to numpy array with dtype float32."""
def to_text(self) -> str:
"""Convert to PostgreSQL text representation."""
def to_binary(self) -> bytes:
"""Convert to PostgreSQL binary representation."""
@classmethod
def from_text(cls, value: str) -> 'Vector':
"""Create vector from PostgreSQL text format."""
@classmethod
def from_binary(cls, value: bytes) -> 'Vector':
"""Create vector from PostgreSQL binary format."""
@classmethod
def _to_db(cls, value, dim=None):
"""Convert value to database text format."""
@classmethod
def _to_db_binary(cls, value):
"""Convert value to database binary format."""
@classmethod
def _from_db(cls, value):
"""Convert database value to numpy array."""
@classmethod
def _from_db_binary(cls, value):
"""Convert database binary value to numpy array."""
Usage Examples:
from pgvector import Vector
import numpy as np
# Create from list
v1 = Vector([1.0, 2.5, 3.2])
# Create from numpy array
arr = np.array([1, 2, 3], dtype=np.float32)
v2 = Vector(arr)
# Create from tuple
v3 = Vector((1.5, 2.5, 3.5))
# Get properties
dimensions = v1.dimensions() # 3
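as_list = v1.to_list() # approximately [1.0, 2.5, 3.2] (values are stored as float32, so 3.2 may show rounding)
as_numpy = v1.to_numpy() # numpy array
# PostgreSQL serialization
text_repr = v1.to_text() # text form such as '[1,2.5,3.2]' (exact digits may reflect float32 rounding)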
binary_repr = v1.to_binary() # bytes
# Parse from PostgreSQL formats
v4 = Vector.from_text('[1,2,3]')
v5 = Vector.from_binary(binary_repr)
Memory-efficient vectors using 16-bit floating point numbers, ideal for large-scale vector storage with acceptable precision loss.
class HalfVector:
def __init__(self, value):
"""
Create a half-precision vector.
Args:
value: Array-like input (list, tuple, numpy array)
Raises:
ValueError: If input has wrong dimensions or invalid format
"""
def __repr__(self) -> str:
"""String representation of the half vector."""
def __eq__(self, other) -> bool:
"""Compare half vectors for equality."""
def dimensions(self) -> int:
"""Get the number of dimensions in the vector."""
def to_list(self) -> list:
"""Convert vector to Python list."""
def to_numpy(self) -> np.ndarray:
"""Convert vector to numpy array with dtype float16."""
def to_text(self) -> str:
"""Convert to PostgreSQL text representation."""
def to_binary(self) -> bytes:
"""Convert to PostgreSQL binary representation."""
@classmethod
def from_text(cls, value: str) -> 'HalfVector':
"""Create half vector from PostgreSQL text format."""
@classmethod
def from_binary(cls, value: bytes) -> 'HalfVector':
"""Create half vector from PostgreSQL binary format."""
@classmethod
def _to_db(cls, value, dim=None):
"""Convert value to database text format."""
@classmethod
def _to_db_binary(cls, value):
"""Convert value to database binary format."""
@classmethod
def _from_db(cls, value):
"""Convert database value to HalfVector."""
@classmethod
def _from_db_binary(cls, value):
"""Convert database binary value to HalfVector."""
Usage Examples:
from pgvector import HalfVector
# Create half-precision vector (uses ~50% memory of Vector)
hv = HalfVector([1.5, 2.0, 3.5])
# Same API as Vector class
dimensions = hv.dimensions()
as_list = hv.to_list()
text_format = hv.to_text()
# Memory efficient for large datasets
large_half_vector = HalfVector(list(range(1000)))
Efficient storage for high-dimensional vectors with many zero values, storing only non-zero elements with their indices.
class SparseVector:
def __init__(self, value, dimensions=None, /):
"""
Create a sparse vector from various input formats.
Args:
value: dict, scipy sparse array, or dense array-like
dimensions: Required when value is a dict; must be omitted otherwise
Raises:
ValueError: If dimensions missing for dict input or extra for others
"""
def __repr__(self) -> str:
"""String representation of the sparse vector."""
def __eq__(self, other) -> bool:
"""Compare sparse vectors for equality."""
def dimensions(self) -> int:
"""Get the total number of dimensions."""
def indices(self) -> list:
"""Get indices of non-zero elements."""
def values(self) -> list:
"""Get non-zero values."""
def to_coo(self):
"""Convert to scipy COO sparse array."""
def to_list(self) -> list:
"""Convert to dense Python list."""
def to_numpy(self) -> np.ndarray:
"""Convert to dense numpy array."""
def to_text(self) -> str:
"""Convert to PostgreSQL text representation."""
def to_binary(self) -> bytes:
"""Convert to PostgreSQL binary representation."""
@classmethod
def from_text(cls, value: str) -> 'SparseVector':
"""Create sparse vector from PostgreSQL text format."""
@classmethod
def from_binary(cls, value: bytes) -> 'SparseVector':
"""Create sparse vector from PostgreSQL binary format."""
@classmethod
def _from_parts(cls, dim: int, indices: list, values: list) -> 'SparseVector':
"""Create sparse vector from dimensions, indices, and values."""
@classmethod
def _to_db(cls, value, dim=None):
"""Convert value to database text format."""
@classmethod
def _to_db_binary(cls, value):
"""Convert value to database binary format."""
@classmethod
def _from_db(cls, value):
"""Convert database value to SparseVector."""
@classmethod
def _from_db_binary(cls, value):
"""Convert database binary value to SparseVector."""
Usage Examples:
from pgvector import SparseVector
import numpy as np
# Create from dictionary (index: value)
sv1 = SparseVector({0: 1.0, 10: 2.5, 100: 3.0}, 1000)
# Create from dense array (zeros will be optimized out)
dense = [1.0, 0.0, 0.0, 2.5, 0.0]
sv2 = SparseVector(dense)
# Create from scipy sparse array
try:
from scipy.sparse import coo_array
coords = ([0, 0, 0], [0, 2, 4]) # row, col indices
data = [1.0, 2.5, 3.0]
sparse_array = coo_array((data, coords), shape=(1, 5))
sv3 = SparseVector(sparse_array)
except ImportError:
pass # scipy not available
# Access sparse structure
dimensions = sv1.dimensions() # 1000
indices = sv1.indices() # [0, 10, 100]
values = sv1.values() # [1.0, 2.5, 3.0]
# Convert to dense formats
dense_list = sv1.to_list() # Full 1000-element list with zeros
dense_numpy = sv1.to_numpy() # Dense numpy array
# Convert to scipy sparse
sparse_coo = sv1.to_coo() # COO sparse array
Binary vectors for bit operations supporting Hamming and Jaccard distance calculations.
class Bit:
def __init__(self, value):
"""
Create a bit vector from various input formats.
Args:
value: bytes, binary string, or boolean array-like
Raises:
ValueError: If input format is invalid
"""
def __repr__(self) -> str:
"""String representation of the bit vector."""
def __eq__(self, other) -> bool:
"""Compare bit vectors for equality."""
def to_list(self) -> list:
"""Convert to list of booleans."""
def to_numpy(self) -> np.ndarray:
"""Convert to boolean numpy array."""
def to_text(self) -> str:
"""Convert to binary string representation."""
def to_binary(self) -> bytes:
"""Convert to PostgreSQL binary representation."""
@classmethod
def from_text(cls, value: str) -> 'Bit':
"""Create bit vector from binary string."""
@classmethod
def from_binary(cls, value: bytes) -> 'Bit':
"""Create bit vector from PostgreSQL binary format."""
@classmethod
def _to_db(cls, value):
"""Convert value to database text format."""
@classmethod
def _to_db_binary(cls, value):
"""Convert value to database binary format."""
Usage Examples:
from pgvector import Bit
import numpy as np
# Create from binary string
b1 = Bit("1010")
# Create from boolean list
b2 = Bit([True, False, True, False])
# Create from boolean numpy array
bool_array = np.array([True, False, True, True], dtype=bool)
b3 = Bit(bool_array)
# Create from bytes
b4 = Bit(b'\x0f') # Represents "00001111"
# Convert to different formats
as_list = b1.to_list() # [True, False, True, False]
as_numpy = b1.to_numpy() # boolean numpy array
as_text = b1.to_text() # "1010"
# PostgreSQL serialization
binary_format = b1.to_binary()
b5 = Bit.from_binary(binary_format)
All vector types raise ValueError for invalid input (for example, wrong dimensions or an invalid format).
The Bit class additionally emits warnings when converting non-boolean data to boolean values.
Install with Tessl CLI
npx tessl i tessl/pypi-pgvector