PostgreSQL pgvector extension support for Python with vector operations and similarity search across multiple database libraries
—
Complete Django ORM support providing vector field types, distance functions, and indexing capabilities for building vector-powered Django applications with familiar ORM patterns.
Django model fields for storing different vector types in PostgreSQL with pgvector extension.
class VectorField(Field):
"""
Django field for storing Vector (float32) data.
Args:
dimensions (int, optional): Fixed number of dimensions
**kwargs: Standard Django field parameters
"""
class HalfVectorField(Field):
"""
Django field for storing HalfVector (float16) data.
Args:
dimensions (int, optional): Fixed number of dimensions
**kwargs: Standard Django field parameters
"""
class SparseVectorField(Field):
"""
Django field for storing SparseVector data.
Args:
dimensions (int, optional): Fixed number of dimensions
**kwargs: Standard Django field parameters
"""
class BitField(Field):
"""
Django field for storing Bit vector data.
Args:
length (int, optional): Fixed number of bits (PostgreSQL treats an unsized bit column as bit(1))
**kwargs: Standard Django field parameters
"""
Usage Examples:
from django.db import models
from pgvector.django import VectorField, HalfVectorField, SparseVectorField, BitField
class Document(models.Model):
content = models.TextField()
embedding = VectorField(dimensions=1536) # OpenAI embeddings
title_embedding = HalfVectorField(dimensions=768) # Memory efficient
sparse_features = SparseVectorField(dimensions=10000) # High-dimensional sparse
binary_hash = BitField(length=7) # Binary features (fixed-length 7-bit string)
class Meta:
db_table = 'documents'
# Create model instances
from pgvector import Vector, HalfVector, SparseVector, Bit
doc = Document(
content="Sample document",
embedding=Vector([0.1, 0.2, 0.3] * 512), # 1536 dimensions
title_embedding=HalfVector([0.5, 0.6, 0.7] * 256), # 768 dimensions
sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),
binary_hash=Bit("1010110")
)
doc.save()
Django ORM functions for calculating vector similarities and distances in database queries.
class L2Distance(Func):
"""
Euclidean (L2) distance function.
Args:
expression: Model field or expression
vector: Vector, HalfVector, SparseVector, or Value
**kwargs: Additional function parameters
"""
class CosineDistance(Func):
"""
Cosine distance function (1 - cosine similarity).
Args:
expression: Model field or expression
vector: Vector, HalfVector, SparseVector, or Value
**kwargs: Additional function parameters
"""
class MaxInnerProduct(Func):
"""
Maximum inner product (negative inner product) function.
Args:
expression: Model field or expression
vector: Vector, HalfVector, SparseVector, or Value
**kwargs: Additional function parameters
"""
class L1Distance(Func):
"""
Manhattan (L1) distance function.
Args:
expression: Model field or expression
vector: Vector, HalfVector, SparseVector, or Value
**kwargs: Additional function parameters
"""
class HammingDistance(Func):
"""
Hamming distance function for bit vectors.
Args:
expression: Model field or expression
vector: Bit vector or binary string
**kwargs: Additional function parameters
"""
class JaccardDistance(Func):
"""
Jaccard distance function for bit vectors.
Args:
expression: Model field or expression
vector: Bit vector or binary string
**kwargs: Additional function parameters
"""
Usage Examples:
from django.db.models import F
from pgvector.django import L2Distance, CosineDistance, MaxInnerProduct, HammingDistance
from pgvector import Vector, Bit
# Find documents similar to query vector
query_vector = Vector([0.1, 0.2, 0.3] * 512)
# L2 (Euclidean) distance search
similar_docs = Document.objects.annotate(
distance=L2Distance('embedding', query_vector)
).order_by('distance')[:10]
# Cosine similarity search (lower cosine distance = higher similarity)
cosine_similar = Document.objects.annotate(
cosine_dist=CosineDistance('embedding', query_vector)
).order_by('cosine_dist')[:10]
# Maximum inner product search
inner_product_similar = Document.objects.annotate(
inner_product=MaxInnerProduct('embedding', query_vector)
).order_by('inner_product')[:10]
# Hamming distance for binary vectors
query_bits = Bit("1010110")
binary_similar = Document.objects.annotate(
hamming_dist=HammingDistance('binary_hash', query_bits)
).order_by('hamming_dist')[:10]
# Filter by distance threshold (annotate the distance, then filter with a lookup)
close_docs = Document.objects.annotate(
distance=L2Distance('embedding', query_vector)
).filter(distance__lt=0.5)
Django index classes for optimizing vector similarity searches with approximate nearest neighbor algorithms.
class IvfflatIndex(Index):
"""
IVFFlat index for approximate nearest neighbor search.
Args:
fields: List of field names to index
lists (int, optional): Number of inverted lists (if omitted, pgvector's default of 100 is used)
**kwargs: Standard Django index parameters
"""
class HnswIndex(Index):
"""
HNSW (Hierarchical Navigable Small World) index for approximate nearest neighbor search.
Args:
fields: List of field names to index
m (int, optional): Maximum connections per node (default: 16)
ef_construction (int, optional): Size of candidate list during construction (default: 64)
**kwargs: Standard Django index parameters
"""
Usage Examples:
from django.db import models
from pgvector.django import VectorField, HnswIndex, IvfflatIndex
class Document(models.Model):
content = models.TextField()
embedding = VectorField(dimensions=1536)
class Meta:
# Add vector indexes for performance
indexes = [
# HNSW index for high recall approximate search
HnswIndex(
fields=['embedding'],
name='embedding_hnsw_idx',
m=16, # connections per node
ef_construction=64, # build-time search quality
opclasses=['vector_l2_ops'] # operator class must match the distance function used in queries
),
# IVFFlat index for faster build times
IvfflatIndex(
fields=['embedding'],
name='embedding_ivfflat_idx',
lists=100 # number of clusters
)
]
# Indexes are automatically used by the query planner for distance searches
similar = Document.objects.annotate(
distance=L2Distance('embedding', query_vector)
).order_by('distance')[:10] # Uses index automatically
Django database extension registration for pgvector functionality.
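class VectorExtension(CreateExtension):
"""
Django migration operation that enables the pgvector extension.
Subclass of django.contrib.postgres.operations.CreateExtension preset to the 'vector' extension; add VectorExtension() to a migration's operations list.
"""
Usage Examples: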
# In Django settings.py
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': 'your_db',
# ... other settings
}
}
# Create and run migration to enable extension
# python manage.py makemigrations --empty your_app_name
# In the generated migration file:
from django.contrib.postgres.operations import CreateExtension
from django.db import migrations
class Migration(migrations.Migration):
# NOTE: this migration must run before any migration that creates a vector column
dependencies = [
('your_app', '0001_initial'),
]
operations = [
CreateExtension('vector'), # Enable pgvector extension
]
# models.py
from django.db import models
from pgvector.django import VectorField, HnswIndex
class EmbeddingModel(models.Model):
text = models.TextField()
embedding = VectorField(dimensions=384) # sentence-transformers dimension
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
indexes = [
HnswIndex(fields=['embedding'], name='embedding_hnsw_idx', opclasses=['vector_l2_ops'])
]
# views.py
from django.http import JsonResponse
from pgvector.django import L2Distance
from pgvector import Vector
def similarity_search(request):
query_embedding = Vector([0.1, 0.2] * 192) # 384 dimensions
# Find top 10 most similar embeddings
results = EmbeddingModel.objects.annotate(
distance=L2Distance('embedding', query_embedding)
).order_by('distance')[:10]
return JsonResponse([{
'text': item.text,
'distance': float(item.distance),
'created_at': item.created_at.isoformat()
} for item in results], safe=False)
Install with Tessl CLI
npx tessl i tessl/pypi-pgvector