Tessl Tile for pypi/pgvector@0.4.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-vectors.md database-drivers.md django-integration.md index.md peewee-integration.md sqlalchemy-integration.md

sqlalchemy-integration.mddocs/

0
# SQLAlchemy Integration
1

2
Native SQLAlchemy types and functions providing full vector support for SQLAlchemy applications with type safety and query builder integration.
3

4
## Capabilities
5

6
### Vector Types
7

8
SQLAlchemy column types for storing different vector types in PostgreSQL with pgvector extension.
9

10
```python { .api }
11
class VECTOR(UserDefinedType):
12
    """
13
    SQLAlchemy type for Vector (float32) columns.
14
    
15
    Args:
16
        dim (int, optional): Fixed number of dimensions
17
    """
18
    
19
    def __init__(self, dim=None):
20
        """Initialize VECTOR type with optional dimension constraint."""
21
    
22
    def get_col_spec(self, **kw) -> str:
23
        """Get column specification for DDL generation."""
24
    
25
    def bind_processor(self, dialect):
26
        """Return a conversion function for processing bind values."""
27
    
28
    def literal_processor(self, dialect):
29
        """Return a conversion function for processing literal values."""
30
    
31
    def result_processor(self, dialect, coltype):
32
        """Return a conversion function for processing result values."""
33
    
34
    class comparator_factory(UserDefinedType.Comparator):
35
        """Comparator for vector distance operations."""
36
        
37
        def l2_distance(self, other):
38
            """L2 (Euclidean) distance operation."""
39
        
40
        def max_inner_product(self, other):
41
            """Maximum inner product operation."""
42
        
43
        def cosine_distance(self, other):
44
            """Cosine distance operation."""
45
        
46
        def l1_distance(self, other):
47
            """L1 (Manhattan) distance operation."""
48

49
class HALFVEC(UserDefinedType):
50
    """
51
    SQLAlchemy type for HalfVector (float16) columns.
52
    
53
    Args:
54
        dim (int, optional): Fixed number of dimensions
55
    """
56
    
57
    def __init__(self, dim=None):
58
        """Initialize HALFVEC type with optional dimension constraint."""
59
    
60
    def get_col_spec(self, **kw) -> str:
61
        """Get column specification for DDL generation."""
62
    
63
    def bind_processor(self, dialect):
64
        """Return a conversion function for processing bind values."""
65
    
66
    def literal_processor(self, dialect):
67
        """Return a conversion function for processing literal values."""
68
    
69
    def result_processor(self, dialect, coltype):
70
        """Return a conversion function for processing result values."""
71
    
72
    class comparator_factory(UserDefinedType.Comparator):
73
        """Comparator for half vector distance operations."""
74
        
75
        def l2_distance(self, other):
76
            """L2 (Euclidean) distance operation."""
77
        
78
        def max_inner_product(self, other):
79
            """Maximum inner product operation."""
80
        
81
        def cosine_distance(self, other):
82
            """Cosine distance operation."""
83
        
84
        def l1_distance(self, other):
85
            """L1 (Manhattan) distance operation."""
86

87
class SPARSEVEC(UserDefinedType):
88
    """
89
    SQLAlchemy type for SparseVector columns.
90
    
91
    Args:
92
        dim (int, optional): Fixed number of dimensions
93
    """
94
    
95
    def __init__(self, dim=None):
96
        """Initialize SPARSEVEC type with optional dimension constraint."""
97
    
98
    def get_col_spec(self, **kw) -> str:
99
        """Get column specification for DDL generation."""
100
    
101
    def bind_processor(self, dialect):
102
        """Return a conversion function for processing bind values."""
103
    
104
    def literal_processor(self, dialect):
105
        """Return a conversion function for processing literal values."""
106
    
107
    def result_processor(self, dialect, coltype):
108
        """Return a conversion function for processing result values."""
109
    
110
    class comparator_factory(UserDefinedType.Comparator):
111
        """Comparator for sparse vector distance operations."""
112
        
113
        def l2_distance(self, other):
114
            """L2 (Euclidean) distance operation."""
115
        
116
        def max_inner_product(self, other):
117
            """Maximum inner product operation."""
118
        
119
        def cosine_distance(self, other):
120
            """Cosine distance operation."""
121
        
122
        def l1_distance(self, other):
123
            """L1 (Manhattan) distance operation."""
124

125
class BIT(UserDefinedType):
126
    """
127
    SQLAlchemy type for Bit vector columns.
128
    
129
    Args:
130
        length (int, optional): Fixed bit length
131
    """
132
    
133
    def __init__(self, length=None):
134
        """Initialize BIT type with optional length constraint."""
135
    
136
    def get_col_spec(self, **kw) -> str:
137
        """Get column specification for DDL generation."""
138
    
139
    class comparator_factory(UserDefinedType.Comparator):
140
        """Comparator for bit vector distance operations."""
141
        
142
        def hamming_distance(self, other):
143
            """Hamming distance operation."""
144
        
145
        def jaccard_distance(self, other):
146
            """Jaccard distance operation."""
147

148
# Type alias for compatibility
149
Vector = VECTOR
150
```
151

152
**Usage Examples:**
153

154
```python
155
from sqlalchemy import Column, Integer, Text, create_engine
156
from sqlalchemy.ext.declarative import declarative_base
157
from sqlalchemy.orm import sessionmaker
158
from pgvector.sqlalchemy import VECTOR, HALFVEC, SPARSEVEC, BIT, Vector
159
from pgvector import Vector as PgVector, HalfVector, SparseVector, Bit
160

161
Base = declarative_base()
162

163
class Document(Base):
164
    __tablename__ = 'documents'
165
    
166
    id = Column(Integer, primary_key=True)
167
    content = Column(Text)
168
    embedding = Column(VECTOR(1536))  # OpenAI embeddings
169
    title_embedding = Column(HALFVEC(768))  # Memory efficient
170
    sparse_features = Column(SPARSEVEC(10000))  # High-dimensional sparse
171
    binary_hash = Column(BIT())  # Binary features
172

173
# Create engine and session
174
engine = create_engine('postgresql://user:pass@localhost/db')
175
Base.metadata.create_all(engine)
176
Session = sessionmaker(bind=engine)
177
session = Session()
178

179
# Create document with vectors
180
doc = Document(
181
    content="Sample document",
182
    embedding=PgVector([0.1, 0.2, 0.3] * 512),
183
    title_embedding=HalfVector([0.5, 0.6, 0.7] * 256),
184
    sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),
185
    binary_hash=Bit("1010110")
186
)
187
session.add(doc)
188
session.commit()
189
```
190

191
### Distance Operations
192

193
Use the built-in distance methods for similarity queries:
194

195
```python
196
from sqlalchemy import select
197

198
# L2 distance (Euclidean)
199
query_vector = PgVector([0.2, 0.3, 0.4] * 512)
200
nearest = session.scalars(
201
    select(Document)
202
    .order_by(Document.embedding.l2_distance(query_vector))
203
    .limit(5)
204
).all()
205

206
# Cosine distance  
207
similar = session.scalars(
208
    select(Document)
209
    .order_by(Document.embedding.cosine_distance(query_vector))
210
    .limit(10)
211
).all()
212

213
# Maximum inner product (similarity)
214
products = session.scalars(
215
    select(Document)
216
    .order_by(Document.embedding.max_inner_product(query_vector))
217
    .limit(10)
218
).all()
219

220
# Get distance values
221
distances = session.scalars(
222
    select(Document.embedding.l2_distance(query_vector))
223
).all()
224

225
# Filter by distance threshold
226
nearby = session.scalars(
227
    select(Document)
228
    .filter(Document.embedding.l2_distance(query_vector) < 0.5)
229
).all()
230
```
231

232
### Aggregate Functions
233

234
Built-in aggregate functions for vector operations:
235

236
```python { .api }
237
def avg(*args):
238
    """
239
    Calculate average vector across multiple rows.
240
    
241
    Args:
242
        *args: Vector columns or expressions
243
        
244
    Returns:
245
        Average vector
246
    """
247

248
def sum(*args):
249
    """
250
    Calculate sum of vectors across multiple rows.
251
    
252
    Args:
253
        *args: Vector columns or expressions
254
        
255
    Returns:
256
        Sum vector
257
    """
258
```
259

260
**Usage Examples:**
261

262
```python
263
from pgvector.sqlalchemy import avg, sum
264

265
# Calculate average embedding
266
avg_embedding = session.scalars(
267
    select(avg(Document.embedding))
268
).first()
269

270
# Calculate sum of embeddings
271
sum_embedding = session.scalars(
272
    select(sum(Document.embedding))
273
).first()
274

275
# Group by category and average
276
from sqlalchemy import func
277
category_averages = session.execute(
278
    select(Document.category, avg(Document.embedding))
279
    .group_by(Document.category)
280
).all()
281
```
282

283
### Index Creation
284

285
Create approximate indexes for faster similarity search:
286

287
```python
288
from sqlalchemy import Index
289

290
# HNSW index for L2 distance
291
hnsw_index = Index(
292
    'embedding_hnsw_idx',
293
    Document.embedding,
294
    postgresql_using='hnsw',
295
    postgresql_with={'m': 16, 'ef_construction': 64},
296
    postgresql_ops={'embedding': 'vector_l2_ops'}
297
)
298

299
# IVFFlat index for L2 distance
300
ivfflat_index = Index(
301
    'embedding_ivfflat_idx', 
302
    Document.embedding,
303
    postgresql_using='ivfflat',
304
    postgresql_with={'lists': 100},
305
    postgresql_ops={'embedding': 'vector_l2_ops'}
306
)
307

308
# Create indexes
309
hnsw_index.create(engine)
310
ivfflat_index.create(engine)
311

312
# Use different operators for different distance types:
313
# - vector_l2_ops: L2 distance (default)
314
# - vector_ip_ops: Inner product
315
# - vector_cosine_ops: Cosine distance
316
```
317

318
### Half-Precision Indexing
319

320
Index vectors at half-precision for memory efficiency:
321

322
```python
323
from sqlalchemy.sql import func
324

325
# Create half-precision index
326
half_index = Index(
327
    'embedding_half_idx',
328
    func.cast(Document.embedding, HALFVEC(1536)).label('embedding'),
329
    postgresql_using='hnsw',
330
    postgresql_with={'m': 16, 'ef_construction': 64},
331
    postgresql_ops={'embedding': 'halfvec_l2_ops'}
332
)
333

334
# Query with half-precision casting
335
query_vector = PgVector([0.1, 0.2, 0.3] * 512)
336
half_results = session.scalars(
337
    select(Document)
338
    .order_by(
339
        func.cast(Document.embedding, HALFVEC(1536))
340
        .l2_distance(query_vector)
341
    )
342
    .limit(5)
343
).all()
344
```
345

346
### Array Support
347

348
Store arrays of vectors with proper type registration:
349

350
```python
351
from sqlalchemy import ARRAY, event
352
from pgvector.psycopg import register_vector
353

354
class MultiVectorDocument(Base):
355
    __tablename__ = 'multi_vector_docs'
356
    
357
    id = Column(Integer, primary_key=True)
358
    embeddings = Column(ARRAY(VECTOR(768)))  # Array of vectors
359

360
# Register vector types with the database connection
361
@event.listens_for(engine, "connect")
362
def connect(dbapi_connection, connection_record):
363
    register_vector(dbapi_connection)
364

365
# For async connections
366
from pgvector.psycopg import register_vector_async
367

368
@event.listens_for(engine.sync_engine, "connect")  
369
def connect_async(dbapi_connection, connection_record):
370
    dbapi_connection.run_async(register_vector_async)
371
```

Version

Tile

Files

sqlalchemy-integration.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

sqlalchemy-integration.mddocs/