0
# Peewee Integration
1
2
Peewee ORM field types for storing and querying pgvector data in Peewee-based applications. Fields are provided for dense, half-precision, sparse, and bit vectors, and all of them can be used in similarity queries.
3
4
## Capabilities
5
6
### Vector Field Types
7
8
Peewee model fields for storing the different vector types in PostgreSQL with the pgvector extension.
9
10
```python { .api }
11
class VectorField(Field):
    """
    Model field that stores Vector (float32) values.

    Args:
        dimensions (int, optional): Number of dimensions the column is fixed to
        **kwargs: Any standard Peewee field parameters
    """
19
20
class HalfVectorField(Field):
    """
    Model field that stores HalfVector (float16) values.

    Args:
        dimensions (int, optional): Number of dimensions the column is fixed to
        **kwargs: Any standard Peewee field parameters
    """
28
29
class SparseVectorField(Field):
    """
    Model field that stores SparseVector values.

    Args:
        dimensions (int, optional): Number of dimensions the column is fixed to
        **kwargs: Any standard Peewee field parameters
    """
37
38
class FixedBitField(Field):
    """
    Peewee field for storing Bit vector data in a fixed-length ``bit`` column.

    Args:
        max_length (int, optional): Number of bits in the column. When omitted,
            the column is declared as plain ``bit``, which PostgreSQL treats as
            ``bit(1)``, so set it whenever values are longer than one bit
        **kwargs: Standard Peewee field parameters
    """
45
```
46
47
**Usage Examples:**
48
49
```python
50
from peewee import Model, TextField, PostgresqlDatabase, IntegerField
from pgvector import Vector, HalfVector, SparseVector, Bit
from pgvector.peewee import VectorField, HalfVectorField, SparseVectorField, FixedBitField

# Database connection
db = PostgresqlDatabase(
    'your_database',
    user='user',
    password='password',
    host='localhost'
)


class Document(Model):
    """Example model with one column for each pgvector field type."""

    content = TextField()
    embedding = VectorField(dimensions=1536)  # OpenAI embeddings
    title_embedding = HalfVectorField(dimensions=768)  # Memory efficient
    sparse_features = SparseVectorField(dimensions=10000)  # High-dimensional sparse
    # max_length is required for multi-bit values: without it the column is
    # plain `bit`, which PostgreSQL treats as bit(1).
    binary_hash = FixedBitField(max_length=64)  # Binary features

    class Meta:
        database = db
        table_name = 'documents'


# The vector types only exist once the pgvector extension is enabled
db.execute_sql('CREATE EXTENSION IF NOT EXISTS vector')

# Create tables
db.create_tables([Document])

# Insert data
doc = Document.create(
    content="Sample document",
    embedding=Vector([0.1, 0.2, 0.3] * 512),  # 1536 dimensions
    title_embedding=HalfVector([0.5, 0.6, 0.7] * 256),  # 768 dimensions
    sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),
    # Exactly 64 bits to match max_length; passed in text form ('1010110000...')
    binary_hash=Bit("1010110" + "0" * 57).to_text(),
)
85
```
86
87
### Distance Operations
88
89
Using PostgreSQL distance operators in Peewee queries for similarity search.
90
91
**Usage Examples:**
92
93
```python
94
from peewee import fn, SQL
from pgvector import Vector, Bit

query_vector = Vector([0.1, 0.2, 0.3] * 512)  # 1536 dimensions

# The field distance methods build the operator expression and bind the query
# vector as a single parameter. Hand-written SQL('... %s', text) does not work:
# SQL() expects its parameters as a list/tuple, so a bare string is split into
# one parameter per character.

# L2 (Euclidean) distance (<-> operator)
l2_distance = Document.embedding.l2_distance(query_vector)
l2_results = (Document
              .select(Document.content, l2_distance.alias('distance'))
              .order_by(l2_distance)
              .limit(10))

for doc in l2_results:
    print(f"Content: {doc.content}, Distance: {doc.distance}")

# Cosine distance (<=> operator)
cosine_distance = Document.embedding.cosine_distance(query_vector)
cosine_results = (Document
                  .select(Document.content, cosine_distance.alias('cosine_distance'))
                  .order_by(cosine_distance)
                  .limit(10))

# Inner product (<#> operator); the operator yields the NEGATIVE inner product,
# so ascending order returns the largest inner products first
inner_product = Document.embedding.max_inner_product(query_vector)
inner_product_results = (Document
                         .select(Document.content, inner_product.alias('inner_product'))
                         .order_by(inner_product)
                         .limit(10))

# Filter by distance threshold
close_documents = (Document
                   .select()
                   .where(l2_distance < 0.5))

# Hamming distance for bit vectors (<~> operator)
query_bits = Bit("1010110" + "0" * 57)  # Pad to required length
hamming_distance = Document.binary_hash.hamming_distance(query_bits.to_text())
hamming_results = (Document
                   .select(Document.content, hamming_distance.alias('hamming_distance'))
                   .order_by(hamming_distance)
                   .limit(10))

# Jaccard distance for bit vectors (<%> operator); the field method takes care
# of escaping the '%' that a hand-written parameterised query would need
jaccard_distance = Document.binary_hash.jaccard_distance(query_bits.to_text())
jaccard_results = (Document
                   .select(Document.content, jaccard_distance.alias('jaccard_distance'))
                   .order_by(jaccard_distance)
                   .limit(10))
152
```
153
154
### Index Creation
155
156
Creating vector indexes for improved query performance in Peewee.
157
158
**Usage Examples:**
159
160
```python
161
from peewee import SQL
162
163
# Create HNSW index using raw SQL
164
def create_hnsw_index():
    """Create the HNSW L2 index on documents.embedding if it does not exist yet."""
    hnsw_ddl = """
        CREATE INDEX IF NOT EXISTS documents_embedding_hnsw_idx
        ON documents
        USING hnsw (embedding vector_l2_ops)
        WITH (m = 16, ef_construction = 64)
    """
    db.execute_sql(hnsw_ddl)
171
172
# Create IVFFlat index using raw SQL
173
def create_ivfflat_index():
    """Create the IVFFlat L2 index on documents.embedding if it does not exist yet."""
    ivfflat_ddl = """
        CREATE INDEX IF NOT EXISTS documents_embedding_ivfflat_idx
        ON documents
        USING ivfflat (embedding vector_l2_ops)
        WITH (lists = 100)
    """
    db.execute_sql(ivfflat_ddl)
180
181
# Create indexes after table creation
182
db.create_tables([Document])

# Build each dense-vector index in turn, HNSW first and then IVFFlat
for build_index in (create_hnsw_index, create_ivfflat_index):
    build_index()
185
186
# Index for sparse vectors
187
def create_sparse_index():
    """Create an HNSW L2 index on documents.sparse_features if it does not exist yet.

    HNSW is used because pgvector's IVFFlat index type does not support
    sparsevec columns; `lists` is an IVFFlat-only option and is replaced by the
    HNSW build parameters.
    """
    db.execute_sql("""
        CREATE INDEX IF NOT EXISTS documents_sparse_features_idx
        ON documents
        USING hnsw (sparse_features sparsevec_l2_ops)
        WITH (m = 16, ef_construction = 64)
    """)
194
195
create_sparse_index()
196
```
197
198
### Advanced Query Patterns
199
200
Complex similarity search patterns using Peewee with pgvector.
201
202
**Usage Examples:**
203
204
```python
205
from datetime import datetime, timedelta

from peewee import Case, DateTimeField, Model, TextField, Value, fn
from pgvector.peewee import VectorField


class Article(Model):
    """Article with a 384-dimension embedding plus metadata used for filtering."""

    title = TextField()
    content = TextField()
    category = TextField()
    embedding = VectorField(dimensions=384)
    published_at = DateTimeField()

    class Meta:
        database = db
217
218
# Hybrid search: combine semantic similarity with metadata filtering
219
def hybrid_search(query_embedding, category=None, days_ago=7, limit=10):
    """Rank articles by cosine distance to a query, with optional metadata filters.

    Args:
        query_embedding: Query vector compared against Article.embedding
        category: When truthy, only articles in this category are returned
        days_ago: When truthy, only articles published within this many days
            of now are returned
        limit: Maximum number of rows

    Returns:
        A Peewee query selecting title, content, category and `similarity`.
        `similarity` holds the cosine distance, so smaller means more similar,
        and the closest articles come first.
    """
    # The field method binds the vector as one parameter; SQL('... %s', text)
    # would treat every character of the text as a separate parameter.
    distance = Article.embedding.cosine_distance(query_embedding)

    query = Article.select(
        Article.title,
        Article.content,
        Article.category,
        distance.alias('similarity'),
    )

    if category:
        query = query.where(Article.category == category)

    if days_ago:
        cutoff_date = datetime.now() - timedelta(days=days_ago)
        query = query.where(Article.published_at >= cutoff_date)

    return query.order_by(distance).limit(limit)
237
238
# Multi-vector search with weighted combination
239
def multi_vector_search(title_embedding, content_embedding, title_weight=0.3, content_weight=0.7):
    """Rank documents by a weighted sum of content and title cosine distances.

    Args:
        title_embedding: Query vector compared against Document.title_embedding
        content_embedding: Query vector compared against Document.embedding
        title_weight: Weight applied to the title distance
        content_weight: Weight applied to the content distance

    Returns:
        A Peewee query selecting content and `weighted_similarity` (a weighted
        distance, smaller means more similar), limited to the 10 best rows.
    """
    # Each weight is paired with the column and query vector it is named after.
    # The previous raw SQL passed five positional arguments to SQL(), which
    # accepts only (sql, params), and applied title_weight to the content column.
    content_distance = Document.embedding.cosine_distance(content_embedding)
    title_distance = Document.title_embedding.cosine_distance(title_embedding)
    weighted_distance = (content_distance * content_weight) + (title_distance * title_weight)

    return (Document
            .select(Document.content, weighted_distance.alias('weighted_similarity'))
            .order_by(weighted_distance)
            .limit(10))
255
256
# Similarity clustering
257
def find_similar_clusters(reference_embedding, threshold=0.3):
    """Find documents whose cosine distance to the reference is below a threshold.

    Only the distance to `reference_embedding` is checked; documents are not
    compared with each other.

    Args:
        reference_embedding: Vector compared against Document.embedding
        threshold: Exclusive upper bound on the cosine distance

    Returns:
        A Peewee query selecting id, content and `ref_similarity` (the cosine
        distance to the reference, smaller means more similar).
    """
    # SQL() accepts (sql, params) only, so the earlier three-argument call
    # raised TypeError; the field method binds the vector and threshold safely.
    distance = Document.embedding.cosine_distance(reference_embedding)

    return (Document
            .select(Document.id, Document.content, distance.alias('ref_similarity'))
            .where(distance < threshold))
266
267
# Vector aggregation
268
def get_category_centroids():
    """Calculate the average embedding for each article category.

    Returns:
        A list of (category, Vector) tuples, one per category.
    """
    # Article is the model that has both `category` and `embedding`; the
    # `documents` table has no category column. The table name is read from the
    # model so it stays correct if Meta.table_name is ever set.
    table_name = Article._meta.table_name
    rows = db.execute_sql(f"""
        SELECT category, AVG(embedding) AS centroid_embedding
        FROM "{table_name}"
        GROUP BY category
    """)

    # Assumes the driver returns the averaged vector in its text form - confirm
    # if a vector type adapter is registered on the connection.
    return [(category, Vector.from_text(centroid)) for category, centroid in rows]
278
```
279
280
## Complete Peewee Example
281
282
```python
283
from peewee import Model, TextField, DateTimeField, PostgresqlDatabase
284
from pgvector.peewee import VectorField
285
from pgvector import Vector
286
from datetime import datetime
287
288
# Database setup
289
# Connection settings for the example database
db = PostgresqlDatabase('semantic_search_db', user='user', password='password', host='localhost')
295
296
class NewsArticle(Model):
    """News article stored in `news_articles` together with its embedding."""

    # Text columns
    title = TextField()
    content = TextField()
    category = TextField()

    # 384-dimension embedding (sentence-transformers)
    embedding = VectorField(dimensions=384)

    # Set to the current time when no value is supplied
    published_at = DateTimeField(default=datetime.now)

    class Meta:
        database = db
        table_name = 'news_articles'
306
307
# Create table and indexes
308
db.create_tables([NewsArticle])

# HNSW index using the cosine operator class on the embedding column
news_index_ddl = """
    CREATE INDEX IF NOT EXISTS news_embedding_hnsw_idx
    ON news_articles
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
"""
db.execute_sql(news_index_ddl)
317
318
# Semantic search service
319
class SemanticSearchService:
    """Cosine-distance search helpers over NewsArticle.

    Both methods expose the cosine distance under the `similarity` alias, so a
    smaller value means a closer match and results are ordered closest-first.
    """

    @staticmethod
    def search_articles(query_embedding, category=None, limit=10):
        """Return the articles closest to `query_embedding`.

        Args:
            query_embedding: Query vector compared against NewsArticle.embedding
            category: When truthy, only articles in this category are returned
            limit: Maximum number of rows

        Returns:
            A Peewee query selecting title, content, category, published_at
            and `similarity`.
        """
        # The field method needs no SQL import and binds the vector as a single
        # parameter (a bare string handed to SQL() is split per character).
        distance = NewsArticle.embedding.cosine_distance(query_embedding)

        query = NewsArticle.select(
            NewsArticle.title,
            NewsArticle.content,
            NewsArticle.category,
            NewsArticle.published_at,
            distance.alias('similarity'),
        )

        if category:
            query = query.where(NewsArticle.category == category)

        return query.order_by(distance).limit(limit)

    @staticmethod
    def find_related_articles(article_id, limit=5):
        """Return the articles closest to the article with id `article_id`.

        Args:
            article_id: Primary key of the reference article
            limit: Maximum number of rows

        Returns:
            A Peewee query selecting title and `similarity`, excluding the
            reference article itself.

        Raises:
            NewsArticle.DoesNotExist: If no article has the given id.
        """
        article = NewsArticle.get_by_id(article_id)

        # The loaded embedding is handed to the field method as-is, so no
        # .to_text() is called on it (assumes the field accepts the loaded
        # value type - confirm against the installed pgvector version).
        distance = NewsArticle.embedding.cosine_distance(article.embedding)

        return (NewsArticle
                .select(NewsArticle.title, distance.alias('similarity'))
                .where(NewsArticle.id != article_id)
                .order_by(distance)
                .limit(limit))
350
351
# Usage
352
service = SemanticSearchService()

# Run a category-restricted search with a placeholder query embedding
query_vector = Vector([0.1] * 384)  # Your query embedding
results = service.search_articles(query_vector, limit=5, category='technology')

for article in results:
    print(f"Title: {article.title}", f"Similarity: {article.similarity}", "---", sep="\n")

# Look up the nearest neighbours of a stored article
related = service.find_related_articles(limit=3, article_id=1)
for related_article in related:
    print(f"Related: {related_article.title} (similarity: {related_article.similarity})")
367
```