Tessl Tile for pypi/pgvector@0.4.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-vectors.md database-drivers.md django-integration.md index.md peewee-integration.md sqlalchemy-integration.md

docs/peewee-integration.md

0
# Peewee Integration
1

2
Field types for the Peewee ORM that store pgvector's vector types in PostgreSQL and can be used in similarity-search queries.
3

4
## Capabilities
5

6
### Vector Field Types
7

8
Peewee model fields for storing the different vector types in PostgreSQL with the pgvector extension.
9

10
```python { .api }
11
class VectorField(Field):
12
    """
13
    Peewee field for storing Vector (float32) data.
14
    
15
    Args:
16
        dimensions (int, optional): Fixed number of dimensions
17
        **kwargs: Standard Peewee field parameters
18
    """
19

20
class HalfVectorField(Field):
21
    """
22
    Peewee field for storing HalfVector (float16) data.
23
    
24
    Args:
25
        dimensions (int, optional): Fixed number of dimensions
26
        **kwargs: Standard Peewee field parameters
27
    """
28

29
class SparseVectorField(Field):
30
    """
31
    Peewee field for storing SparseVector data.
32
    
33
    Args:
34
        dimensions (int, optional): Fixed number of dimensions
35
        **kwargs: Standard Peewee field parameters
36
    """
37

38
class FixedBitField(Field):
39
    """
40
    Peewee field for storing Bit vector data.
41
    
42
    Args:
43
        **kwargs: Standard Peewee field parameters
44
    """
45
```
46

47
**Usage Examples:**
48

49
```python
50
from peewee import Model, TextField, PostgresqlDatabase, IntegerField
51
from pgvector.peewee import VectorField, HalfVectorField, SparseVectorField, FixedBitField
52

53
# Database connection
54
db = PostgresqlDatabase(
55
    'your_database',
56
    user='user',
57
    password='password',
58
    host='localhost'
59
)
60

61
class Document(Model):
62
    content = TextField()
63
    embedding = VectorField(dimensions=1536)  # OpenAI embeddings
64
    title_embedding = HalfVectorField(dimensions=768)  # Memory efficient
65
    sparse_features = SparseVectorField(dimensions=10000)  # High-dimensional sparse
66
    binary_hash = FixedBitField()  # Binary features
67
    
68
    class Meta:
69
        database = db
70
        table_name = 'documents'
71

72
# Create tables
73
db.create_tables([Document])
74

75
# Insert data
76
from pgvector import Vector, HalfVector, SparseVector, Bit
77

78
doc = Document.create(
79
    content="Sample document",
80
    embedding=Vector([0.1, 0.2, 0.3] * 512),  # 1536 dimensions
81
    title_embedding=HalfVector([0.5, 0.6, 0.7] * 256),  # 768 dimensions
82
    sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),
83
    binary_hash=Bit("1010110")
84
)
85
```
86

87
### Distance Operations
88

89
Using pgvector's distance operators in Peewee queries for similarity search.
90

91
**Usage Examples:**
92

93
```python
94
from peewee import fn, SQL
95
from pgvector import Vector, Bit
96

97
query_vector = Vector([0.1, 0.2, 0.3] * 512)  # 1536 dimensions
98

99
# L2 (Euclidean) distance using <-> operator
100
l2_results = (Document
101
    .select(
102
        Document.content,
103
        SQL('embedding <-> %s', query_vector.to_text()).alias('distance')
104
    )
105
    .order_by(SQL('embedding <-> %s', query_vector.to_text()))
106
    .limit(10))
107

108
for doc in l2_results:
109
    print(f"Content: {doc.content}, Distance: {doc.distance}")
110

111
# Cosine distance using <=> operator
112
cosine_results = (Document
113
    .select(
114
        Document.content,
115
        SQL('embedding <=> %s', query_vector.to_text()).alias('cosine_distance')
116
    )
117
    .order_by(SQL('embedding <=> %s', query_vector.to_text()))
118
    .limit(10))
119

120
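# Negative inner product using <#> operator (pgvector returns the inner product multiplied by -1, so ascending order ranks the largest inner product first)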
121
inner_product_results = (Document
122
    .select(
123
        Document.content,
124
        SQL('embedding <#> %s', query_vector.to_text()).alias('inner_product')
125
    )
126
    .order_by(SQL('embedding <#> %s', query_vector.to_text()))
127
    .limit(10))
128

129
# Filter by distance threshold
130
close_documents = (Document
131
    .select()
132
    .where(SQL('embedding <-> %s < 0.5', query_vector.to_text())))
133

134
# Hamming distance for bit vectors
135
query_bits = Bit("1010110" + "0" * 57)  # Pad to required length
136
hamming_results = (Document
137
    .select(
138
        Document.content,
139
        SQL('binary_hash <~> %s', query_bits.to_text()).alias('hamming_distance')
140
    )
141
    .order_by(SQL('binary_hash <~> %s', query_bits.to_text()))
142
    .limit(10))
143

144
# Jaccard distance for bit vectors
145
jaccard_results = (Document
146
    .select(
147
        Document.content,
148
        SQL('binary_hash <%> %s', query_bits.to_text()).alias('jaccard_distance')
149
    )
150
    .order_by(SQL('binary_hash <%> %s', query_bits.to_text()))
151
    .limit(10))
152
```
153

154
### Index Creation
155

156
Creating vector indexes for improved query performance in Peewee.
157

158
**Usage Examples:**
159

160
```python
161
from peewee import SQL
162

163
# Create HNSW index using raw SQL
164
def create_hnsw_index():
165
    db.execute_sql("""
166
        CREATE INDEX IF NOT EXISTS documents_embedding_hnsw_idx 
167
        ON documents 
168
        USING hnsw (embedding vector_l2_ops) 
169
        WITH (m = 16, ef_construction = 64)
170
    """)
171

172
# Create IVFFlat index using raw SQL
173
def create_ivfflat_index():
174
    db.execute_sql("""
175
        CREATE INDEX IF NOT EXISTS documents_embedding_ivfflat_idx
176
        ON documents
177
        USING ivfflat (embedding vector_l2_ops)
178
        WITH (lists = 100)
179
    """)
180

181
# Create indexes after table creation
182
db.create_tables([Document])
183
create_hnsw_index()
184
create_ivfflat_index()
185

186
# Index for sparse vectors
187
def create_sparse_index():
188
    db.execute_sql("""
189
        CREATE INDEX IF NOT EXISTS documents_sparse_features_idx
190
        ON documents
191
        USING hnsw (sparse_features sparsevec_l2_ops)
191
        WITH (m = 16, ef_construction = 64)
193
    """)
194

195
create_sparse_index()
196
```
197

198
### Advanced Query Patterns
199

200
Complex similarity search patterns using Peewee with pgvector.
201

202
**Usage Examples:**
203

204
```python
205
from peewee import Case, fn, Value
206
from datetime import datetime, timedelta
207

208
class Article(Model):
209
    title = TextField()
210
    content = TextField()
211
    category = TextField()
212
    embedding = VectorField(dimensions=384)
213
    published_at = DateTimeField()
214
    
215
    class Meta:
216
        database = db
217

218
# Hybrid search: combine semantic similarity with metadata filtering
219
def hybrid_search(query_embedding, category=None, days_ago=7, limit=10):
220
    base_query = Article.select(
221
        Article.title,
222
        Article.content,
223
        Article.category,
224
        SQL('embedding <=> %s', query_embedding.to_text()).alias('similarity')
225
    )
226
    
227
    if category:
228
        base_query = base_query.where(Article.category == category)
229
    
230
    if days_ago:
231
        cutoff_date = datetime.now() - timedelta(days=days_ago)
232
        base_query = base_query.where(Article.published_at >= cutoff_date)
233
    
234
    return (base_query
235
        .order_by(SQL('embedding <=> %s', query_embedding.to_text()))
236
        .limit(limit))
237

238
# Multi-vector search with weighted combination
239
def multi_vector_search(title_embedding, content_embedding, title_weight=0.3, content_weight=0.7):
240
    return (Document
241
        .select(
242
            Document.content,
243
            SQL(
244
                '(%s * (embedding <=> %s) + %s * (title_embedding <=> %s))',
245
                title_weight, title_embedding.to_text(),
246
                content_weight, content_embedding.to_text()
247
            ).alias('weighted_similarity')
248
        )
249
        .order_by(SQL(
250
            '(%s * (embedding <=> %s) + %s * (title_embedding <=> %s))',
251
            title_weight, title_embedding.to_text(),
252
            content_weight, content_embedding.to_text()
253
        ))
254
        .limit(10))
255

256
# Similarity clustering
257
def find_similar_clusters(reference_embedding, threshold=0.3):
258
    """Find documents whose cosine distance to the reference embedding is below the threshold."""
259
    return (Document
260
        .select(
261
            Document.id,
262
            Document.content,
263
            SQL('embedding <=> %s', reference_embedding.to_text()).alias('ref_similarity')
264
        )
265
        .where(SQL('embedding <=> %s < %s', reference_embedding.to_text(), threshold)))
266

267
# Vector aggregation
268
def get_category_centroids():
269
    """Calculate average embeddings by category."""
270
    # Note: Peewee doesn't have built-in vector avg, use raw SQL
271
    results = db.execute_sql("""
272
        SELECT category, AVG(embedding) as centroid_embedding
273
        FROM documents
274
        GROUP BY category
275
    """)
276
    
277
    return [(row[0], Vector.from_text(row[1])) for row in results]
278
```
279

280
## Complete Peewee Example
281

282
```python
283
from peewee import Model, TextField, DateTimeField, PostgresqlDatabase
284
from pgvector.peewee import VectorField
285
from pgvector import Vector
286
from datetime import datetime
287

288
# Database setup
289
db = PostgresqlDatabase(
290
    'semantic_search_db',
291
    user='user',
292
    password='password',
293
    host='localhost'
294
)
295

296
class NewsArticle(Model):
297
    title = TextField()
298
    content = TextField()
299
    category = TextField()
300
    embedding = VectorField(dimensions=384)  # sentence-transformers
301
    published_at = DateTimeField(default=datetime.now)
302
    
303
    class Meta:
304
        database = db
305
        table_name = 'news_articles'
306

307
# Create table and indexes
308
db.create_tables([NewsArticle])
309

310
# Create vector index
311
db.execute_sql("""
312
    CREATE INDEX IF NOT EXISTS news_embedding_hnsw_idx
313
    ON news_articles
314
    USING hnsw (embedding vector_cosine_ops)
315
    WITH (m = 16, ef_construction = 64)
316
""")
317

318
# Semantic search service
319
class SemanticSearchService:
320
    @staticmethod
321
    def search_articles(query_embedding, category=None, limit=10):
322
        query = (NewsArticle
323
            .select(
324
                NewsArticle.title,
325
                NewsArticle.content,
326
                NewsArticle.category,
327
                NewsArticle.published_at,
328
                SQL('embedding <=> %s', query_embedding.to_text()).alias('similarity')
329
            ))
330
        
331
        if category:
332
            query = query.where(NewsArticle.category == category)
333
        
334
        return (query
335
            .order_by(SQL('embedding <=> %s', query_embedding.to_text()))
336
            .limit(limit))
337
    
338
    @staticmethod
339
    def find_related_articles(article_id, limit=5):
340
        article = NewsArticle.get_by_id(article_id)
341
        
342
        return (NewsArticle
343
            .select(
344
                NewsArticle.title,
345
                SQL('embedding <=> %s', article.embedding.to_text()).alias('similarity')
346
            )
347
            .where(NewsArticle.id != article_id)
348
            .order_by(SQL('embedding <=> %s', article.embedding.to_text()))
349
            .limit(limit))
350

351
# Usage
352
service = SemanticSearchService()
353

354
# Search for articles
355
query_vector = Vector([0.1] * 384)  # Your query embedding
356
results = service.search_articles(query_vector, category='technology', limit=5)
357

358
for article in results:
359
    print(f"Title: {article.title}")
360
    print(f"Similarity: {article.similarity}")
361
    print("---")
362

363
# Find related articles
364
related = service.find_related_articles(article_id=1, limit=3)
365
for related_article in related:
366
    print(f"Related: {related_article.title} (similarity: {related_article.similarity})")
367
```

Version

Tile

Files

docs/peewee-integration.md

docs/peewee-integration.md