or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-vectors.md · database-drivers.md · django-integration.md · index.md · peewee-integration.md · sqlalchemy-integration.md

docs/peewee-integration.md

0

# Peewee Integration

1

2

Peewee ORM field types for vector operations in Peewee-based applications with full vector type support and query integration.

3

4

## Capabilities

5

6

### Vector Field Types

7

8

Peewee model fields for storing the different vector types in PostgreSQL with the pgvector extension.

9

10

```python { .api }

11

class VectorField(Field):
    """
    Peewee field for storing Vector (float32) data.

    The field also exposes query helpers (``l2_distance``,
    ``max_inner_product``, ``cosine_distance``, ``l1_distance``) that build
    the corresponding pgvector operator expression.

    Args:
        dimensions (int, optional): Fixed number of dimensions
        **kwargs: Standard Peewee field parameters
    """

19

20

class HalfVectorField(Field):
    """
    Peewee field for storing HalfVector (float16) data.

    The field also exposes query helpers (``l2_distance``,
    ``max_inner_product``, ``cosine_distance``, ``l1_distance``) that build
    the corresponding pgvector operator expression.

    Args:
        dimensions (int, optional): Fixed number of dimensions
        **kwargs: Standard Peewee field parameters
    """

28

29

class SparseVectorField(Field):
    """
    Peewee field for storing SparseVector data.

    The field also exposes query helpers (``l2_distance``,
    ``max_inner_product``, ``cosine_distance``, ``l1_distance``) that build
    the corresponding pgvector operator expression.

    Args:
        dimensions (int, optional): Fixed number of dimensions
        **kwargs: Standard Peewee field parameters
    """

37

38

class FixedBitField(Field):
    """
    Peewee field for storing Bit vector data.

    The field also exposes the query helpers ``hamming_distance`` and
    ``jaccard_distance``.

    Args:
        max_length (int, optional): Number of bits in the column. When it is
            omitted the column is declared as plain ``bit``, which PostgreSQL
            treats as ``bit(1)``.
        **kwargs: Standard Peewee field parameters
    """

45

```

46

47

**Usage Examples:**

48

49

```python

50

from peewee import Model, TextField, PostgresqlDatabase, IntegerField

51

from pgvector.peewee import VectorField, HalfVectorField, SparseVectorField, FixedBitField

52

53

# Database connection

54

# Placeholder credentials: replace with the settings of your own server.
db = PostgresqlDatabase(
    'your_database',
    user='user',
    password='password',
    host='localhost',
)

60

61

class Document(Model):
    """Example model with one column for each pgvector field type."""

    content = TextField()
    embedding = VectorField(dimensions=1536)  # OpenAI embeddings
    title_embedding = HalfVectorField(dimensions=768)  # Memory efficient
    sparse_features = SparseVectorField(dimensions=10000)  # High-dimensional sparse
    # bit(n) is fixed width: stored values and query operands must all be
    # exactly max_length bits long. Without max_length the column is bit(1).
    binary_hash = FixedBitField(max_length=64)  # Binary features

    class Meta:
        database = db
        table_name = 'documents'


# The vector column types only exist once the extension is installed.
db.execute_sql('CREATE EXTENSION IF NOT EXISTS vector')

# Create tables
db.create_tables([Document])

# Insert data
from pgvector import Vector, HalfVector, SparseVector, Bit

doc = Document.create(
    content="Sample document",
    embedding=Vector([0.1, 0.2, 0.3] * 512),  # 1536 dimensions
    title_embedding=HalfVector([0.5, 0.6, 0.7] * 256),  # 768 dimensions
    sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),
    # Sent as a bit string, zero-padded to the 64-bit column width.
    binary_hash=Bit("1010110" + "0" * 57).to_text(),
)

85

```

86

87

### Distance Operations

88

89

Using PostgreSQL distance operators in Peewee queries for similarity search.

90

91

**Usage Examples:**

92

93

```python

94

from peewee import fn, SQL

95

from pgvector import Vector, Bit

96

97

query_vector = Vector([0.1, 0.2, 0.3] * 512)  # 1536 dimensions

# L2 (Euclidean) distance using <-> operator
# The field helper binds the whole vector as one query parameter. peewee's
# SQL(sql, params) expects `params` to be a sequence, so handing it the
# vector text directly would bind one parameter per character.
l2_distance = Document.embedding.l2_distance(query_vector)
l2_results = (Document
              .select(Document.content, l2_distance.alias('distance'))
              .order_by(l2_distance)
              .limit(10))

for doc in l2_results:
    print(f"Content: {doc.content}, Distance: {doc.distance}")

110

111

# Cosine distance using <=> operator

112

# cosine_distance() emits `embedding <=> %s` with the vector bound as a
# single parameter; smaller values mean more similar rows.
cosine_distance = Document.embedding.cosine_distance(query_vector)
cosine_results = (Document
                  .select(Document.content,
                          cosine_distance.alias('cosine_distance'))
                  .order_by(cosine_distance)
                  .limit(10))

119

120

# Inner product distance using <#> operator

121

# max_inner_product() emits `embedding <#> %s`. The operator returns the
# NEGATIVE inner product, so ascending order puts the best matches first.
negative_inner_product = Document.embedding.max_inner_product(query_vector)
inner_product_results = (Document
                         .select(Document.content,
                                 negative_inner_product.alias('inner_product'))
                         .order_by(negative_inner_product)
                         .limit(10))

128

129

# Filter by distance threshold

130

# Keep only rows whose L2 distance to the query vector is below 0.5.
close_documents = (Document
                   .select()
                   .where(Document.embedding.l2_distance(query_vector) < 0.5))

133

134

# Hamming distance for bit vectors

135

# Bit operands must be the same length as the stored column, hence the
# zero padding to 64 bits.
query_bits = Bit("1010110" + "0" * 57)  # Pad to required length
# The value is passed as its bit-string text so the driver can bind it.
hamming_distance = Document.binary_hash.hamming_distance(query_bits.to_text())
hamming_results = (Document
                   .select(Document.content,
                           hamming_distance.alias('hamming_distance'))
                   .order_by(hamming_distance)
                   .limit(10))

143

144

# Jaccard distance for bit vectors

145

# jaccard_distance() emits the <%> operator with the percent sign already
# escaped; in a hand-written parameterised SQL() string it has to be
# spelled '<%%>' or the driver reads '%>' as a malformed placeholder.
jaccard_distance = Document.binary_hash.jaccard_distance(query_bits.to_text())
jaccard_results = (Document
                   .select(Document.content,
                           jaccard_distance.alias('jaccard_distance'))
                   .order_by(jaccard_distance)
                   .limit(10))

152

```

153

154

### Index Creation

155

156

Creating vector indexes for improved query performance in Peewee.

157

158

**Usage Examples:**

159

160

```python

161

from peewee import SQL

162

163

# Create HNSW index using raw SQL

164

def create_hnsw_index():
    """Create an HNSW index for L2 distance on documents.embedding.

    The statement uses IF NOT EXISTS, so calling this again is harmless.
    """
    db.execute_sql("""
        CREATE INDEX IF NOT EXISTS documents_embedding_hnsw_idx
        ON documents
        USING hnsw (embedding vector_l2_ops)
        WITH (m = 16, ef_construction = 64)
    """)

171

172

# Create IVFFlat index using raw SQL

173

def create_ivfflat_index():
    """Create an IVFFlat index for L2 distance on documents.embedding.

    The statement uses IF NOT EXISTS, so calling this again is harmless.
    IVFFlat derives its lists from the rows present at build time, so it is
    best built once the table already holds data.
    """
    db.execute_sql("""
        CREATE INDEX IF NOT EXISTS documents_embedding_ivfflat_idx
        ON documents
        USING ivfflat (embedding vector_l2_ops)
        WITH (lists = 100)
    """)

180

181

# Create indexes after table creation

182

# The table must exist before indexes can be created on it.
db.create_tables([Document])
create_hnsw_index()
create_ivfflat_index()

185

186

# Index for sparse vectors

187

def create_sparse_index():
    """Create an HNSW index for L2 distance on documents.sparse_features.

    HNSW is used because IVFFlat does not support the sparsevec type. The
    `lists` option is specific to IVFFlat and is therefore not set here.
    """
    db.execute_sql("""
        CREATE INDEX IF NOT EXISTS documents_sparse_features_idx
        ON documents
        USING hnsw (sparse_features sparsevec_l2_ops)
    """)


create_sparse_index()

196

```

197

198

### Advanced Query Patterns

199

200

Complex similarity search patterns using Peewee with pgvector.

201

202

**Usage Examples:**

203

204

```python

205

from peewee import Case, fn, Value, DateTimeField
from datetime import datetime, timedelta


class Article(Model):
    """Example article model: text metadata plus a 384-dimension embedding."""

    title = TextField()
    content = TextField()
    category = TextField()
    embedding = VectorField(dimensions=384)
    published_at = DateTimeField()

    class Meta:
        database = db

217

218

# Hybrid search: combine semantic similarity with metadata filtering

219

def hybrid_search(query_embedding, category=None, days_ago=7, limit=10):
    """Rank articles by cosine distance, with optional metadata filters.

    Args:
        query_embedding: Query vector (Vector, list or numpy array) with the
            same number of dimensions as Article.embedding.
        category: When truthy, only articles in this category are returned.
        days_ago: When truthy, only articles published within this many days
            are returned; a falsy value disables the date filter.
        limit: Maximum number of rows to return.

    Returns:
        A Peewee select query. Each row carries a `similarity` attribute
        holding the cosine distance (smaller means more similar).
    """
    # Built once and reused for both the selected column and the ordering.
    # The helper binds the vector as a single query parameter.
    distance = Article.embedding.cosine_distance(query_embedding)

    base_query = Article.select(
        Article.title,
        Article.content,
        Article.category,
        distance.alias('similarity'),
    )

    if category:
        base_query = base_query.where(Article.category == category)

    if days_ago:
        cutoff_date = datetime.now() - timedelta(days=days_ago)
        base_query = base_query.where(Article.published_at >= cutoff_date)

    return base_query.order_by(distance).limit(limit)

237

238

# Multi-vector search with weighted combination

239

def multi_vector_search(title_embedding, content_embedding, title_weight=0.3, content_weight=0.7):
    """Rank documents by a weighted sum of two cosine distances.

    Args:
        title_embedding: Query vector compared with Document.title_embedding
            (a HalfVector, list or numpy array).
        content_embedding: Query vector compared with Document.embedding
            (a Vector, list or numpy array).
        title_weight: Weight applied to the title distance.
        content_weight: Weight applied to the content distance.

    Returns:
        A Peewee select query limited to 10 rows. Each row carries a
        `weighted_similarity` attribute (smaller means more similar).
    """
    # Each weight is paired with the distance of its own column, and each
    # column is compared with its own query vector.
    content_distance = Document.embedding.cosine_distance(content_embedding)
    title_distance = Document.title_embedding.cosine_distance(title_embedding)
    weighted = content_distance * content_weight + title_distance * title_weight

    return (Document
            .select(Document.content, weighted.alias('weighted_similarity'))
            .order_by(weighted)
            .limit(10))

255

256

# Similarity clustering

257

def find_similar_clusters(reference_embedding, threshold=0.3):
    """Find documents within a cosine-distance threshold of a reference vector.

    Args:
        reference_embedding: Vector (or list / numpy array) to compare with
            Document.embedding.
        threshold: Exclusive upper bound on the cosine distance.

    Returns:
        A Peewee select query. Each row carries a `ref_similarity` attribute
        holding its cosine distance to the reference.
    """
    distance = Document.embedding.cosine_distance(reference_embedding)
    return (Document
            .select(Document.id,
                    Document.content,
                    distance.alias('ref_similarity'))
            .where(distance < threshold))

266

267

# Vector aggregation

268

def get_category_centroids():
    """Calculate average embeddings by category.

    Returns:
        A list of `(category, Vector)` tuples, one per category.
    """
    # Article is the model that has a `category` column; Document does not,
    # so the aggregate runs against Article's table.
    table = Article._meta.table_name
    results = db.execute_sql(f"""
        SELECT category, AVG(embedding) AS centroid_embedding
        FROM "{table}"
        GROUP BY category
    """)

    # NOTE(review): assumes the raw cursor returns the averaged vector in its
    # text form (no vector adapter registered on the connection).
    return [(row[0], Vector.from_text(row[1])) for row in results]

278

```

279

280

## Complete Peewee Example

281

282

```python

283

from peewee import Model, TextField, DateTimeField, PostgresqlDatabase

284

from pgvector.peewee import VectorField

285

from pgvector import Vector

286

from datetime import datetime

287

288

# Database setup

289

# Placeholder credentials: replace with the settings of your own server.
db = PostgresqlDatabase(
    'semantic_search_db',
    user='user',
    password='password',
    host='localhost',
)

295

296

class NewsArticle(Model):
    """News article with a 384-dimension embedding, stored in news_articles."""

    title = TextField()
    content = TextField()
    category = TextField()
    embedding = VectorField(dimensions=384)  # sentence-transformers
    # The callable is passed uncalled so that it runs for each new row.
    published_at = DateTimeField(default=datetime.now)

    class Meta:
        database = db
        table_name = 'news_articles'

306

307

# Create table and indexes

308

# The vector column type only exists once the extension is installed.
db.execute_sql('CREATE EXTENSION IF NOT EXISTS vector')
db.create_tables([NewsArticle])

# Create vector index
# The cosine operator class matches the cosine-distance queries used below.
db.execute_sql("""
    CREATE INDEX IF NOT EXISTS news_embedding_hnsw_idx
    ON news_articles
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64)
""")

317

318

# Semantic search service

319

class SemanticSearchService:
    """Stateless helpers for cosine-distance search over NewsArticle rows."""

    @staticmethod
    def search_articles(query_embedding, category=None, limit=10):
        """Return the articles closest to a query vector.

        Args:
            query_embedding: Query vector (Vector, list or numpy array) with
                the same number of dimensions as NewsArticle.embedding.
            category: When truthy, only articles in this category are
                returned.
            limit: Maximum number of rows to return.

        Returns:
            A Peewee select query. Each row carries a `similarity` attribute
            holding the cosine distance (smaller means more similar).
        """
        # The field helper binds the vector as one parameter and needs no
        # raw SQL fragment.
        distance = NewsArticle.embedding.cosine_distance(query_embedding)

        query = NewsArticle.select(
            NewsArticle.title,
            NewsArticle.content,
            NewsArticle.category,
            NewsArticle.published_at,
            distance.alias('similarity'),
        )

        if category:
            query = query.where(NewsArticle.category == category)

        return query.order_by(distance).limit(limit)

    @staticmethod
    def find_related_articles(article_id, limit=5):
        """Return the articles closest to an existing article.

        Args:
            article_id: Primary key of the reference article; the lookup
                uses NewsArticle.get_by_id.
            limit: Maximum number of rows to return.

        Returns:
            A Peewee select query that excludes the reference article. Each
            row carries a `similarity` attribute holding the cosine distance.
        """
        article = NewsArticle.get_by_id(article_id)

        # NOTE(review): the loaded embedding is presumably a numpy array
        # rather than a Vector, so it has no to_text(); the field helper
        # accepts the value as loaded.
        distance = NewsArticle.embedding.cosine_distance(article.embedding)

        return (NewsArticle
                .select(NewsArticle.title, distance.alias('similarity'))
                .where(NewsArticle.id != article_id)
                .order_by(distance)
                .limit(limit))

350

351

# Usage

352

service = SemanticSearchService()

# Search for articles
query_vector = Vector([0.1] * 384)  # Your query embedding
results = service.search_articles(query_vector, category='technology', limit=5)

for article in results:
    print(f"Title: {article.title}")
    # `similarity` holds a cosine distance: smaller means more similar.
    print(f"Similarity: {article.similarity}")
    print("---")

# Find related articles
related = service.find_related_articles(article_id=1, limit=3)
for related_article in related:
    print(f"Related: {related_article.title} (similarity: {related_article.similarity})")

367

```