or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-vectors.mddatabase-drivers.mddjango-integration.mdindex.mdpeewee-integration.mdsqlalchemy-integration.md

sqlalchemy-integration.mddocs/

0

# SQLAlchemy Integration

1

2

Native SQLAlchemy types and functions providing full vector support for SQLAlchemy applications with type safety and query builder integration.

3

4

## Capabilities

5

6

### Vector Types

7

8

SQLAlchemy column types for storing different vector types in PostgreSQL with pgvector extension.

9

10

```python { .api }

11

class VECTOR(UserDefinedType):

12

"""

13

SQLAlchemy type for Vector (float32) columns.

14

15

Args:

16

dim (int, optional): Fixed number of dimensions

17

"""

18

19

def __init__(self, dim=None):

20

"""Initialize VECTOR type with optional dimension constraint."""

21

22

def get_col_spec(self, **kw) -> str:

23

"""Get column specification for DDL generation."""

24

25

def bind_processor(self, dialect):

26

"""Return a conversion function for processing bind values."""

27

28

def literal_processor(self, dialect):

29

"""Return a conversion function for processing literal values."""

30

31

def result_processor(self, dialect, coltype):

32

"""Return a conversion function for processing result values."""

33

34

class comparator_factory(UserDefinedType.Comparator):

35

"""Comparator for vector distance operations."""

36

37

def l2_distance(self, other):

38

"""L2 (Euclidean) distance operation."""

39

40

def max_inner_product(self, other):

41

"""Maximum inner product operation."""

42

43

def cosine_distance(self, other):

44

"""Cosine distance operation."""

45

46

def l1_distance(self, other):

47

"""L1 (Manhattan) distance operation."""

48

49

class HALFVEC(UserDefinedType):

50

"""

51

SQLAlchemy type for HalfVector (float16) columns.

52

53

Args:

54

dim (int, optional): Fixed number of dimensions

55

"""

56

57

def __init__(self, dim=None):

58

"""Initialize HALFVEC type with optional dimension constraint."""

59

60

def get_col_spec(self, **kw) -> str:

61

"""Get column specification for DDL generation."""

62

63

def bind_processor(self, dialect):

64

"""Return a conversion function for processing bind values."""

65

66

def literal_processor(self, dialect):

67

"""Return a conversion function for processing literal values."""

68

69

def result_processor(self, dialect, coltype):

70

"""Return a conversion function for processing result values."""

71

72

class comparator_factory(UserDefinedType.Comparator):

73

"""Comparator for half vector distance operations."""

74

75

def l2_distance(self, other):

76

"""L2 (Euclidean) distance operation."""

77

78

def max_inner_product(self, other):

79

"""Maximum inner product operation."""

80

81

def cosine_distance(self, other):

82

"""Cosine distance operation."""

83

84

def l1_distance(self, other):

85

"""L1 (Manhattan) distance operation."""

86

87

class SPARSEVEC(UserDefinedType):

88

"""

89

SQLAlchemy type for SparseVector columns.

90

91

Args:

92

dim (int, optional): Fixed number of dimensions

93

"""

94

95

def __init__(self, dim=None):

96

"""Initialize SPARSEVEC type with optional dimension constraint."""

97

98

def get_col_spec(self, **kw) -> str:

99

"""Get column specification for DDL generation."""

100

101

def bind_processor(self, dialect):

102

"""Return a conversion function for processing bind values."""

103

104

def literal_processor(self, dialect):

105

"""Return a conversion function for processing literal values."""

106

107

def result_processor(self, dialect, coltype):

108

"""Return a conversion function for processing result values."""

109

110

class comparator_factory(UserDefinedType.Comparator):

111

"""Comparator for sparse vector distance operations."""

112

113

def l2_distance(self, other):

114

"""L2 (Euclidean) distance operation."""

115

116

def max_inner_product(self, other):

117

"""Maximum inner product operation."""

118

119

def cosine_distance(self, other):

120

"""Cosine distance operation."""

121

122

def l1_distance(self, other):

123

"""L1 (Manhattan) distance operation."""

124

125

class BIT(UserDefinedType):

126

"""

127

SQLAlchemy type for Bit vector columns.

128

129

Args:

130

length (int, optional): Fixed bit length

131

"""

132

133

def __init__(self, length=None):

134

"""Initialize BIT type with optional length constraint."""

135

136

def get_col_spec(self, **kw) -> str:

137

"""Get column specification for DDL generation."""

138

139

class comparator_factory(UserDefinedType.Comparator):

140

"""Comparator for bit vector distance operations."""

141

142

def hamming_distance(self, other):

143

"""Hamming distance operation."""

144

145

def jaccard_distance(self, other):

146

"""Jaccard distance operation."""

147

148

# Type alias for compatibility

149

Vector = VECTOR

150

```

151

152

**Usage Examples:**

153

154

```python

155

from sqlalchemy import Column, Integer, Text, create_engine

156

from sqlalchemy.ext.declarative import declarative_base

157

from sqlalchemy.orm import sessionmaker

158

from pgvector.sqlalchemy import VECTOR, HALFVEC, SPARSEVEC, BIT, Vector

159

from pgvector import Vector as PgVector, HalfVector, SparseVector, Bit

160

161

Base = declarative_base()

162

163

class Document(Base):

164

__tablename__ = 'documents'

165

166

id = Column(Integer, primary_key=True)

167

content = Column(Text)

168

embedding = Column(VECTOR(1536)) # OpenAI embeddings

169

title_embedding = Column(HALFVEC(768)) # Memory efficient

170

sparse_features = Column(SPARSEVEC(10000)) # High-dimensional sparse

171

binary_hash = Column(BIT()) # Binary features

172

173

# Create engine and session

174

engine = create_engine('postgresql://user:pass@localhost/db')

175

Base.metadata.create_all(engine)

176

Session = sessionmaker(bind=engine)

177

session = Session()

178

179

# Create document with vectors

180

doc = Document(

181

content="Sample document",

182

embedding=PgVector([0.1, 0.2, 0.3] * 512),

183

title_embedding=HalfVector([0.5, 0.6, 0.7] * 256),

184

sparse_features=SparseVector({0: 1.0, 500: 2.5}, 10000),

185

binary_hash=Bit("1010110")

186

)

187

session.add(doc)

188

session.commit()

189

```

190

191

### Distance Operations

192

193

Use the built-in distance methods for similarity queries:

194

195

```python

196

from sqlalchemy import select

197

198

# L2 distance (Euclidean)

199

query_vector = PgVector([0.2, 0.3, 0.4] * 512)

200

nearest = session.scalars(

201

select(Document)

202

.order_by(Document.embedding.l2_distance(query_vector))

203

.limit(5)

204

).all()

205

206

# Cosine distance

207

similar = session.scalars(

208

select(Document)

209

.order_by(Document.embedding.cosine_distance(query_vector))

210

.limit(10)

211

).all()

212

213

# Maximum inner product (similarity)

214

products = session.scalars(

215

select(Document)

216

.order_by(Document.embedding.max_inner_product(query_vector))

217

.limit(10)

218

).all()

219

220

# Get distance values

221

distances = session.scalars(

222

select(Document.embedding.l2_distance(query_vector))

223

).all()

224

225

# Filter by distance threshold

226

nearby = session.scalars(

227

select(Document)

228

.filter(Document.embedding.l2_distance(query_vector) < 0.5)

229

).all()

230

```

231

232

### Aggregate Functions

233

234

Built-in aggregate functions for vector operations:

235

236

```python { .api }

237

def avg(*args):

238

"""

239

Calculate average vector across multiple rows.

240

241

Args:

242

*args: Vector columns or expressions

243

244

Returns:

245

Average vector

246

"""

247

248

def sum(*args):

249

"""

250

Calculate sum of vectors across multiple rows.

251

252

Args:

253

*args: Vector columns or expressions

254

255

Returns:

256

Sum vector

257

"""

258

```

259

260

**Usage Examples:**

261

262

```python

263

from pgvector.sqlalchemy import avg, sum

264

265

# Calculate average embedding

266

avg_embedding = session.scalars(

267

select(avg(Document.embedding))

268

).first()

269

270

# Calculate sum of embeddings

271

sum_embedding = session.scalars(

272

select(sum(Document.embedding))

273

).first()

274

275

# Group by category and average

276

from sqlalchemy import func

277

category_averages = session.execute(

278

select(Document.category, avg(Document.embedding))

279

.group_by(Document.category)

280

).all()

281

```

282

283

### Index Creation

284

285

Create approximate indexes for faster similarity search:

286

287

```python

288

from sqlalchemy import Index

289

290

# HNSW index for L2 distance

291

hnsw_index = Index(

292

'embedding_hnsw_idx',

293

Document.embedding,

294

postgresql_using='hnsw',

295

postgresql_with={'m': 16, 'ef_construction': 64},

296

postgresql_ops={'embedding': 'vector_l2_ops'}

297

)

298

299

# IVFFlat index for L2 distance

300

ivfflat_index = Index(

301

'embedding_ivfflat_idx',

302

Document.embedding,

303

postgresql_using='ivfflat',

304

postgresql_with={'lists': 100},

305

postgresql_ops={'embedding': 'vector_l2_ops'}

306

)

307

308

# Create indexes

309

hnsw_index.create(engine)

310

ivfflat_index.create(engine)

311

312

# Use different operators for different distance types:

313

# - vector_l2_ops: L2 distance (default)

314

# - vector_ip_ops: Inner product

315

# - vector_cosine_ops: Cosine distance

316

```

317

318

### Half-Precision Indexing

319

320

Index vectors at half-precision for memory efficiency:

321

322

```python

323

from sqlalchemy.sql import func

324

325

# Create half-precision index

326

half_index = Index(

327

'embedding_half_idx',

328

func.cast(Document.embedding, HALFVEC(1536)).label('embedding'),

329

postgresql_using='hnsw',

330

postgresql_with={'m': 16, 'ef_construction': 64},

331

postgresql_ops={'embedding': 'halfvec_l2_ops'}

332

)

333

334

# Query with half-precision casting

335

query_vector = PgVector([0.1, 0.2, 0.3] * 512)

336

half_results = session.scalars(

337

select(Document)

338

.order_by(

339

func.cast(Document.embedding, HALFVEC(1536))

340

.l2_distance(query_vector)

341

)

342

.limit(5)

343

).all()

344

```

345

346

### Array Support

347

348

Store arrays of vectors with proper type registration:

349

350

```python

351

from sqlalchemy import ARRAY, event

352

from pgvector.psycopg import register_vector

353

354

class MultiVectorDocument(Base):

355

__tablename__ = 'multi_vector_docs'

356

357

id = Column(Integer, primary_key=True)

358

embeddings = Column(ARRAY(VECTOR(768))) # Array of vectors

359

360

# Register vector types with the database connection

361

@event.listens_for(engine, "connect")

362

def connect(dbapi_connection, connection_record):

363

register_vector(dbapi_connection)

364

365

# For async connections

366

from pgvector.psycopg import register_vector_async

367

368

@event.listens_for(engine.sync_engine, "connect")

369

def connect_async(dbapi_connection, connection_record):

370

dbapi_connection.run_async(register_vector_async)

371

```