or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-vectors.md, database-drivers.md, django-integration.md, index.md, peewee-integration.md, sqlalchemy-integration.md

docs/core-vectors.md

0

# Core Vector Types

1

2

These classes are the foundation of pgvector: four distinct vector types, each optimized for different use cases and memory requirements.

3

4

## Capabilities

5

6

### Vector Class (32-bit Float)

7

8

Standard precision vectors using 32-bit floating point numbers for full precision vector operations.

9

10

```python { .api }

11

class Vector:

12

def __init__(self, value):

13

"""

14

Create a vector from various input types.

15

16

Args:

17

value: Array-like input (list, tuple, numpy array)

18

19

Raises:

20

ValueError: If input has wrong dimensions or invalid format

21

"""

22

23

def __repr__(self) -> str:

24

"""String representation of the vector."""

25

26

def __eq__(self, other) -> bool:

27

"""Compare vectors for equality."""

28

29

def dimensions(self) -> int:

30

"""Get the number of dimensions in the vector."""

31

32

def to_list(self) -> list:

33

"""Convert vector to Python list."""

34

35

def to_numpy(self) -> np.ndarray:

36

"""Convert vector to numpy array with dtype float32."""

37

38

def to_text(self) -> str:

39

"""Convert to PostgreSQL text representation."""

40

41

def to_binary(self) -> bytes:

42

"""Convert to PostgreSQL binary representation."""

43

44

@classmethod

45

def from_text(cls, value: str) -> 'Vector':

46

"""Create vector from PostgreSQL text format."""

47

48

@classmethod

49

def from_binary(cls, value: bytes) -> 'Vector':

50

"""Create vector from PostgreSQL binary format."""

51

52

@classmethod

53

def _to_db(cls, value, dim=None):

54

"""Convert value to database text format."""

55

56

@classmethod

57

def _to_db_binary(cls, value):

58

"""Convert value to database binary format."""

59

60

@classmethod

61

def _from_db(cls, value):

62

"""Convert database value to numpy array."""

63

64

@classmethod

65

def _from_db_binary(cls, value):

66

"""Convert database binary value to numpy array."""

67

```

68

69

**Usage Examples:**

70

71

```python

72

from pgvector import Vector

73

import numpy as np

74

75

# Create from list

76

v1 = Vector([1.0, 2.5, 3.2])

77

78

# Create from numpy array

79

arr = np.array([1, 2, 3], dtype=np.float32)

80

v2 = Vector(arr)

81

82

# Create from tuple

83

v3 = Vector((1.5, 2.5, 3.5))

84

85

# Get properties

86

dimensions = v1.dimensions() # 3

87

as_list = v1.to_list() # [1.0, 2.5, 3.2]

88

as_numpy = v1.to_numpy() # numpy array

89

90

# PostgreSQL serialization

91

text_repr = v1.to_text() # '[1,2.5,3.2]'

92

binary_repr = v1.to_binary() # bytes

93

94

# Parse from PostgreSQL formats

95

v4 = Vector.from_text('[1,2,3]')

96

v5 = Vector.from_binary(binary_repr)

97

```

98

99

### HalfVector Class (16-bit Float)

100

101

Memory-efficient vectors using 16-bit floating point numbers, ideal for large-scale vector storage with acceptable precision loss.

102

103

```python { .api }

104

class HalfVector:

105

def __init__(self, value):

106

"""

107

Create a half-precision vector.

108

109

Args:

110

value: Array-like input (list, tuple, numpy array)

111

112

Raises:

113

ValueError: If input has wrong dimensions or invalid format

114

"""

115

116

def __repr__(self) -> str:

117

"""String representation of the half vector."""

118

119

def __eq__(self, other) -> bool:

120

"""Compare half vectors for equality."""

121

122

def dimensions(self) -> int:

123

"""Get the number of dimensions in the vector."""

124

125

def to_list(self) -> list:

126

"""Convert vector to Python list."""

127

128

def to_numpy(self) -> np.ndarray:

129

"""Convert vector to numpy array with dtype float16."""

130

131

def to_text(self) -> str:

132

"""Convert to PostgreSQL text representation."""

133

134

def to_binary(self) -> bytes:

135

"""Convert to PostgreSQL binary representation."""

136

137

@classmethod

138

def from_text(cls, value: str) -> 'HalfVector':

139

"""Create half vector from PostgreSQL text format."""

140

141

@classmethod

142

def from_binary(cls, value: bytes) -> 'HalfVector':

143

"""Create half vector from PostgreSQL binary format."""

144

145

@classmethod

146

def _to_db(cls, value, dim=None):

147

"""Convert value to database text format."""

148

149

@classmethod

150

def _to_db_binary(cls, value):

151

"""Convert value to database binary format."""

152

153

@classmethod

154

def _from_db(cls, value):

155

"""Convert database value to HalfVector."""

156

157

@classmethod

158

def _from_db_binary(cls, value):

159

"""Convert database binary value to HalfVector."""

160

```

161

162

**Usage Examples:**

163

164

```python

165

from pgvector import HalfVector

166

167

# Create half-precision vector (uses ~50% memory of Vector)

168

hv = HalfVector([1.5, 2.0, 3.5])

169

170

# Same API as Vector class

171

dimensions = hv.dimensions()

172

as_list = hv.to_list()

173

text_format = hv.to_text()

174

175

# Memory efficient for large datasets

176

large_half_vector = HalfVector(list(range(1000)))

177

```

178

179

### SparseVector Class

180

181

Efficient storage for high-dimensional vectors with many zero values, storing only non-zero elements with their indices.

182

183

```python { .api }

184

class SparseVector:

185

def __init__(self, value, dimensions=None, /):

186

"""

187

Create a sparse vector from various input formats.

188

189

Args:

190

value: dict, scipy sparse array, or dense array-like

191

dimensions: Required when value is a dict; must be omitted for all other input types

192

193

Raises:

194

ValueError: If dimensions missing for dict input or extra for others

195

"""

196

197

def __repr__(self) -> str:

198

"""String representation of the sparse vector."""

199

200

def __eq__(self, other) -> bool:

201

"""Compare sparse vectors for equality."""

202

203

def dimensions(self) -> int:

204

"""Get the total number of dimensions."""

205

206

def indices(self) -> list:

207

"""Get indices of non-zero elements."""

208

209

def values(self) -> list:

210

"""Get non-zero values."""

211

212

def to_coo(self):

213

"""Convert to scipy COO sparse array."""

214

215

def to_list(self) -> list:

216

"""Convert to dense Python list."""

217

218

def to_numpy(self) -> np.ndarray:

219

"""Convert to dense numpy array."""

220

221

def to_text(self) -> str:

222

"""Convert to PostgreSQL text representation."""

223

224

def to_binary(self) -> bytes:

225

"""Convert to PostgreSQL binary representation."""

226

227

@classmethod

228

def from_text(cls, value: str) -> 'SparseVector':

229

"""Create sparse vector from PostgreSQL text format."""

230

231

@classmethod

232

def from_binary(cls, value: bytes) -> 'SparseVector':

233

"""Create sparse vector from PostgreSQL binary format."""

234

235

@classmethod

236

def _from_parts(cls, dim: int, indices: list, values: list) -> 'SparseVector':

237

"""Create sparse vector from dimensions, indices, and values."""

238

239

@classmethod

240

def _to_db(cls, value, dim=None):

241

"""Convert value to database text format."""

242

243

@classmethod

244

def _to_db_binary(cls, value):

245

"""Convert value to database binary format."""

246

247

@classmethod

248

def _from_db(cls, value):

249

"""Convert database value to SparseVector."""

250

251

@classmethod

252

def _from_db_binary(cls, value):

253

"""Convert database binary value to SparseVector."""

254

```

255

256

**Usage Examples:**

257

258

```python

259

from pgvector import SparseVector

260

import numpy as np

261

262

# Create from dictionary (index: value)

263

sv1 = SparseVector({0: 1.0, 10: 2.5, 100: 3.0}, 1000)

264

265

# Create from dense array (zeros will be optimized out)

266

dense = [1.0, 0.0, 0.0, 2.5, 0.0]

267

sv2 = SparseVector(dense)

268

269

# Create from scipy sparse array

270

try:

271

from scipy.sparse import coo_array

272

coords = ([0, 0, 0], [0, 2, 4]) # row, col indices

273

data = [1.0, 2.5, 3.0]

274

sparse_array = coo_array((data, coords), shape=(1, 5))

275

sv3 = SparseVector(sparse_array)

276

except ImportError:

277

pass # scipy not available

278

279

# Access sparse structure

280

dimensions = sv1.dimensions() # 1000

281

indices = sv1.indices() # [0, 10, 100]

282

values = sv1.values() # [1.0, 2.5, 3.0]

283

284

# Convert to dense formats

285

dense_list = sv1.to_list() # Full 1000-element list with zeros

286

dense_numpy = sv1.to_numpy() # Dense numpy array

287

288

# Convert to scipy sparse

289

sparse_coo = sv1.to_coo() # COO sparse array

290

```

291

292

### Bit Class

293

294

Binary vectors for bit operations supporting Hamming and Jaccard distance calculations.

295

296

```python { .api }

297

class Bit:

298

def __init__(self, value):

299

"""

300

Create a bit vector from various input formats.

301

302

Args:

303

value: bytes, binary string, or boolean array-like

304

305

Raises:

306

ValueError: If input format is invalid

307

"""

308

309

def __repr__(self) -> str:

310

"""String representation of the bit vector."""

311

312

def __eq__(self, other) -> bool:

313

"""Compare bit vectors for equality."""

314

315

def to_list(self) -> list:

316

"""Convert to list of booleans."""

317

318

def to_numpy(self) -> np.ndarray:

319

"""Convert to boolean numpy array."""

320

321

def to_text(self) -> str:

322

"""Convert to binary string representation."""

323

324

def to_binary(self) -> bytes:

325

"""Convert to PostgreSQL binary representation."""

326

327

@classmethod

328

def from_text(cls, value: str) -> 'Bit':

329

"""Create bit vector from binary string."""

330

331

@classmethod

332

def from_binary(cls, value: bytes) -> 'Bit':

333

"""Create bit vector from PostgreSQL binary format."""

334

335

@classmethod

336

def _to_db(cls, value):

337

"""Convert value to database text format."""

338

339

@classmethod

340

def _to_db_binary(cls, value):

341

"""Convert value to database binary format."""

342

```

343

344

**Usage Examples:**

345

346

```python

347

from pgvector import Bit

348

import numpy as np

349

350

# Create from binary string

351

b1 = Bit("1010")

352

353

# Create from boolean list

354

b2 = Bit([True, False, True, False])

355

356

# Create from boolean numpy array

357

bool_array = np.array([True, False, True, True], dtype=bool)

358

b3 = Bit(bool_array)

359

360

# Create from bytes

361

b4 = Bit(b'\x0f') # Represents "00001111"

362

363

# Convert to different formats

364

as_list = b1.to_list() # [True, False, True, False]

365

as_numpy = b1.to_numpy() # boolean numpy array

366

as_text = b1.to_text() # "1010"

367

368

# PostgreSQL serialization

369

binary_format = b1.to_binary()

370

b5 = Bit.from_binary(binary_format)

371

```

372

373

## Error Handling

374

375

All vector types raise `ValueError` for:

376

- Invalid input dimensions (must be 1D)

377

- Incompatible data types

378

- Missing required parameters (e.g., dimensions for SparseVector dict input)

379

- Invalid format strings for from_text methods

380

381

The Bit class additionally emits a warning (rather than raising an exception) when non-boolean data is converted to boolean values.