or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

index.md · legacy-interface.md · modern-index.md · optimization.md · quantized-indexes.md

docs/quantized-indexes.md

0

# Quantized Indexes

1

2

Quantized indexes provide memory-efficient vector indexing by using compressed representations while maintaining search accuracy. NGT offers two quantization approaches: QuantizedIndex for standard quantization and QuantizedBlobIndex for advanced blob-based quantization with maximum compression.

3

4

## Capabilities

5

6

### Quantized Index

7

8

Standard quantized indexing that reduces memory usage through vector quantization while preserving search performance.

9

10

```python { .api }

11

class QuantizedIndex:

12

def __init__(self, path, max_no_of_edges=128, zero_based_numbering=True,

13

read_only=False, log_disabled=False):

14

"""

15

Open quantized index for memory-efficient search.

16

17

Args:

18

path (str): Path to quantized index directory

19

max_no_of_edges (int): Maximum edges per node (default: 128)

20

zero_based_numbering (bool): Use zero-based object IDs (default: True)

21

read_only (bool): Open in read-only mode (default: False)

22

log_disabled (bool): Disable progress logging (default: False)

23

"""

24

```

25

26

### Quantized Search Operations

27

28

Search operations optimized for quantized vector representations with result expansion control.

29

30

```python { .api }

31

class QuantizedIndex:

32

def search(self, query, size=0, epsilon=-1.0, result_expansion=-1.0, edge_size=-1):

33

"""

34

Search nearest neighbors in quantized index.

35

36

Args:

37

query (array-like): Query vector

38

size (int): Number of results to return, 0 uses default (default: 0)

39

epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)

40

result_expansion (float): Result expansion ratio, -1.0 uses default (default: -1.0)

41

edge_size (int): Number of edges to explore, -1 uses default (default: -1)

42

43

Returns:

44

list: List of (object_id, distance) tuples

45

"""

46

```

47

48

### Quantized Configuration

49

50

Configure search parameters specific to quantized index operations.

51

52

```python { .api }

53

class QuantizedIndex:

54

def set(self, num_of_search_objects=0, search_radius=float('-inf'), epsilon=-1.0, result_expansion=-1.0):

55

"""

56

Set default search parameters for quantized index.

57

58

Args:

59

num_of_search_objects (int): Default number of search results (default: 0)

60

search_radius (float): Maximum search radius (default: float('-inf'))

61

epsilon (float): Default search epsilon (default: -1.0)

62

result_expansion (float): Default result expansion ratio (default: -1.0)

63

64

Returns:

65

None

66

"""

67

68

def set_with_distance(self, boolean=True):

69

"""

70

Configure whether to return distances with search results.

71

72

Args:

73

boolean (bool): Include distances in results (default: True)

74

75

Returns:

76

None

77

"""

78

79

def set_defaults(self, size=0, search_radius=float('-inf'), epsilon=-1.0, result_expansion=-1.0):

80

"""

81

Set default parameters (deprecated, use set() instead).

82

83

Args:

84

size (int): Default number of search results (default: 0)

85

search_radius (float): Maximum search radius (default: float('-inf'))

86

epsilon (float): Default search epsilon (default: -1.0)

87

result_expansion (float): Default result expansion ratio (default: -1.0)

88

89

Returns:

90

None

91

"""

92

```

93

94

### Quantized Blob Index

95

96

Advanced quantized indexing with blob storage for maximum compression and specialized search operations.

97

98

```python { .api }

99

class QuantizedBlobIndex:

100

def __init__(self, path, max_no_of_edges=128, zero_based_numbering=True,

101

read_only=False, log_disabled=False, refinement=False,

102

refinement_object_type="Any"):

103

"""

104

Open quantized blob index for maximum compression.

105

106

Args:

107

path (str): Path to quantized blob index directory

108

max_no_of_edges (int): Maximum edges per node (default: 128)

109

zero_based_numbering (bool): Use zero-based object IDs (default: True)

110

read_only (bool): Open in read-only mode (default: False)

111

log_disabled (bool): Disable progress logging (default: False)

112

refinement (bool): Enable search refinement (default: False)

113

refinement_object_type (str): Object type for refinement (default: "Any")

114

"""

115

```

116

117

### Blob Search Operations

118

119

Search operations designed for blob-quantized indexes with batch processing capabilities.

120

121

```python { .api }

122

class QuantizedBlobIndex:

123

def search(self, query, size=0, epsilon=float('-inf')):

124

"""

125

Search nearest neighbors in blob quantized index.

126

127

Args:

128

query (array-like): Query vector

129

size (int): Number of results to return, 0 uses default (default: 0)

130

epsilon (float): Search range parameter (default: float('-inf'))

131

132

Returns:

133

list: List of (object_id, distance) tuples

134

"""

135

136

def batch_search(self, query, results, size=0):

137

"""

138

Batch search multiple queries in blob index.

139

140

Args:

141

query (array-like): Array of query vectors

142

results (BatchResults): Container for batch results

143

size (int): Number of results per query, 0 uses default (default: 0)

144

145

Returns:

146

None (results stored in results parameter)

147

"""

148

149

def batch_search_tmp(self, query, size=0):

150

"""

151

Temporary batch search implementation.

152

153

Args:

154

query (array-like): Array of query vectors

155

size (int): Number of results per query, 0 uses default (default: 0)

156

157

Returns:

158

list: Batch search results

159

"""

160

161

def batch_range_search(self, query, results, radius=float('-inf')):

162

"""

163

Range-based batch search within specified radius.

164

165

Args:

166

query (array-like): Array of query vectors

167

results (BatchResults): Container for batch results

168

radius (float): Search radius, float('-inf') for no limit (default: float('-inf'))

169

170

Returns:

171

None (results stored in results parameter)

172

"""

173

```

174

175

### Blob Index Management

176

177

Management operations for blob quantized indexes including insertion and persistence.

178

179

```python { .api }

180

class QuantizedBlobIndex:

181

def batch_insert(self, objects, debug=False):

182

"""

183

Insert multiple objects into blob index.

184

185

Args:

186

objects (array-like): Array of vectors to insert

187

debug (bool): Enable debug output (default: False)

188

189

Returns:

190

None

191

"""

192

193

def save(self):

194

"""

195

Save blob index to disk.

196

197

Returns:

198

None

199

"""

200

201

def set(self, num_of_search_objects=0, epsilon=float('-inf'), blob_epsilon=-1.0, result_expansion=-1.0, radius=-1.0, edge_size=-1, exploration_size=0, exact_result_expansion=0.0, num_of_probes=-1):

202

"""

203

Set default search parameters for blob index.

204

205

Args:

206

num_of_search_objects (int): Default number of search results (default: 0)

207

epsilon (float): Default search epsilon (default: float('-inf'))

208

blob_epsilon (float): Blob-specific epsilon, -1.0 uses default (default: -1.0)

209

result_expansion (float): Default result expansion ratio, -1.0 uses default (default: -1.0)

210

radius (float): Search radius, -1.0 uses default (default: -1.0)

211

edge_size (int): Edge size, -1 uses default (default: -1)

212

exploration_size (int): Graph exploration size (default: 0)

213

exact_result_expansion (float): Exact result expansion ratio (default: 0.0)

214

num_of_probes (int): Number of probes, -1 uses default (default: -1)

215

216

Returns:

217

None

218

"""

219

220

def set_with_distance(self, boolean=True):

221

"""

222

Configure whether to return distances with search results.

223

224

Args:

225

boolean (bool): Include distances in results (default: True)

226

227

Returns:

228

None

229

"""

230

```

231

232

### Batch Results Container

233

234

Container class for managing batch search results from quantized indexes.

235

236

```python { .api }

237

class BatchResults:

238

def __init__(self):

239

"""

240

Create empty batch results container.

241

"""

242

243

def get(self, position):

244

"""

245

Get result at specific position.

246

247

Args:

248

position (int): Position index

249

250

Returns:

251

Result at specified position

252

"""

253

254

def get_ids(self):

255

"""

256

Get all result IDs as array.

257

258

Returns:

259

array: Object IDs from search results

260

"""

261

262

def get_indexed_ids(self):

263

"""

264

Get indexed result IDs.

265

266

Returns:

267

array: Indexed object IDs

268

"""

269

270

def get_indexed_distances(self):

271

"""

272

Get indexed distances.

273

274

Returns:

275

array: Distance values for indexed results

276

"""

277

278

def get_index(self):

279

"""

280

Get result index information.

281

282

Returns:

283

Index information for results

284

"""

285

286

def get_size(self):

287

"""

288

Get number of results in container.

289

290

Returns:

291

int: Number of results

292

"""

293

```

294

295

## Usage Examples

296

297

### Quantized Index Search

298

299

```python

300

import ngtpy

301

import numpy as np

302

303

# Open existing quantized index

304

# Note: Quantized index must be created using command-line tools (ngtqg quantize)

305

qindex = ngtpy.QuantizedIndex("quantized_index_path", max_no_of_edges=64)

306

307

# Configure search parameters

308

qindex.set(num_of_search_objects=20, epsilon=0.02, result_expansion=3.0)

309

310

# Search with result expansion for higher accuracy

311

query = np.random.random(128).astype(np.float32)

312

results = qindex.search(query, size=10, epsilon=0.05, result_expansion=5.0)

313

314

for rank, (obj_id, distance) in enumerate(results):

315

print(f"Rank {rank+1}: Object {obj_id}, Distance {distance:.4f}")

316

```

317

318

### Quantized Blob Index with Batch Search

319

320

```python

321

import ngtpy

322

import numpy as np

323

324

# Open quantized blob index with refinement

325

qb_index = ngtpy.QuantizedBlobIndex(

326

"blob_index_path",

327

max_no_of_edges=128,

328

refinement=True

329

)

330

331

# Prepare multiple queries

332

queries = np.random.random((5, 128)).astype(np.float32)

333

batch_results = ngtpy.BatchResults()

334

335

# Perform batch search

336

qb_index.batch_search(queries, batch_results, size=5)

337

338

# Process batch results

339

for i in range(batch_results.get_size()):

340

result = batch_results.get(i)

341

print(f"Query {i}: {result}")

342

343

# Range-based search within specified radius

344

range_results = ngtpy.BatchResults()

345

qb_index.batch_range_search(queries, range_results, radius=0.5)

346

347

print(f"Range search found {range_results.get_size()} total results")

348

```

349

350

### Adding Data to Blob Index

351

352

```python

353

import ngtpy

354

import numpy as np

355

356

# Open blob index for writing

357

qb_index = ngtpy.QuantizedBlobIndex("writable_blob_index", read_only=False)

358

359

# Insert new vectors

360

new_vectors = np.random.random((100, 128)).astype(np.float32)

361

qb_index.batch_insert(new_vectors, debug=True)

362

363

# Save the updated index

364

qb_index.save()

365

366

print("Successfully added 100 new vectors to blob index")

367

```