or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

index.md legacy-interface.md modern-index.md optimization.md quantized-indexes.md

docs/modern-index.md

0

# Modern Index Interface

1

2

The modern interface (ngtpy) provides high-performance pybind11-based bindings for the NGT C++ library. This is the recommended interface for new applications, offering better performance and more complete feature access compared to the legacy ctypes interface.

3

4

## Capabilities

5

6

### Index Creation

7

8

Create empty indexes with specified parameters for high-dimensional vector indexing.

9

10

```python { .api }

11

def create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40,

12

distance_type="L2", object_type="Float", graph_type="ANNG"):

13

"""

14

Create an empty index with specified parameters.

15

16

Args:

17

path (str): Index storage path

18

dimension (int): Vector dimensionality

19

edge_size_for_creation (int): Edges per node during index creation (default: 10)

20

edge_size_for_search (int): Edges per node during search (default: 40)

21

distance_type (str): Distance function - "L2", "L1", "Angle", "Hamming",

22

"Jaccard", "Cosine", "Normalized L2", "Normalized Angle",

23

"Normalized Cosine", "Inner Product", "Sparse Jaccard" (default: "L2")

24

object_type (str): Data type - "Float", "Float16", "Byte" (default: "Float")

25

graph_type (str): Graph algorithm - "ANNG", "IANNG", "RANNG", "RIANNG" (default: "ANNG")

26

27

Returns:

28

None

29

"""

30

```

31

32

### Index Management

33

34

Core index class for vector storage, indexing, and search operations.

35

36

```python { .api }

37

class Index:

38

def __init__(self, path, read_only=False, zero_based_numbering=True, tree_disabled=False, log_disabled=False):

39

"""

40

Open existing index or create index object.

41

42

Args:

43

path (str): Path to index directory

44

read_only (bool): Open in read-only mode (default: False)

45

zero_based_numbering (bool): Use zero-based object IDs (default: True)

46

tree_disabled (bool): Disable tree-based search, use graph-only (default: False)

47

log_disabled (bool): Disable progress logging (default: False)

48

"""

49

50

def close(self):

51

"""Close the index and free resources."""

52

53

def save(self):

54

"""Save index to disk."""

55

```

56

57

### Vector Insertion

58

59

Insert vectors into the index with flexible batch and single object insertion options.

60

61

```python { .api }

62

class Index:

63

def insert(self, object, debug=False):

64

"""

65

Insert single object without building index.

66

67

Args:

68

object (array-like): Vector to insert

69

debug (bool): Enable debug output (default: False)

70

71

Returns:

72

int: Object ID of inserted vector

73

"""

74

75

def batch_insert(self, objects, num_threads=8, target_size_of_graph=0, debug=False):

76

"""

77

Insert multiple objects and build index.

78

79

Args:

80

objects (array-like): Array of vectors to insert

81

num_threads (int): Number of threads for insertion (default: 8)

82

target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)

83

debug (bool): Enable debug output (default: False)

84

85

Returns:

86

None

87

"""

88

89

def build_index(self, num_threads=8, target_size_of_graph=0):

90

"""

91

Build index for previously inserted objects.

92

93

Args:

94

num_threads (int): Number of threads for building (default: 8)

95

target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)

96

97

Returns:

98

None

99

"""

100

```

101

102

### Vector Search

103

104

Search for nearest neighbors with configurable parameters and search modes.

105

106

```python { .api }

107

class Index:

108

def search(self, query, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):

109

"""

110

Search for nearest neighbors using graph traversal.

111

112

Args:

113

query (array-like): Query vector

114

size (int): Number of results to return, 0 uses default (default: 0)

115

epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)

116

edge_size (int): Number of edges to explore, -1 uses default (default: -1)

117

with_distance (bool): Include distances in results (default: True)

118

119

Returns:

120

list: List of (object_id, distance) tuples if with_distance=True,

121

otherwise list of object_ids

122

"""

123

124

def linear_search(self, query, size=0, with_distance=True):

125

"""

126

Linear search without using index (brute force).

127

128

Args:

129

query (array-like): Query vector

130

size (int): Number of results to return, 0 uses default (default: 0)

131

with_distance (bool): Include distances in results (default: True)

132

133

Returns:

134

list: List of (object_id, distance) tuples if with_distance=True,

135

otherwise list of object_ids

136

"""

137

138

def batch_search(self, query, results, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):

139

"""

140

Batch search multiple queries efficiently.

141

142

Args:

143

query (array-like): Array of query vectors

144

results (BatchResults): Container for batch results

145

size (int): Number of results per query, 0 uses default (default: 0)

146

epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)

147

edge_size (int): Number of edges to explore, -1 uses default (default: -1)

148

with_distance (bool): Include distances in results (default: True)

149

150

Returns:

151

None (results stored in results parameter)

152

"""

153

```

154

155

### Object Management

156

157

Access and manage indexed objects with removal and retrieval capabilities.

158

159

```python { .api }

160

class Index:

161

def get_object(self, object_id):

162

"""

163

Retrieve object by ID.

164

165

Args:

166

object_id (int): Object identifier

167

168

Returns:

169

list: Vector as list of floats

170

"""

171

172

def remove(self, object_id):

173

"""

174

Remove object from index.

175

176

Args:

177

object_id (int): Object identifier to remove

178

179

Returns:

180

None

181

"""

182

183

def get_num_of_objects(self):

184

"""

185

Get number of indexed objects.

186

187

Returns:

188

int: Number of objects in index

189

"""

190

```

191

192

### Index Statistics and Configuration

193

194

Access index statistics and configure search parameters.

195

196

```python { .api }

197

class Index:

198

def get_num_of_distance_computations(self):

199

"""

200

Get number of distance computations performed.

201

202

Returns:

203

int: Distance computation count

204

"""

205

206

def get_size_of_object_repository(self):

207

"""

208

Get size of object repository.

209

210

Returns:

211

int: Object repository size

212

"""

213

214

def get_size_of_graph_repository(self):

215

"""

216

Get size of graph repository.

217

218

Returns:

219

int: Graph repository size

220

"""

221

222

def set(self, num_of_search_objects=0, search_radius=-1.0, epsilon=-1.0, edge_size=-1, expected_accuracy=-1.0, result_expansion=-1.0):

223

"""

224

Set default search parameters.

225

226

Args:

227

num_of_search_objects (int): Default number of search results (default: 0)

228

search_radius (float): Maximum search radius, -1.0 uses default (default: -1.0)

229

epsilon (float): Default search epsilon, -1.0 uses default (default: -1.0)

230

edge_size (int): Default edge size, -1 uses default (default: -1)

231

expected_accuracy (float): Expected accuracy level, -1.0 uses default (default: -1.0)

232

result_expansion (float): Result expansion ratio, -1.0 uses default (default: -1.0)

233

234

Returns:

235

None

236

"""

237

```

238

239

### Import/Export Operations

240

241

Export and import index data for backup, sharing, or migration purposes.

242

243

```python { .api }

244

class Index:

245

def export_index(self, path):

246

"""

247

Export index to file.

248

249

Args:

250

path (str): Export file path

251

252

Returns:

253

None

254

"""

255

256

def import_index(self, path):

257

"""

258

Import index from file.

259

260

Args:

261

path (str): Import file path

262

263

Returns:

264

None

265

"""

266

267

def refine_anng(self, epsilon=0.1, expected_accuracy=0.0, num_of_edges=0, num_of_explored_edges=-1, batch_size=10000):

268

"""

269

Refine ANNG (Approximate Nearest Neighbor Graph) index.

270

271

Args:

272

epsilon (float): Refinement epsilon parameter (default: 0.1)

273

expected_accuracy (float): Expected accuracy level (default: 0.0)

274

num_of_edges (int): Number of edges per node, 0 uses default (default: 0)

275

num_of_explored_edges (int): Number of edges to explore, -1 uses default (default: -1)

276

batch_size (int): Processing batch size (default: 10000)

277

278

Returns:

279

None

280

"""

281

```

282

283

## Usage Examples

284

285

### Basic Index Creation and Search

286

287

```python

288

import ngtpy

289

import numpy as np

290

291

# Create 100 random vectors, each with 128 dimensions

292

vectors = np.random.random((100, 128)).astype(np.float32)

293

query = vectors[0]

294

295

# Create index

296

ngtpy.create("example_index", 128, distance_type="L2", object_type="Float")

297

index = ngtpy.Index("example_index")

298

299

# Insert vectors and build index

300

index.batch_insert(vectors)

301

index.save()

302

303

# Search for 5 nearest neighbors

304

results = index.search(query, size=5, epsilon=0.1)

305

for rank, (obj_id, distance) in enumerate(results):

306

print(f"Rank {rank+1}: Object {obj_id}, Distance {distance:.4f}")

307

308

index.close()

309

```

310

311

### Individual Object Insertion

312

313

```python

314

import ngtpy

315

import numpy as np

316

317

# Create empty index

318

ngtpy.create("incremental_index", 64, distance_type="Cosine")

319

index = ngtpy.Index("incremental_index")

320

321

# Insert objects one by one

322

object_ids = []

323

for i in range(50):

324

vector = np.random.random(64).astype(np.float32)

325

obj_id = index.insert(vector)

326

object_ids.append(obj_id)

327

328

# Build index after all insertions

329

index.build_index(num_threads=4)

330

index.save()

331

332

# Retrieve and verify objects

333

for obj_id in object_ids[:5]:

334

retrieved = index.get_object(obj_id)

335

print(f"Object {obj_id}: {retrieved[:3]}...") # Show first 3 dimensions

336

337

index.close()

338

```