0
# Modern Index Interface
1
2
The modern interface (`ngtpy`) provides high-performance, pybind11-based bindings for the NGT C++ library. It is the recommended interface for new applications, offering better performance and more complete feature access than the legacy ctypes interface.
3
4
## Capabilities
5
6
### Index Creation
7
8
Create empty indexes with specified parameters for high-dimensional vector indexing.
9
10
```python { .api }
11
def create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40,
12
distance_type="L2", object_type="Float", graph_type="ANNG"):
13
"""
14
Create an empty index with specified parameters.
15
16
Args:
17
path (str): Index storage path
18
dimension (int): Vector dimensionality
19
edge_size_for_creation (int): Edges per node during index creation (default: 10)
20
edge_size_for_search (int): Edges per node during search (default: 40)
21
distance_type (str): Distance function - "L2", "L1", "Angle", "Hamming",
22
"Jaccard", "Cosine", "Normalized L2", "Normalized Angle",
23
"Normalized Cosine", "Inner Product", "Sparse Jaccard" (default: "L2")
24
object_type (str): Data type - "Float", "Float16", "Byte" (default: "Float")
25
graph_type (str): Graph algorithm - "ANNG", "IANNG", "RANNG", "RIANNG" (default: "ANNG")
26
27
Returns:
28
None
29
"""
30
```
31
32
### Index Management
33
34
Core index class for vector storage, indexing, and search operations.
35
36
```python { .api }
37
class Index:
38
def __init__(self, path, read_only=False, zero_based_numbering=True, tree_disabled=False, log_disabled=False):
39
"""
40
Open an existing index and create an index object for it.
41
42
Args:
43
path (str): Path to index directory
44
read_only (bool): Open in read-only mode (default: False)
45
zero_based_numbering (bool): Use zero-based object IDs (default: True)
46
tree_disabled (bool): Disable tree-based search, use graph-only (default: False)
47
log_disabled (bool): Disable progress logging (default: False)
48
"""
49
50
def close(self):
51
"""Close the index and free resources."""
52
53
def save(self):
54
"""Save index to disk."""
55
```
56
57
### Vector Insertion
58
59
Insert vectors into the index with flexible batch and single object insertion options.
60
61
```python { .api }
62
class Index:
63
def insert(self, object, debug=False):
64
"""
65
Insert a single object without building the index.
66
67
Args:
68
object (array-like): Vector to insert
69
debug (bool): Enable debug output (default: False)
70
71
Returns:
72
int: Object ID of inserted vector
73
"""
74
75
def batch_insert(self, objects, num_threads=8, target_size_of_graph=0, debug=False):
76
"""
77
Insert multiple objects and build index.
78
79
Args:
80
objects (array-like): Array of vectors to insert
81
num_threads (int): Number of threads for insertion (default: 8)
82
target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)
83
debug (bool): Enable debug output (default: False)
84
85
Returns:
86
None
87
"""
88
89
def build_index(self, num_threads=8, target_size_of_graph=0):
90
"""
91
Build index for previously inserted objects.
92
93
Args:
94
num_threads (int): Number of threads for building (default: 8)
95
target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)
96
97
Returns:
98
None
99
"""
100
```
101
102
### Vector Search
103
104
Search for nearest neighbors with configurable parameters and search modes.
105
106
```python { .api }
107
class Index:
108
def search(self, query, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):
109
"""
110
Search for nearest neighbors using graph traversal.
111
112
Args:
113
query (array-like): Query vector
114
size (int): Number of results to return, 0 uses default (default: 0)
115
epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)
116
edge_size (int): Number of edges to explore, -1 uses default (default: -1)
117
with_distance (bool): Include distances in results (default: True)
118
119
Returns:
120
list: List of (object_id, distance) tuples if with_distance=True,
121
otherwise list of object_ids
122
"""
123
124
def linear_search(self, query, size=0, with_distance=True):
125
"""
126
Perform a linear (brute-force) search without using the index.
127
128
Args:
129
query (array-like): Query vector
130
size (int): Number of results to return, 0 uses default (default: 0)
131
with_distance (bool): Include distances in results (default: True)
132
133
Returns:
134
list: List of (object_id, distance) tuples if with_distance=True,
135
otherwise list of object_ids
136
"""
137
138
def batch_search(self, query, results, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):
139
"""
140
Batch search multiple queries efficiently.
141
142
Args:
143
query (array-like): Array of query vectors
144
results (BatchResults): Container for batch results
145
size (int): Number of results per query, 0 uses default (default: 0)
146
epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)
147
edge_size (int): Number of edges to explore, -1 uses default (default: -1)
148
with_distance (bool): Include distances in results (default: True)
149
150
Returns:
151
None (results stored in results parameter)
152
"""
153
```
154
155
### Object Management
156
157
Access and manage indexed objects with removal and retrieval capabilities.
158
159
```python { .api }
160
class Index:
161
def get_object(self, object_id):
162
"""
163
Retrieve object by ID.
164
165
Args:
166
object_id (int): Object identifier
167
168
Returns:
169
list: Vector as list of floats
170
"""
171
172
def remove(self, object_id):
173
"""
174
Remove object from index.
175
176
Args:
177
object_id (int): Object identifier to remove
178
179
Returns:
180
None
181
"""
182
183
def get_num_of_objects(self):
184
"""
185
Get number of indexed objects.
186
187
Returns:
188
int: Number of objects in index
189
"""
190
```
191
192
### Index Statistics and Configuration
193
194
Access index statistics and configure search parameters.
195
196
```python { .api }
197
class Index:
198
def get_num_of_distance_computations(self):
199
"""
200
Get number of distance computations performed.
201
202
Returns:
203
int: Distance computation count
204
"""
205
206
def get_size_of_object_repository(self):
207
"""
208
Get size of object repository.
209
210
Returns:
211
int: Object repository size
212
"""
213
214
def get_size_of_graph_repository(self):
215
"""
216
Get size of graph repository.
217
218
Returns:
219
int: Graph repository size
220
"""
221
222
def set(self, num_of_search_objects=0, search_radius=-1.0, epsilon=-1.0, edge_size=-1, expected_accuracy=-1.0, result_expansion=-1.0):
223
"""
224
Set default search parameters.
225
226
Args:
227
num_of_search_objects (int): Default number of search results (default: 0)
228
search_radius (float): Maximum search radius, -1.0 uses default (default: -1.0)
229
epsilon (float): Default search epsilon, -1.0 uses default (default: -1.0)
230
edge_size (int): Default edge size, -1 uses default (default: -1)
231
expected_accuracy (float): Expected accuracy level, -1.0 uses default (default: -1.0)
232
result_expansion (float): Result expansion ratio, -1.0 uses default (default: -1.0)
233
234
Returns:
235
None
236
"""
237
```
238
239
### Import/Export Operations
240
241
Export and import index data for backup, sharing, or migration purposes. This section also covers refining an existing ANNG index with `refine_anng`.
242
243
```python { .api }
244
class Index:
245
def export_index(self, path):
246
"""
247
Export index to file.
248
249
Args:
250
path (str): Export file path
251
252
Returns:
253
None
254
"""
255
256
def import_index(self, path):
257
"""
258
Import index from file.
259
260
Args:
261
path (str): Import file path
262
263
Returns:
264
None
265
"""
266
267
def refine_anng(self, epsilon=0.1, expected_accuracy=0.0, num_of_edges=0, num_of_explored_edges=-1, batch_size=10000):
268
"""
269
Refine ANNG (Approximate Nearest Neighbor Graph) index.
270
271
Args:
272
epsilon (float): Refinement epsilon parameter (default: 0.1)
273
expected_accuracy (float): Expected accuracy level (default: 0.0)
274
num_of_edges (int): Number of edges per node, 0 uses default (default: 0)
275
num_of_explored_edges (int): Number of edges to explore, -1 uses default (default: -1)
276
batch_size (int): Processing batch size (default: 10000)
277
278
Returns:
279
None
280
"""
281
```
282
283
## Usage Examples
284
285
### Basic Index Creation and Search
286
287
```python
288
import ngtpy
289
import numpy as np
290
291
# Create 100 random vectors, each with 128 dimensions
292
vectors = np.random.random((100, 128)).astype(np.float32)
293
query = vectors[0]
294
295
# Create index
296
ngtpy.create("example_index", 128, distance_type="L2", object_type="Float")
297
index = ngtpy.Index("example_index")
298
299
# Insert vectors and build index
300
index.batch_insert(vectors)
301
index.save()
302
303
# Search for 5 nearest neighbors
304
results = index.search(query, size=5, epsilon=0.1)
305
for rank, (obj_id, distance) in enumerate(results):
306
print(f"Rank {rank+1}: Object {obj_id}, Distance {distance:.4f}")
307
308
index.close()
309
```
310
311
### Individual Object Insertion
312
313
```python
314
import ngtpy
315
import numpy as np
316
317
# Create empty index
318
ngtpy.create("incremental_index", 64, distance_type="Cosine")
319
index = ngtpy.Index("incremental_index")
320
321
# Insert objects one by one
322
object_ids = []
323
for i in range(50):
324
vector = np.random.random(64).astype(np.float32)
325
obj_id = index.insert(vector)
326
object_ids.append(obj_id)
327
328
# Build index after all insertions
329
index.build_index(num_threads=4)
330
index.save()
331
332
# Retrieve and verify objects
333
for obj_id in object_ids[:5]:
334
retrieved = index.get_object(obj_id)
335
print(f"Object {obj_id}: {retrieved[:3]}...") # Show first 3 dimensions
336
337
index.close()
338
```