High-performance library for approximate nearest neighbor search in high-dimensional vector spaces
—
The modern interface (ngtpy) provides high-performance pybind11-based bindings for the NGT C++ library. This is the recommended interface for new applications, offering better performance and more complete feature access compared to the legacy ctypes interface.
Create empty indexes with specified parameters for high-dimensional vector indexing.
def create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40,
distance_type="L2", object_type="Float", graph_type="ANNG"):
"""
Create an empty index with specified parameters.
Args:
path (str): Index storage path
dimension (int): Vector dimensionality
edge_size_for_creation (int): Edges per node during index creation (default: 10)
edge_size_for_search (int): Edges per node during search (default: 40)
distance_type (str): Distance function - "L2", "L1", "Angle", "Hamming",
"Jaccard", "Cosine", "Normalized L2", "Normalized Angle",
"Normalized Cosine", "Inner Product", "Sparse Jaccard" (default: "L2")
object_type (str): Data type - "Float", "Float16", "Byte" (default: "Float")
graph_type (str): Graph algorithm - "ANNG", "IANNG", "RANNG", "RIANNG" (default: "ANNG")
Returns:
None
"""
Core index class for vector storage, indexing, and search operations.
class Index:
def __init__(self, path, read_only=False, zero_based_numbering=True, tree_disabled=False, log_disabled=False):
"""
Open existing index or create index object.
Args:
path (str): Path to index directory
read_only (bool): Open in read-only mode (default: False)
zero_based_numbering (bool): Use zero-based object IDs (default: True)
tree_disabled (bool): Disable tree-based search, use graph-only (default: False)
log_disabled (bool): Disable progress logging (default: False)
"""
def close(self):
"""Close the index and free resources."""
def save(self):
"""Save index to disk."""
Insert vectors into the index with flexible batch and single object insertion options.
class Index:
def insert(self, object, debug=False):
"""
Insert single object without building index.
Args:
object (array-like): Vector to insert
debug (bool): Enable debug output (default: False)
Returns:
int: Object ID of inserted vector
"""
def batch_insert(self, objects, num_threads=8, target_size_of_graph=0, debug=False):
"""
Insert multiple objects and build index.
Args:
objects (array-like): Array of vectors to insert
num_threads (int): Number of threads for insertion (default: 8)
target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)
debug (bool): Enable debug output (default: False)
Returns:
None
"""
def build_index(self, num_threads=8, target_size_of_graph=0):
"""
Build index for previously inserted objects.
Args:
num_threads (int): Number of threads for building (default: 8)
target_size_of_graph (int): Target graph size, 0 for automatic (default: 0)
Returns:
None
"""
Search for nearest neighbors with configurable parameters and search modes.
class Index:
def search(self, query, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):
"""
Search for nearest neighbors using graph traversal.
Args:
query (array-like): Query vector
size (int): Number of results to return, 0 uses default (default: 0)
epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)
edge_size (int): Number of edges to explore, -1 uses default (default: -1)
with_distance (bool): Include distances in results (default: True)
Returns:
list: List of (object_id, distance) tuples if with_distance=True,
otherwise list of object_ids
"""
def linear_search(self, query, size=0, with_distance=True):
"""
Linear search without using index (brute force).
Args:
query (array-like): Query vector
size (int): Number of results to return, 0 uses default (default: 0)
with_distance (bool): Include distances in results (default: True)
Returns:
list: List of (object_id, distance) tuples if with_distance=True,
otherwise list of object_ids
"""
def batch_search(self, query, results, size=0, epsilon=-1.0, edge_size=-1, with_distance=True):
"""
Batch search multiple queries efficiently.
Args:
query (array-like): Array of query vectors
results (BatchResults): Container for batch results
size (int): Number of results per query, 0 uses default (default: 0)
epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)
edge_size (int): Number of edges to explore, -1 uses default (default: -1)
with_distance (bool): Include distances in results (default: True)
Returns:
None (results stored in results parameter)
"""
Access and manage indexed objects with removal and retrieval capabilities.
class Index:
def get_object(self, object_id):
"""
Retrieve object by ID.
Args:
object_id (int): Object identifier
Returns:
list: Vector as list of floats
"""
def remove(self, object_id):
"""
Remove object from index.
Args:
object_id (int): Object identifier to remove
Returns:
None
"""
def get_num_of_objects(self):
"""
Get number of indexed objects.
Returns:
int: Number of objects in index
"""
Access index statistics and configure search parameters.
class Index:
def get_num_of_distance_computations(self):
"""
Get number of distance computations performed.
Returns:
int: Distance computation count
"""
def get_size_of_object_repository(self):
"""
Get size of object repository.
Returns:
int: Object repository size
"""
def get_size_of_graph_repository(self):
"""
Get size of graph repository.
Returns:
int: Graph repository size
"""
def set(self, num_of_search_objects=0, search_radius=-1.0, epsilon=-1.0, edge_size=-1, expected_accuracy=-1.0, result_expansion=-1.0):
"""
Set default search parameters.
Args:
num_of_search_objects (int): Default number of search results (default: 0)
search_radius (float): Maximum search radius, -1.0 uses default (default: -1.0)
epsilon (float): Default search epsilon, -1.0 uses default (default: -1.0)
edge_size (int): Default edge size, -1 uses default (default: -1)
expected_accuracy (float): Expected accuracy level, -1.0 uses default (default: -1.0)
result_expansion (float): Result expansion ratio, -1.0 uses default (default: -1.0)
Returns:
None
"""
Export and import index data for backup, sharing, or migration purposes.
class Index:
def export_index(self, path):
"""
Export index to file.
Args:
path (str): Export file path
Returns:
None
"""
def import_index(self, path):
"""
Import index from file.
Args:
path (str): Import file path
Returns:
None
"""
def refine_anng(self, epsilon=0.1, expected_accuracy=0.0, num_of_edges=0, num_of_explored_edges=-1, batch_size=10000):
"""
Refine ANNG (Approximate Nearest Neighbor Graph) index.
Args:
epsilon (float): Refinement epsilon parameter (default: 0.1)
expected_accuracy (float): Expected accuracy level (default: 0.0)
num_of_edges (int): Number of edges per node, 0 uses default (default: 0)
num_of_explored_edges (int): Number of edges to explore, -1 uses default (default: -1)
batch_size (int): Processing batch size (default: 10000)
Returns:
None
"""
import ngtpy
import numpy as np
# Create 100 128-dimensional vectors
vectors = np.random.random((100, 128)).astype(np.float32)
query = vectors[0]
# Create index
ngtpy.create("example_index", 128, distance_type="L2", object_type="Float")
index = ngtpy.Index("example_index")
# Insert vectors and build index
index.batch_insert(vectors)
index.save()
# Search for 5 nearest neighbors
results = index.search(query, size=5, epsilon=0.1)
for rank, (obj_id, distance) in enumerate(results):
print(f"Rank {rank+1}: Object {obj_id}, Distance {distance:.4f}")
index.close()
import ngtpy
import numpy as np
# Create empty index
ngtpy.create("incremental_index", 64, distance_type="Cosine")
index = ngtpy.Index("incremental_index")
# Insert objects one by one
object_ids = []
for i in range(50):
vector = np.random.random(64).astype(np.float32)
obj_id = index.insert(vector)
object_ids.append(obj_id)
# Build index after all insertions
index.build_index(num_threads=4)
index.save()
# Retrieve and verify objects
for obj_id in object_ids[:5]:
retrieved = index.get_object(obj_id)
print(f"Object {obj_id}: {retrieved[:3]}...") # Show first 3 dimensions
index.close()
Install with Tessl CLI
npx tessl i tessl/pypi-ngt