High-performance library for approximate nearest neighbor search in high-dimensional vector spaces
npx @tessl/cli install tessl/pypi-ngt@2.3.0

NGT (Neighborhood Graph and Tree) provides Python bindings for high-performance approximate nearest neighbor search in high-dimensional vector spaces. Built on top of a C++ library, it offers both legacy and modern interfaces for indexing and searching large-scale vector datasets with multiple distance functions and data types.
pip install ngt

Modern interface (recommended):
import ngtpy

Legacy interface:
from ngt import base as ngt

Both interfaces:
import ngt
import ngtpy

# Quick-start example: build an index from random vectors, then query it.
import ngtpy
import random

# Create sample high-dimensional vectors
dim = 128
nb = 1000
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
query = vectors[0]

# Create and populate index
ngtpy.create("my_index", dim, distance_type="L2", object_type="Float")
index = ngtpy.Index("my_index")
index.batch_insert(vectors)
index.save()

# Search for nearest neighbors
results = index.search(query, size=5, epsilon=0.1)
for i, (object_id, distance) in enumerate(results):
    # Each result is unpacked as an (object ID, distance) pair.
    print(f"{i}: ID={object_id}, Distance={distance}")
    # Look the stored vector back up by the ID returned from the search.
    original_vector = index.get_object(object_id)
    print(f"Original vector: {original_vector}")
index.close()

NGT provides a layered architecture supporting multiple indexing approaches:
The library supports various distance functions (L1, L2, Cosine, Angular, Hamming, Jaccard, Inner Product) and data types (Float32, Float16, uint8) for different use cases in machine learning, computer vision, and recommendation systems.
Primary high-performance interface using pybind11 bindings for standard vector indexing and search operations with full feature access.
# Index creation and management
def create(
    path,
    dimension,
    edge_size_for_creation=10,
    edge_size_for_search=40,
    distance_type="L2",
    object_type="Float",
):
    """Signature summary for creating a new index at ``path``.

    ``dimension`` is the dimensionality of the vectors to be stored.
    ``distance_type`` and ``object_type`` select the distance function and
    the stored element type; the defaults are ``"L2"`` and ``"Float"``.
    The two ``edge_size_*`` values default to 10 and 40.
    NOTE(review): their exact effect on the graph is not described here;
    confirm against the NGT documentation.
    """
    ...
class Index:
def __init__(path, read_only=False, zero_based_numbering=True, log_disabled=False): ...
def search(query, size=0, epsilon=-1.0, edge_size=-1, with_distance=True): ...
def batch_insert(objects, num_threads=8, target_size_of_graph=0, debug=False): ...
def insert(object, debug=False): ...
def build_index(num_threads=8, target_size_of_graph=0): ...

Memory-efficient indexing using vector quantization for reduced storage while maintaining search accuracy.
class QuantizedIndex:
    """Signature summary for the quantized index class.

    Method signatures are listed exactly as in the API summary, i.e. without
    an explicit ``self`` parameter; bodies are placeholders.
    """

    def __init__(path, max_no_of_edges=128, zero_based_numbering=True,
                 read_only=False, log_disabled=False): ...

    def search(query, size=0, epsilon=-1.0, result_expansion=-1.0, edge_size=-1): ...
class QuantizedBlobIndex:
def __init__(path, max_no_of_edges=128, zero_based_numbering=True,
read_only=False, log_disabled=False, refinement=False): ...
def search(query, size=0, epsilon=float('-inf')): ...

Tools for improving index performance through graph structure optimization and parameter tuning.
class Optimizer:
def __init__(num_of_outgoings=-1, num_of_incomings=-1, num_of_queries=-1,
num_of_results=-1, log_disabled=False): ...
def execute(in_path, out_path): ...
def adjust_search_coefficients(path): ...

Ctypes-based interface providing backward compatibility with manual memory management and simplified API.
class Index:
    """Signature summary for the legacy (ctypes-based) index class.

    Method signatures are listed exactly as in the API summary, i.e. without
    an explicit ``self`` parameter; bodies are placeholders.
    """

    def __init__(path): ...

    @staticmethod
    def create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40,
               object_type="Float", distance_type="L2"): ...

    def search(query, k=20, epsilon=0.1): ...

    def insert(objects, num_threads=8): ...


class BatchResults:
    """Container for batch search results"""

    # Accessor signatures only; what each one returns is not shown here.
    def __init__(): ...

    def get(position): ...

    def get_ids(): ...

    def get_indexed_ids(): ...

    def get_indexed_distances(): ...

    def get_index(): ...

    def get_size(): ...
class ObjectDistance:
    """Search result structure (legacy interface)"""

    # Two annotated fields make up one search hit.
    id: int
    distance: float
class NativeError(Exception):
    """Exception for native library errors"""
class APIError(Exception):
    """Exception for API usage errors"""