High-performance library for approximate nearest neighbor search in high-dimensional vector spaces
—
The legacy interface (ngt.base) provides ctypes-based bindings for backward compatibility. While the modern interface (ngtpy) is recommended for new applications, the legacy interface remains useful for existing code and simpler use cases requiring manual memory management.
Core index class using ctypes bindings for vector storage and search operations.
class Index:
    # Core index class of the legacy ctypes-based interface (ngt.base).
    # Signature listing only: opening an existing index and creating a new one.

    def __init__(self, path):
        """
        Open existing index for read/write operations.

        Args:
            path (bytes): Path to index directory (must be bytes, not str),
                e.g. b"my_index"
        """

    @staticmethod
    def create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40,
               object_type="Float", distance_type="L2"):
        """
        Create new empty index with specified parameters.

        Args:
            path (bytes): Index storage path (must be bytes, not str),
                e.g. b"my_index"
            dimension (int): Vector dimensionality
            edge_size_for_creation (int): Edges per node during creation (default: 10)
            edge_size_for_search (int): Edges per node during search (default: 40)
            object_type (str): Data type - "Float", "Integer" (1 byte unsigned) (default: "Float")
            distance_type (str): Distance function - "L2", "L1", "Angle", "Hamming",
                "Jaccard", "Cosine" (default: "L2")

        Returns:
            Index: New Index object
        """

Search for nearest neighbors with simplified parameter control.
class Index:
    # Signature listing only: nearest-neighbor search on the legacy Index class.

    def search(self, query, k=20, epsilon=0.1):
        """
        Search for k nearest neighbors.

        Args:
            query (array-like): Query vector
            k (int): Number of nearest neighbors to return (default: 20)
            epsilon (float): Search range expansion parameter (default: 0.1)

        Returns:
            list: List of ObjectDistance instances with id and distance attributes
        """

Insert vectors using simplified insertion methods with automatic index building.
class Index:
    # Signature listing only: insertion methods of the legacy Index class.

    def insert_object(self, object):
        """
        Insert single object without building index.

        The index is not built by this call; use build_index() to build it
        for previously inserted objects.

        Args:
            object (array-like): Vector to insert

        Returns:
            int: Object ID of inserted vector
        """

    def insert(self, objects, num_threads=8):
        """
        Insert multiple objects and build index.

        Args:
            objects (array-like): Array of vectors to insert
            num_threads (int): Number of threads for insertion (default: 8)

        Returns:
            None
        """

    def insert_blob(self, objects, num_threads=8):
        """
        Insert multiple objects using blob method and build index.

        Args:
            objects (array-like): Array of vectors to insert
            num_threads (int): Number of threads for insertion (default: 8)

        Returns:
            None
        """

    def insert_from_tsv(self, path, num_threads=8, dlmt='\t'):
        """
        Insert objects from TSV file and build index.

        Args:
            path (str): Path to TSV file containing vectors
            num_threads (int): Number of threads for insertion (default: 8)
            dlmt (str): Delimiter character (default: '\t')

        Returns:
            None
        """

Build indexes and manage index persistence with simplified operations.
class Index:
    # Signature listing only: index building and persistence on the legacy Index class.

    def build_index(self, num_threads=8):
        """
        Build index for previously inserted objects.

        Args:
            num_threads (int): Number of threads for building (default: 8)

        Returns:
            None
        """

    def save(self, path=None):
        """
        Save index to disk.

        Args:
            path (str, optional): Save path, uses original path if None

        Returns:
            None
        """

Access and manage indexed objects with basic retrieval and removal operations.
class Index:
    # Signature listing only: object retrieval and removal on the legacy Index class.

    def get_object(self, id):
        """
        Retrieve object by ID.

        Args:
            id (int): Object identifier

        Returns:
            list: Vector as list of floats
        """

    def remove(self, id):
        """
        Remove object from index.

        Args:
            id (int): Object identifier to remove

        Returns:
            None
        """

Data structure for search results containing object ID and distance information.
class ObjectDistance:
    """
    Search result structure containing object ID and distance.

    Attributes:
        id (int): Object identifier (c_uint)
        distance (float): Distance value (c_float)
    """
    # One instance is returned per neighbor by Index.search().
    id: int
    distance: float

Exception classes for error handling in the legacy interface.
class NativeError(Exception):
    """
    Exception raised when underlying C library encounters errors.

    This exception is raised for low-level library errors, memory issues,
    or invalid operations on the native NGT library.
    """
class APIError(Exception):
    """
    Exception raised for API usage errors.

    This exception is raised for invalid API usage, incorrect parameters,
    or operations that violate the API contract.
    """

from ngt import base as ngt
import random

# Basic usage: create an index, bulk-insert random vectors, then search it.

# Create sample data
dim = 10
nb = 100
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
query = vectors[0]

# Create index. The legacy interface requires index paths as bytes, not str.
ngt.Index.create(b"legacy_index", dim, distance_type="L2", object_type="Float")

# Open index and insert data (insert() also builds the index)
index = ngt.Index(b"legacy_index")
index.insert(vectors, num_threads=4)
index.save()

# Search for nearest neighbors
results = index.search(query, k=5, epsilon=0.1)
for rank, result in enumerate(results, start=1):
    print(f"Rank {rank}: Object {result.id}, Distance {result.distance:.4f}")
    vector = index.get_object(result.id)
    print(f"Vector: {vector[:3]}...")  # Show first 3 dimensions

from ngt import base as ngt
import random

# Incremental insertion: add objects one at a time, then build the index once.

dim = 64  # Must match the dimension the index is created with

# Create empty index. The legacy interface requires index paths as bytes, not str.
ngt.Index.create(b"incremental_legacy", dim, edge_size_for_creation=15)
index = ngt.Index(b"incremental_legacy")

# Insert objects one by one (insert_object() does not build the index)
object_ids = []
for _ in range(50):
    vector = [random.random() for _ in range(dim)]
    obj_id = index.insert_object(vector)
    object_ids.append(obj_id)
    print(f"Inserted object {obj_id}")

# Build index after insertions
index.build_index(num_threads=2)
index.save()
print(f"Index built with {len(object_ids)} objects")

from ngt import base as ngt
# TSV import: load vectors from a delimited text file, then search.

dim = 128  # Must match the number of components per line in the TSV file

# Create index for TSV import.
# The legacy interface requires index paths as bytes, not str.
ngt.Index.create(b"tsv_index", dim, distance_type="Cosine")
index = ngt.Index(b"tsv_index")

# Import vectors from TSV file (the TSV file path itself is a str)
# File format: each line contains tab-separated vector components
index.insert_from_tsv("vectors.tsv", num_threads=8, dlmt='\t')
index.save()

# Search example
query = [0.5] * dim  # Example query vector
results = index.search(query, k=10, epsilon=0.05)
print(f"Found {len(results)} nearest neighbors")
for result in results:
    print(f"Object {result.id}: distance {result.distance:.6f}")

from ngt import base as ngt
# Error handling: the legacy interface reports failures via NativeError and APIError.
# Index paths are passed as bytes, as the legacy interface requires.

try:
    # Attempt to open non-existent index
    index = ngt.Index(b"nonexistent_index")
except ngt.NativeError as e:
    print(f"Native library error: {e}")
except ngt.APIError as e:
    print(f"API usage error: {e}")

try:
    # Create index with invalid parameters
    ngt.Index.create(b"test_index", -1)  # Invalid dimension
except ngt.APIError as e:
    print(f"Invalid parameters: {e}")

# Legacy interface example
from ngt import base as ngt_legacy
import ngtpy
# Legacy approach
ngt_legacy.Index.create("legacy_idx", 10, distance_type="L2")
legacy_index = ngt_legacy.Index("legacy_idx")
legacy_results = legacy_index.search(query, k=5, epsilon=0.1)
# Modern approach (equivalent)
ngtpy.create("modern_idx", 10, distance_type="L2")
modern_index = ngtpy.Index("modern_idx")
modern_results = modern_index.search(query, size=5, epsilon=0.1)
# Legacy results are ObjectDistance objects
for result in legacy_results:
print(f"Legacy: {result.id}, {result.distance}")
# Modern results are tuples
for obj_id, distance in modern_results:
print(f"Modern: {obj_id}, {distance}")Install with Tessl CLI
npx tessl i tessl/pypi-ngt