CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-ngt

High-performance library for approximate nearest neighbor search in high-dimensional vector spaces

Pending
Overview
Eval results
Files

legacy-interface.mddocs/

Legacy Interface

The legacy interface (ngt.base) provides ctypes-based bindings for backward compatibility. While the modern interface (ngtpy) is recommended for new applications, the legacy interface remains useful for existing code and simpler use cases requiring manual memory management.

Capabilities

Index Management

Core index class using ctypes bindings for vector storage and search operations.

class Index:
    def __init__(self, path: bytes) -> None:
        """
        Open an existing index for read/write operations.

        Args:
            path (bytes): Path to the index directory. Must be a bytes
                object (e.g. b"my_index"), not str.
        """
    
    @staticmethod
    def create(path: bytes, dimension: int, edge_size_for_creation: int = 10, edge_size_for_search: int = 40,
              object_type: str = "Float", distance_type: str = "L2") -> "Index":
        """
        Create a new, empty index with the specified parameters.

        Args:
            path (bytes): Index storage path. Must be a bytes object
                (e.g. b"my_index"), not str.
            dimension (int): Vector dimensionality
            edge_size_for_creation (int): Edges per node during creation (default: 10)
            edge_size_for_search (int): Edges per node during search (default: 40)
            object_type (str): Data type - "Float" or "Integer" (1 byte unsigned)
                               (default: "Float")
            distance_type (str): Distance function - "L2", "L1", "Angle", "Hamming",
                               "Jaccard", "Cosine" (default: "L2")

        Returns:
            Index: New Index object
        """

Vector Search

Search for nearest neighbors with simplified parameter control.

class Index:
    def search(self, query, k: int = 20, epsilon: float = 0.1) -> list:
        """
        Search for the k nearest neighbors of a query vector.

        Args:
            query (array-like): Query vector
            k (int): Number of nearest neighbors to return (default: 20)
            epsilon (float): Search range expansion parameter (default: 0.1)

        Returns:
            list: List of ObjectDistance instances, each exposing ``id`` and
                ``distance`` attributes
        """

Vector Insertion

Insert vectors one at a time (without building the index) or in bulk (with automatic index building).

class Index:
    def insert_object(self, object):
        """
        Insert a single object without building the index.

        The object is not indexed until build_index() is called.

        Args:
            object (array-like): Vector to insert

        Returns:
            int: Object ID of the inserted vector
        """

    def insert(self, objects, num_threads: int = 8) -> None:
        """
        Insert multiple objects and build the index.

        Args:
            objects (array-like): Array of vectors to insert
            num_threads (int): Number of threads for insertion (default: 8)

        Returns:
            None
        """

    def insert_blob(self, objects, num_threads: int = 8) -> None:
        """
        Insert multiple objects using the blob method and build the index.

        Args:
            objects (array-like): Array of vectors to insert
            num_threads (int): Number of threads for insertion (default: 8)

        Returns:
            None
        """

    def insert_from_tsv(self, path: str, num_threads: int = 8, dlmt: str = '\t') -> None:
        # Raw docstring: keeps the documented default visible as the two
        # characters backslash + "t" instead of collapsing to a literal TAB.
        r"""
        Insert objects from a TSV file and build the index.

        Args:
            path (str): Path to TSV file containing vectors
            num_threads (int): Number of threads for insertion (default: 8)
            dlmt (str): Delimiter character (default: '\t')

        Returns:
            None
        """

Index Building and Management

Build indexes and manage index persistence with simplified operations.

class Index:
    def build_index(self, num_threads: int = 8) -> None:
        """
        Build the index for previously inserted objects.

        Args:
            num_threads (int): Number of threads for building (default: 8)

        Returns:
            None
        """
    
    def save(self, path=None) -> None:
        """
        Save the index to disk.

        Args:
            path (str, optional): Save path; the original index path is
                used when None
                (NOTE(review): the constructor requires bytes paths --
                confirm whether str is really accepted here)

        Returns:
            None
        """

Object Management

Access and manage indexed objects with basic retrieval and removal operations.

class Index:
    def get_object(self, id: int) -> list:
        """
        Retrieve a stored object (vector) by its ID.

        Args:
            id (int): Object identifier

        Returns:
            list: Vector as list of floats
        """
    
    def remove(self, id: int) -> None:
        """
        Remove an object from the index.

        Args:
            id (int): Object identifier to remove

        Returns:
            None
        """

Result Structure

Data structure for search results containing object ID and distance information.

class ObjectDistance:
    """
    Search result structure pairing an object ID with its distance.

    Attributes:
        id (int): Object identifier (c_uint)
        distance (float): Distance value (c_float)
    """
    # c_uint / c_float above presumably name the underlying ctypes field
    # types of the bindings -- TODO confirm against the library source.
    id: int
    distance: float

Exception Classes

Exception classes for error handling in the legacy interface.

class NativeError(Exception):
    """Exception raised when the underlying C library encounters errors.

    Raised for low-level library errors, memory issues, or invalid
    operations on the native NGT library.
    """

class APIError(Exception):
    """Exception raised for API usage errors.

    Raised for invalid API usage, incorrect parameters, or operations
    that violate the API contract.
    """

Usage Examples

Basic Index Creation and Search

from ngt import base as ngt
import random

# Create sample data
dim = 10
nb = 100
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
query = vectors[0]

# Index paths must be bytes, not str
index_path = b"legacy_index"

# Create index
ngt.Index.create(index_path, dim, distance_type="L2", object_type="Float")

# Open index and insert data
index = ngt.Index(index_path)
index.insert(vectors, num_threads=4)
index.save()

# Search for nearest neighbors
results = index.search(query, k=5, epsilon=0.1)
for rank, result in enumerate(results, start=1):
    print(f"Rank {rank}: Object {result.id}, Distance {result.distance:.4f}")
    vector = index.get_object(result.id)
    print(f"Vector: {vector[:3]}...")  # Show first 3 dimensions

Individual Object Insertion

from ngt import base as ngt
import random

dim = 64

# Index paths must be bytes, not str
index_path = b"incremental_legacy"

# Create empty index
ngt.Index.create(index_path, dim, edge_size_for_creation=15)
index = ngt.Index(index_path)

# Insert objects one by one (insert_object does not build the index)
object_ids = []
for _ in range(50):
    vector = [random.random() for _ in range(dim)]
    obj_id = index.insert_object(vector)
    object_ids.append(obj_id)
    print(f"Inserted object {obj_id}")

# Build index after insertions
index.build_index(num_threads=2)
index.save()

print(f"Index built with {len(object_ids)} objects")

TSV File Import

from ngt import base as ngt

dim = 128

# Index paths must be bytes, not str
index_path = b"tsv_index"

# Create index for TSV import
ngt.Index.create(index_path, dim, distance_type="Cosine")
index = ngt.Index(index_path)

# Import vectors from TSV file
# File format: each line contains tab-separated vector components
index.insert_from_tsv("vectors.tsv", num_threads=8, dlmt='\t')
index.save()

# Search example
query = [0.5] * dim  # Example query vector
results = index.search(query, k=10, epsilon=0.05)

print(f"Found {len(results)} nearest neighbors")
for result in results:
    print(f"Object {result.id}: distance {result.distance:.6f}")

Error Handling

from ngt import base as ngt

try:
    # Attempt to open non-existent index (paths must be bytes, not str)
    index = ngt.Index(b"nonexistent_index")
except ngt.NativeError as e:
    print(f"Native library error: {e}")
except ngt.APIError as e:
    print(f"API usage error: {e}")

try:
    # Create index with invalid parameters
    ngt.Index.create(b"test_index", -1)  # Invalid dimension
except ngt.APIError as e:
    print(f"Invalid parameters: {e}")
except ngt.NativeError as e:
    # NOTE(review): the invalid dimension may be rejected by the native
    # library rather than by the Python layer -- handle both error types.
    print(f"Native library error: {e}")

Comparison with Modern Interface

# Legacy interface example
from ngt import base as ngt_legacy
import ngtpy

dim = 10
query = [0.5] * dim  # Example query vector

# NOTE: both indexes below are created empty; insert vectors and build
# the indexes before searching to get meaningful results.

# Legacy approach (paths must be bytes, not str)
ngt_legacy.Index.create(b"legacy_idx", dim, distance_type="L2")
legacy_index = ngt_legacy.Index(b"legacy_idx")
legacy_results = legacy_index.search(query, k=5, epsilon=0.1)

# Modern approach (equivalent)
ngtpy.create("modern_idx", dim, distance_type="L2")
modern_index = ngtpy.Index("modern_idx")
modern_results = modern_index.search(query, size=5, epsilon=0.1)

# Legacy results are ObjectDistance objects
for result in legacy_results:
    print(f"Legacy: {result.id}, {result.distance}")

# Modern results are tuples
for obj_id, distance in modern_results:
    print(f"Modern: {obj_id}, {distance}")

Install with Tessl CLI

npx tessl i tessl/pypi-ngt

docs

index.md

legacy-interface.md

modern-index.md

optimization.md

quantized-indexes.md

tile.json