High-performance library for approximate nearest neighbor search in high-dimensional vector spaces
—
Quantized indexes provide memory-efficient vector indexing by using compressed representations while maintaining search accuracy. NGT offers two quantization approaches: QuantizedIndex for standard quantization and QuantizedBlobIndex for advanced blob-based quantization with maximum compression.
Standard quantized indexing that reduces memory usage through vector quantization while preserving search performance.
class QuantizedIndex:
def __init__(self, path, max_no_of_edges=128, zero_based_numbering=True,
read_only=False, log_disabled=False):
"""
Open quantized index for memory-efficient search.
Args:
path (str): Path to quantized index directory
max_no_of_edges (int): Maximum edges per node (default: 128)
zero_based_numbering (bool): Use zero-based object IDs (default: True)
read_only (bool): Open in read-only mode (default: False)
log_disabled (bool): Disable progress logging (default: False)
"""

Search operations optimized for quantized vector representations with result expansion control.
class QuantizedIndex:
def search(self, query, size=0, epsilon=-1.0, result_expansion=-1.0, edge_size=-1):
"""
Search nearest neighbors in quantized index.
Args:
query (array-like): Query vector
size (int): Number of results to return, 0 uses default (default: 0)
epsilon (float): Search range expansion, -1.0 uses default (default: -1.0)
result_expansion (float): Result expansion ratio, -1.0 uses default (default: -1.0)
edge_size (int): Number of edges to explore, -1 uses default (default: -1)
Returns:
list: List of (object_id, distance) tuples
"""

Configure search parameters specific to quantized index operations.
class QuantizedIndex:
def set(self, num_of_search_objects=0, search_radius=float('-inf'), epsilon=-1.0, result_expansion=-1.0):
"""
Set default search parameters for quantized index.
Args:
num_of_search_objects (int): Default number of search results (default: 0)
search_radius (float): Maximum search radius (default: float('-inf'))
epsilon (float): Default search epsilon (default: -1.0)
result_expansion (float): Default result expansion ratio (default: -1.0)
Returns:
None
"""
def set_with_distance(self, boolean=True):
"""
Configure whether to return distances with search results.
Args:
boolean (bool): Include distances in results (default: True)
Returns:
None
"""
def set_defaults(self, size=0, search_radius=float('-inf'), epsilon=-1.0, result_expansion=-1.0):
"""
Set default parameters (deprecated, use set() instead).
Args:
size (int): Default number of search results (default: 0)
search_radius (float): Maximum search radius (default: float('-inf'))
epsilon (float): Default search epsilon (default: -1.0)
result_expansion (float): Default result expansion ratio (default: -1.0)
Returns:
None
"""

Advanced quantized indexing with blob storage for maximum compression and specialized search operations.
class QuantizedBlobIndex:
def __init__(self, path, max_no_of_edges=128, zero_based_numbering=True,
read_only=False, log_disabled=False, refinement=False,
refinement_object_type="Any"):
"""
Open quantized blob index for maximum compression.
Args:
path (str): Path to quantized blob index directory
max_no_of_edges (int): Maximum edges per node (default: 128)
zero_based_numbering (bool): Use zero-based object IDs (default: True)
read_only (bool): Open in read-only mode (default: False)
log_disabled (bool): Disable progress logging (default: False)
refinement (bool): Enable search refinement (default: False)
refinement_object_type (str): Object type for refinement (default: "Any")
"""

Search operations designed for blob-quantized indexes with batch processing capabilities.
class QuantizedBlobIndex:
def search(self, query, size=0, epsilon=float('-inf')):
"""
Search nearest neighbors in blob quantized index.
Args:
query (array-like): Query vector
size (int): Number of results to return, 0 uses default (default: 0)
epsilon (float): Search range parameter (default: float('-inf'))
Returns:
list: List of (object_id, distance) tuples
"""
def batch_search(self, query, results, size=0):
"""
Batch search multiple queries in blob index.
Args:
query (array-like): Array of query vectors
results (BatchResults): Container for batch results
size (int): Number of results per query, 0 uses default (default: 0)
Returns:
None (results stored in results parameter)
"""
def batch_search_tmp(self, query, size=0):
"""
Temporary batch search implementation.
Args:
query (array-like): Array of query vectors
size (int): Number of results per query, 0 uses default (default: 0)
Returns:
list: Batch search results
"""
def batch_range_search(self, query, results, radius=float('-inf')):
"""
Range-based batch search within specified radius.
Args:
query (array-like): Array of query vectors
results (BatchResults): Container for batch results
radius (float): Search radius, float('-inf') for no limit (default: float('-inf'))
Returns:
None (results stored in results parameter)
"""

Management operations for blob quantized indexes including insertion and persistence.
class QuantizedBlobIndex:
def batch_insert(self, objects, debug=False):
"""
Insert multiple objects into blob index.
Args:
objects (array-like): Array of vectors to insert
debug (bool): Enable debug output (default: False)
Returns:
None
"""
def save(self):
"""
Save blob index to disk.
Returns:
None
"""
def set(self, num_of_search_objects=0, epsilon=float('-inf'), blob_epsilon=-1.0, result_expansion=-1.0, radius=-1.0, edge_size=-1, exploration_size=0, exact_result_expansion=0.0, num_of_probes=-1):
"""
Set default search parameters for blob index.
Args:
num_of_search_objects (int): Default number of search results (default: 0)
epsilon (float): Default search epsilon (default: float('-inf'))
blob_epsilon (float): Blob-specific epsilon, -1.0 uses default (default: -1.0)
result_expansion (float): Default result expansion ratio, -1.0 uses default (default: -1.0)
radius (float): Search radius, -1.0 uses default (default: -1.0)
edge_size (int): Edge size, -1 uses default (default: -1)
exploration_size (int): Graph exploration size (default: 0)
exact_result_expansion (float): Exact result expansion ratio (default: 0.0)
num_of_probes (int): Number of probes, -1 uses default (default: -1)
Returns:
None
"""
def set_with_distance(self, boolean=True):
"""
Configure whether to return distances with search results.
Args:
boolean (bool): Include distances in results (default: True)
Returns:
None
"""

Container class for managing batch search results from quantized indexes.
class BatchResults:
def __init__(self):
"""
Create empty batch results container.
"""
def get(self, position):
"""
Get result at specific position.
Args:
position (int): Position index
Returns:
Result at specified position
"""
def get_ids(self):
"""
Get all result IDs as array.
Returns:
array: Object IDs from search results
"""
def get_indexed_ids(self):
"""
Get indexed result IDs.
Returns:
array: Indexed object IDs
"""
def get_indexed_distances(self):
"""
Get indexed distances.
Returns:
array: Distance values for indexed results
"""
def get_index(self):
"""
Get result index information.
Returns:
Index information for results
"""
def get_size(self):
"""
Get number of results in container.
Returns:
int: Number of results
"""

import ngtpy
import numpy as np
# Open existing quantized index
# Note: Quantized index must be created using command-line tools (ngtqg quantize)
qindex = ngtpy.QuantizedIndex("quantized_index_path", max_no_of_edges=64)
# Configure search parameters
qindex.set(num_of_search_objects=20, epsilon=0.02, result_expansion=3.0)
# Search with result expansion for higher accuracy
query = np.random.random(128).astype(np.float32)
results = qindex.search(query, size=10, epsilon=0.05, result_expansion=5.0)
for rank, (obj_id, distance) in enumerate(results):
    print(f"Rank {rank+1}: Object {obj_id}, Distance {distance:.4f}")

import ngtpy
import numpy as np
# Open quantized blob index with refinement
qb_index = ngtpy.QuantizedBlobIndex(
"blob_index_path",
max_no_of_edges=128,
refinement=True
)
# Prepare multiple queries
queries = np.random.random((5, 128)).astype(np.float32)
batch_results = ngtpy.BatchResults()
# Perform batch search
qb_index.batch_search(queries, batch_results, size=5)
# Process batch results
for i in range(batch_results.get_size()):
    result = batch_results.get(i)
    print(f"Query {i}: {result}")
# Range-based search within specified radius
range_results = ngtpy.BatchResults()
qb_index.batch_range_search(queries, range_results, radius=0.5)
print(f"Range search found {range_results.get_size()} total results")

import ngtpy
import numpy as np
# Open blob index for writing
qb_index = ngtpy.QuantizedBlobIndex("writable_blob_index", read_only=False)
# Insert new vectors
new_vectors = np.random.random((100, 128)).astype(np.float32)
qb_index.batch_insert(new_vectors, debug=True)
# Save the updated index
qb_index.save()
print("Successfully added 100 new vectors to blob index")

Install with Tessl CLI
npx tessl i tessl/pypi-ngt