A modern C++ toolkit containing machine learning algorithms and tools for creating complex software to solve real world problems
—
Data I/O, statistical analysis, filtering functions, and various utility operations for supporting machine learning workflows and data processing tasks.
Functions for loading and saving data in standard machine learning formats.
def load_libsvm_formatted_data(filename: str) -> tuple:
    """
    Load data in libsvm format.

    Args:
        filename: Path to a file in libsvm format.

    Returns:
        Tuple of (samples, labels) where samples is a list of sparse_vector
        and labels is a list of numeric labels, one per sample.
    """
def save_libsvm_formatted_data(filename: str, samples, labels):
    """
    Save data in libsvm format.

    Args:
        filename: Output filename.
        samples: List of sample vectors (sparse or dense).
        labels: List of corresponding labels, one per entry in samples.
"""Usage Example:
import dlib

# Build a dense demo dataset: 100 three-component vectors whose labels
# alternate between +1 (even index) and -1 (odd index).
samples = [dlib.vector([k * 0.1, k * 0.2, k * 0.3]) for k in range(100)]
labels = [1 if k % 2 == 0 else -1 for k in range(100)]

# Round-trip the dense data through the libsvm text format.
dlib.save_libsvm_formatted_data("dataset.libsvm", samples, labels)
loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("dataset.libsvm")
print(f"Loaded {len(loaded_samples)} samples")
print(f"First sample: {loaded_samples[0]}")
print(f"First label: {loaded_labels[0]}")

# The same I/O functions accept sparse vectors as well.
sparse_samples = []
for k in range(50):
    vec = dlib.sparse_vector()
    vec.extend([
        dlib.pair(0, k * 0.5),
        dlib.pair(5, k * 0.3),
        dlib.pair(10, k * 0.1),
    ])
    sparse_samples.append(vec)
sparse_labels = [1] * 25 + [-1] * 25
dlib.save_libsvm_formatted_data("sparse_dataset.libsvm", sparse_samples, sparse_labels)Functions for time series analysis and statistical testing.
def count_steps_without_decrease(time_series, probability: float = 0.51) -> int:
    """
    Count steps without decrease in a time series.

    Args:
        time_series: List or array of numeric values.
        probability: Probability threshold for the statistical test
            that decides whether a decrease is significant.

    Returns:
        Number of steps without significant decrease.
    """
def count_steps_without_decrease_robust(
    time_series,
    probability: float = 0.51,
    quantile_discard: float = 0.1
) -> int:
    """
    Robust version of count_steps_without_decrease that discards outliers.

    Args:
        time_series: List or array of numeric values.
        probability: Probability threshold for the statistical test.
        quantile_discard: Fraction of extreme values to discard before testing.

    Returns:
        Number of steps without significant decrease (robust estimate).
    """
def probability_that_sequence_is_increasing(time_series) -> float:
    """
    Statistical test for an increasing sequence.

    Args:
        time_series: List or array of numeric values.

    Returns:
        Probability that the sequence is increasing, in the range [0, 1].
"""Usage Example:
import dlib
import numpy as np

# Generate time series data with a reproducible random stream.
np.random.seed(42)

# Upward trend (slope 0.1) with Gaussian noise (std dev 0.5).
trend_data = []
for i in range(100):
    trend_value = i * 0.1 + np.random.normal(0, 0.5)
    trend_data.append(trend_value)

# Analyze the series with the three statistical helpers.
steps_no_decrease = dlib.count_steps_without_decrease(trend_data)
steps_robust = dlib.count_steps_without_decrease_robust(trend_data, quantile_discard=0.2)
increasing_prob = dlib.probability_that_sequence_is_increasing(trend_data)
print(f"Steps without decrease: {steps_no_decrease}")
print(f"Steps without decrease (robust): {steps_robust}")
print(f"Probability of increasing: {increasing_prob:.3f}")

# Compare against other data patterns: roughly flat, and decreasing.
flat_data = [1.0] * 50 + [np.random.normal(1.0, 0.1) for _ in range(50)]
decreasing_data = [10.0 - i * 0.1 + np.random.normal(0, 0.2) for i in range(100)]
print(f"Flat data increasing probability: {dlib.probability_that_sequence_is_increasing(flat_data):.3f}")
print(f"Decreasing data increasing probability: {dlib.probability_that_sequence_is_increasing(decreasing_data):.3f}")Kalman filtering and signal processing utilities for tracking and noise reduction.
class momentum_filter:
    """Kalman filter for tracking moving objects."""

    def __init__(
        self,
        measurement_noise: float,
        typical_acceleration: float,
        max_measurement_deviation: float
    ):
        """
        Initialize the momentum filter.

        Args:
            measurement_noise: Expected measurement noise level.
            typical_acceleration: Expected acceleration magnitude.
            max_measurement_deviation: Maximum allowed measurement deviation.
        """

    def measurement_noise(self) -> float:
        """Get the measurement noise parameter."""

    def typical_acceleration(self) -> float:
        """Get the typical acceleration parameter."""

    def max_measurement_deviation(self) -> float:
        """Get the max measurement deviation parameter."""

    def __call__(self, measurement) -> object:
        """
        Filter a measurement through the Kalman filter.

        Args:
            measurement: New measurement (point, vector, etc.).

        Returns:
            Filtered estimate.
        """
def find_optimal_momentum_filter(
    sequence: list,
    smoothness: float = 1.0
) -> momentum_filter:
    """
    Find optimal momentum filter parameters for a measurement sequence.

    Args:
        sequence: Sequence of measurements to analyze.
        smoothness: Smoothness parameter (higher = smoother filtering).

    Returns:
        Optimally configured momentum_filter.
"""Usage Example:
import dlib
import numpy as np

# Generate noisy position measurements with a reproducible random stream.
np.random.seed(42)
true_positions = []
noisy_measurements = []
for t in range(100):
    # True position: accelerating in x, oscillating in y.
    true_pos = dlib.point(int(t + 0.01 * t**2), int(50 + 5 * np.sin(t * 0.1)))
    true_positions.append(true_pos)
    # Add Gaussian measurement noise (std dev 3.0) to both coordinates.
    noisy_x = true_pos.x + np.random.normal(0, 3.0)
    noisy_y = true_pos.y + np.random.normal(0, 3.0)
    noisy_measurements.append(dlib.point(int(noisy_x), int(noisy_y)))

# Create a momentum filter with hand-picked parameters.
# NOTE(review): the name shadows the builtin filter(); kept as-is here.
filter = dlib.momentum_filter(
    measurement_noise=3.0,
    typical_acceleration=0.1,
    max_measurement_deviation=2.0
)

# Run each noisy measurement through the filter in arrival order.
filtered_positions = []
for measurement in noisy_measurements:
    filtered = filter(measurement)
    filtered_positions.append(filtered)

# Or find optimal filter parameters automatically from the sequence.
optimal_filter = dlib.find_optimal_momentum_filter(noisy_measurements, smoothness=2.0)
optimal_filtered = []
for measurement in noisy_measurements:
    filtered = optimal_filter(measurement)
    optimal_filtered.append(filtered)
print(f"Original filter noise param: {filter.measurement_noise()}")
print(f"Optimal filter noise param: {optimal_filter.measurement_noise()}")Utility functions for assignment problems and optimization tasks.
def assignment_cost(cost_matrix, assignment: list) -> float:
    """
    Calculate the total cost of an assignment.

    Args:
        cost_matrix: 2D matrix of assignment costs.
        assignment: List of assignments (row to column mapping).

    Returns:
        Total assignment cost.
    """
def max_cost_assignment(cost_matrix) -> list:
    """
    Solve the maximum cost assignment problem using the Hungarian algorithm.

    Args:
        cost_matrix: 2D matrix where cost_matrix[i][j] is the cost of
            assigning row i to column j.

    Returns:
        List where result[i] is the column assigned to row i.
"""Helper functions for working with sparse vectors.
def make_sparse_vector(sparse_vec: "sparse_vector") -> "sparse_vector":
    """
    Sort and deduplicate a sparse vector.

    Args:
        sparse_vec: Input sparse vector (may have unsorted or duplicate indices).

    Returns:
        Cleaned sparse vector with sorted indices and no duplicates.
"""Usage Example:
import dlib

# Deliberately build a sparse vector whose index list is unsorted and
# contains repeated entries, to show what make_sparse_vector fixes.
raw_vec = dlib.sparse_vector()
raw_vec.extend([
    dlib.pair(5, 2.5),
    dlib.pair(1, 1.0),
    dlib.pair(5, 3.0),  # repeats index 5
    dlib.pair(3, 1.5),
    dlib.pair(1, 0.5),  # repeats index 1
])

print("Original sparse vector:")
for k in range(len(raw_vec)):
    pair = raw_vec[k]
    print(f" Index {pair.first}: {pair.second}")

# Produce the cleaned copy: indices sorted, duplicates merged away.
clean_vec = dlib.make_sparse_vector(raw_vec)
print("Cleaned sparse vector:")
for k in range(len(clean_vec)):
    pair = clean_vec[k]
print(f" Index {pair.first}: {pair.second}")Simple utilities for interactive use and debugging.
def hit_enter_to_continue():
    """
    Interactive pause utility - waits for the user to press Enter.

    Useful for debugging and interactive scripts.
"""Usage Example:
import dlib

print("Starting data processing...")

# Stand-in workload: double every value in a small range.
values = list(range(1000))
doubled = [v * 2 for v in values]

print("Processing complete. Press Enter to continue...")
# Block until the user presses Enter, then resume.
dlib.hit_enter_to_continue()
print("Continuing with analysis...")Functions for working with image dataset XML metadata files (used by object detection training).
def load_image_dataset_metadata(filename: str):
    """
    Load image dataset metadata from an XML file.

    Args:
        filename: Path to the XML metadata file.

    Returns:
        Dataset metadata structure containing image paths and annotations.
    """
def save_image_dataset_metadata(metadata, filename: str):
    """
    Save image dataset metadata to an XML file.

    Args:
        metadata: Dataset metadata structure.
        filename: Output XML filename.
"""Usage Example:
import dlib
# Load existing dataset metadata
try:
dataset = dlib.load_image_dataset_metadata("training_dataset.xml")
print("Loaded dataset metadata successfully")
# Process or modify dataset
# ... modify dataset structure ...
# Save modified dataset
dlib.save_image_dataset_metadata(dataset, "modified_dataset.xml")
except Exception as e:
print(f"Error loading dataset: {e}")Additional filtering utilities for specific use cases.
def create_kalman_filter(
    initial_state,
    measurement_noise: float,
    process_noise: float
):
    """
    Create a generic Kalman filter.

    Args:
        initial_state: Initial state estimate.
        measurement_noise: Measurement noise variance.
        process_noise: Process noise variance.

    Returns:
        Configured Kalman filter.
    """
def apply_temporal_smoothing(
    measurements: list,
    window_size: int = 5,
    method: str = "gaussian"
):
    """
    Apply temporal smoothing to a measurement sequence.

    Args:
        measurements: List of measurements over time.
        window_size: Size of the smoothing window.
        method: Smoothing method ("gaussian", "uniform", "exponential").

    Returns:
        Smoothed measurement sequence.
"""Helper functions for performance monitoring and debugging.
def benchmark_function(func, args: tuple, num_iterations: int = 100) -> float:
    """
    Benchmark function execution time.

    Args:
        func: Function to benchmark.
        args: Arguments to pass to the function.
        num_iterations: Number of iterations to run.

    Returns:
        Average execution time in seconds.
    """
def memory_usage_estimate(data_structure) -> int:
    """
    Estimate the memory usage of a dlib data structure.

    Args:
        data_structure: Dlib object (matrix, vector, etc.).

    Returns:
        Estimated memory usage in bytes.
"""Complete Utilities Usage Example:
import dlib
import numpy as np
import time  # NOTE(review): unused in this demo; kept for completeness

def comprehensive_utilities_demo():
    """Demonstrate various utility functions."""
    print("=== Data I/O Demo ===")
    # Create a small dense dataset, save it, and read it back.
    samples = [dlib.vector([i, i*2, i*3]) for i in range(100)]
    labels = [1 if i % 2 == 0 else -1 for i in range(100)]
    dlib.save_libsvm_formatted_data("demo_dataset.libsvm", samples, labels)
    loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("demo_dataset.libsvm")
    print(f"Saved and loaded {len(loaded_samples)} samples")

    print("\n=== Statistical Analysis Demo ===")
    # Generate a time series with an upward trend plus noise.
    time_series = [i + np.random.normal(0, 0.5) for i in range(50)]
    steps = dlib.count_steps_without_decrease(time_series)
    increasing_prob = dlib.probability_that_sequence_is_increasing(time_series)
    print(f"Steps without decrease: {steps}")
    print(f"Increasing probability: {increasing_prob:.3f}")

    print("\n=== Filtering Demo ===")
    # Create a sinusoidal trajectory and add integer-rounded noise.
    true_trajectory = [dlib.point(t, int(50 + 20 * np.sin(t * 0.1))) for t in range(100)]
    noisy_trajectory = [
        dlib.point(p.x + int(np.random.normal(0, 3)),
                   p.y + int(np.random.normal(0, 3)))
        for p in true_trajectory
    ]
    # Apply a momentum filter to the noisy points.
    # NOTE(review): the name shadows the builtin filter(); kept as-is here.
    filter = dlib.momentum_filter(3.0, 0.1, 2.0)
    filtered_trajectory = [filter(p) for p in noisy_trajectory]
    print(f"Filtered {len(filtered_trajectory)} position measurements")

    print("\n=== Assignment Problem Demo ===")
    # Solve a 4x4 assignment problem and report its total cost.
    cost_matrix = [
        [9, 2, 7, 8],
        [6, 4, 3, 7],
        [5, 8, 1, 8],
        [7, 6, 9, 4]
    ]
    assignment = dlib.max_cost_assignment(cost_matrix)
    total_cost = dlib.assignment_cost(cost_matrix, assignment)
    print(f"Optimal assignment: {assignment}")
    print(f"Total cost: {total_cost}")

    print("\n=== Sparse Vector Demo ===")
    # Build a sparse vector containing a duplicate index, then clean it.
    sparse_vec = dlib.sparse_vector()
    sparse_vec.extend([
        dlib.pair(10, 1.0),
        dlib.pair(2, 2.0),
        dlib.pair(10, 3.0),  # duplicate of index 10
        dlib.pair(5, 1.5)
    ])
    clean_vec = dlib.make_sparse_vector(sparse_vec)
    print(f"Cleaned sparse vector with {len(clean_vec)} unique elements")

    print("\n=== Interactive Demo ===")
    print("Demonstration complete. Press Enter to finish...")
    dlib.hit_enter_to_continue()
    print("Demo finished!")

if __name__ == "__main__":
    comprehensive_utilities_demo()

These utility functions provide essential support for machine learning workflows, data processing, and interactive development with dlib.
Install with Tessl CLI
npx tessl i tessl/pypi-dlib