A modern C++ toolkit containing machine learning algorithms and tools for creating complex software to solve real world problems
---
Comprehensive machine learning algorithms including Support Vector Machines, clustering, optimization, tracking, and statistical analysis tools for data science and pattern recognition applications.
Multiple SVM implementations with different kernel types for classification and regression tasks.
class svm_c_trainer_linear:
    """Trainer for linear-kernel C-SVM classifiers."""

    def __init__(self):
        """Create a linear SVM trainer with default settings."""

    def set_c(self, c: float):
        """Set the C regularization parameter.

        Args:
            c: Regularization strength; larger values mean less regularization.
        """

    def set_epsilon(self, epsilon: float):
        """Set the convergence threshold used during training.

        Args:
            epsilon: Stopping tolerance for the optimizer.
        """

    def train(self, samples, labels):
        """Fit the SVM to the given data.

        Args:
            samples: Training samples (vectors or sparse_vectors).
            labels: Per-sample labels, +1 or -1.

        Returns:
            A trained decision function.
        """
class svm_c_trainer_radial_basis:
    """Trainer for C-SVMs with a radial basis function (RBF) kernel."""

    def __init__(self):
        """Create an RBF-kernel SVM trainer with default settings."""

    def set_gamma(self, gamma: float):
        """Set the RBF kernel gamma parameter.

        Args:
            gamma: Kernel width parameter.
        """

    def set_c(self, c: float):
        """Set the C regularization parameter.

        Args:
            c: Regularization strength.
        """

    def train(self, samples, labels):
        """Fit the RBF SVM to the given data.

        Args:
            samples: Training samples.
            labels: Per-sample labels, +1 or -1.

        Returns:
            A trained decision function.
        """
class svm_c_trainer_histogram_intersection:
    """Trainer for C-SVMs with a histogram intersection kernel."""

    def __init__(self):
        """Create a histogram intersection SVM trainer with default settings."""

    def set_c(self, c: float):
        """Set the C regularization parameter."""

    def train(self, samples, labels):
        """Fit the histogram intersection SVM.

        Args:
            samples: Training histograms.
            labels: Per-sample labels.

        Returns:
            A trained decision function.
        """
class svm_c_trainer_sparse_linear:
    """Trainer for linear SVMs on sparse, high-dimensional data."""

    def __init__(self):
        """Create a sparse linear SVM trainer with default settings."""

    def set_c(self, c: float):
        """Set the C regularization parameter."""

    def train(self, samples: sparse_vectors, labels):
        """Fit the sparse linear SVM.

        Args:
            samples: Sparse training vectors.
            labels: Per-sample labels.

        Returns:
            A trained decision function.
        """
# Usage Example:
import dlib
import numpy as np

# Generate a two-cluster toy dataset
np.random.seed(42)
samples = []
labels = []

# Class +1: points clustered around (2, 2)
for _ in range(100):
    px = np.random.normal(2, 0.5)
    py = np.random.normal(2, 0.5)
    samples.append(dlib.vector([px, py]))
    labels.append(+1)

# Class -1: points clustered around (-2, -2)
for _ in range(100):
    px = np.random.normal(-2, 0.5)
    py = np.random.normal(-2, 0.5)
    samples.append(dlib.vector([px, py]))
    labels.append(-1)

# Train a linear SVM
linear_trainer = dlib.svm_c_trainer_linear()
linear_trainer.set_c(10.0)
linear_classifier = linear_trainer.train(samples, labels)

# Train an RBF SVM on the same data
rbf_trainer = dlib.svm_c_trainer_radial_basis()
rbf_trainer.set_c(10.0)
rbf_trainer.set_gamma(0.1)
rbf_classifier = rbf_trainer.train(samples, labels)

# Compare both classifiers on a test point
test_point = dlib.vector([1.5, 1.5])
linear_prediction = linear_classifier(test_point)
rbf_prediction = rbf_classifier(test_point)
print(f"Linear SVM prediction: {linear_prediction}")
print(f"RBF SVM prediction: {rbf_prediction}")
# Ranking SVMs for learning to rank problems and preference learning.
class svm_rank_trainer:
    """SVM trainer for learning-to-rank problems."""

    def __init__(self):
        """Create a ranking SVM trainer with default settings."""

    def set_c(self, c: float):
        """Set the C regularization parameter."""

    def train(self, samples: list) -> object:
        """Fit the ranking SVM.

        Args:
            samples: List of ranking_pair objects or ranked lists.

        Returns:
            A trained ranking function.
        """
class ranking_pair:
    """A single training pair for the ranking SVM."""

    def __init__(self, relevant: vector, non_relevant: vector):
        """Create a ranking pair.

        Args:
            relevant: Vector that should receive the higher rank.
            non_relevant: Vector that should receive the lower rank.
        """

    @property
    def relevant_vector(self) -> vector:
        """The relevant (higher-ranked) vector."""

    @property
    def nonrelevant_vector(self) -> vector:
        """The non-relevant (lower-ranked) vector."""
# Usage Example:
import dlib

# Build ranking training data: each pair asserts that its first item
# should outrank its second item.
ranking_samples = []
for _ in range(100):
    high_quality = dlib.vector([5.0, 4.0, 3.0])  # stronger feature values
    low_quality = dlib.vector([2.0, 1.0, 1.5])   # weaker feature values
    ranking_samples.append(dlib.ranking_pair(high_quality, low_quality))

# Train the ranking SVM
rank_trainer = dlib.svm_rank_trainer()
rank_trainer.set_c(1.0)
ranking_function = rank_trainer.train(ranking_samples)

# Score and compare two new items
item1 = dlib.vector([4.5, 3.5, 2.5])
item2 = dlib.vector([1.5, 2.0, 1.0])
score1 = ranking_function(item1)
score2 = ranking_function(item2)
print(f"Item 1 score: {score1}")
print(f"Item 2 score: {score2}")
print(f"Item 1 ranks higher: {score1 > score2}")
# Correlation-based object tracking for video sequences and real-time applications.
class correlation_tracker:
    """Visual object tracker based on correlation filters."""

    def __init__(self):
        """Create a correlation tracker."""

    def start_track(self, img, bounding_box: rectangle):
        """Begin tracking the object inside the given box.

        Args:
            img: Initial image.
            bounding_box: Initial object location.
        """

    def update(self, img) -> float:
        """Advance the tracker by one frame.

        Args:
            img: New image frame.

        Returns:
            Tracking confidence score.
        """

    def get_position(self) -> rectangle:
        """Return the current bounding box of the tracked object."""
# Usage Example:
import dlib
import cv2

# Set up the tracker and open the video source
tracker = dlib.correlation_tracker()
cap = cv2.VideoCapture("video.mp4")
ret, frame = cap.read()
if ret:
    # Seed the tracker with an initial bounding box (example values)
    bbox = dlib.rectangle(100, 100, 200, 200)
    tracker.start_track(frame, bbox)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Advance the tracker and read back its state
        confidence = tracker.update(frame)
        current_pos = tracker.get_position()
        # Draw the tracked box
        cv2.rectangle(frame,
                      (current_pos.left(), current_pos.top()),
                      (current_pos.right(), current_pos.bottom()),
                      (0, 255, 0), 2)
        # Overlay the confidence score
        cv2.putText(frame, f"Conf: {confidence:.3f}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 255, 0), 2)
        cv2.imshow("Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
# Dimensionality reduction and feature analysis using CCA.
class cca:
    """Canonical Correlation Analysis (CCA)."""

    def __init__(self):
        """Create an untrained CCA model."""

    def train(self, x_samples: list, y_samples: list):
        """Fit the CCA model on paired samples.

        Args:
            x_samples: First set of vectors.
            y_samples: Second set of vectors, paired element-wise with x_samples.
        """

    def apply_cca_transform(self, x_sample: vector) -> vector:
        """Project a vector from the x space into the canonical space.

        Args:
            x_sample: Input vector from the x space.

        Returns:
            The transformed vector in canonical space.
        """

    def apply_cca_transform_y(self, y_sample: vector) -> vector:
        """Project a vector from the y space into the canonical space.

        Args:
            y_sample: Input vector from the y space.

        Returns:
            The transformed vector in canonical space.
        """
# Functions for global optimization and parameter search.
def find_min_global(
    func,
    bounds: list,
    num_function_calls: int
):
    """Find the global minimum of a function via derivative-free optimization.

    Args:
        func: Function to minimize; takes a vector and returns a float.
        bounds: List of (min, max) bounds, one per parameter.
        num_function_calls: Maximum number of function evaluations.

    Returns:
        Tuple of (optimal_parameters, minimum_value).
    """
def find_max_global(
    func,
    bounds: list,
    num_function_calls: int
):
    """Find the global maximum of a function via derivative-free optimization.

    Args:
        func: Function to maximize; takes a vector and returns a float.
        bounds: List of (min, max) bounds, one per parameter.
        num_function_calls: Maximum number of function evaluations.

    Returns:
        Tuple of (optimal_parameters, maximum_value).
    """
# Usage Example:
import dlib

# NOTE: the original example imported `math` but never used it; removed.


def rosenbrock(params):
    """Rosenbrock banana function; global minimum is 0 at (1, 1)."""
    x, y = params[0], params[1]
    return (1 - x)**2 + 100 * (y - x**2)**2


# Search box for x and y
bounds = [(-5, 5), (-5, 5)]  # x and y bounds

# Run the derivative-free global optimizer
optimal_params, min_value = dlib.find_min_global(
    rosenbrock,
    bounds,
    num_function_calls=1000
)
print(f"Optimal parameters: x={optimal_params[0]:.4f}, y={optimal_params[1]:.4f}")
print(f"Minimum value: {min_value:.6f}")
print(f"Expected minimum at (1, 1) with value 0")
# Algorithms for solving assignment problems and optimization tasks.
def max_cost_assignment(cost_matrix) -> list:
    """Solve the maximum-cost assignment problem.

    Args:
        cost_matrix: 2D matrix of assignment costs.

    Returns:
        List mapping each row index to its assigned column index.
    """
def assignment_cost(cost_matrix, assignment: list) -> float:
    """Compute the total cost of a given assignment.

    Args:
        cost_matrix: 2D cost matrix.
        assignment: List of assignments.

    Returns:
        The total assignment cost.
    """
# Usage Example:
import dlib
import numpy as np

# Cost matrix: rows are workers, columns are tasks
cost_matrix = np.array([
    [4, 2, 8],
    [4, 3, 7],
    [1, 5, 9]
])

# Solve for the assignment that maximizes total cost
assignment = dlib.max_cost_assignment(cost_matrix)
total_cost = dlib.assignment_cost(cost_matrix, assignment)
print(f"Optimal assignment: {assignment}")
print(f"Total cost: {total_cost}")

# assignment[i] gives the task assigned to worker i
for worker, task in enumerate(assignment):
    print(f"Worker {worker} -> Task {task} (cost: {cost_matrix[worker][task]})")
# Machine learning-based sequence segmentation for temporal data analysis.
class sequence_segmenter:
    """Learned segmenter for temporal sequence data."""

    def __init__(self):
        """Create an untrained sequence segmenter."""

    def train(self, sequences: list, labels: list):
        """Fit the segmenter on labeled sequences.

        Args:
            sequences: List of sequence data.
            labels: Segmentation labels, one per sequence.
        """

    def segment_sequence(self, sequence) -> list:
        """Segment a new sequence.

        Args:
            sequence: Input sequence to segment.

        Returns:
            List of segment boundaries.
        """
# Various decision function classes for different SVM kernels and problem types.
class decision_function_linear:
    """Linear decision function produced by SVM training."""

    def __call__(self, sample: vector) -> float:
        """Evaluate the decision function on a sample.

        Args:
            sample: Input vector.

        Returns:
            Decision value; positive means class +1, negative means class -1.
        """
class decision_function_radial_basis:
    """RBF-kernel decision function produced by SVM training."""

    def __call__(self, sample: vector) -> float:
        """Evaluate the RBF decision function on a sample."""
class decision_function_histogram_intersection:
    """Histogram-intersection-kernel decision function produced by SVM training."""

    def __call__(self, sample: vector) -> float:
        """Evaluate the histogram intersection decision function on a sample."""
# Additional clustering methods beyond face clustering.
def chinese_whispers(edges: list) -> list:
    """Cluster a graph with the Chinese Whispers algorithm.

    Args:
        edges: Graph edges as (node1, node2) tuples.

    Returns:
        Cluster label for each node.
    """
def spectral_clustering(similarity_matrix, num_clusters: int) -> list:
    """Cluster data with the spectral clustering algorithm.

    Args:
        similarity_matrix: Pairwise similarity matrix.
        num_clusters: Number of clusters to find.

    Returns:
        Cluster assignment for each sample.
    """
# Complete Machine Learning Pipeline Example:
import dlib
import numpy as np
from sklearn.datasets import make_classification


def complete_ml_pipeline():
    """Demonstrate complete ML workflow with dlib."""
    # Synthetic binary classification dataset
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

    # Convert to dlib format, mapping labels to +1 / -1
    samples = []
    labels = []
    for features, target in zip(X, y):
        samples.append(dlib.vector(features))
        labels.append(1 if target == 1 else -1)

    # 80/20 train/test split
    train_samples, test_samples = samples[:800], samples[800:]
    train_labels, test_labels = labels[:800], labels[800:]

    print("Training classifiers...")

    # Linear SVM
    linear_trainer = dlib.svm_c_trainer_linear()
    linear_trainer.set_c(1.0)
    linear_classifier = linear_trainer.train(train_samples, train_labels)

    # RBF SVM
    rbf_trainer = dlib.svm_c_trainer_radial_basis()
    rbf_trainer.set_c(1.0)
    rbf_trainer.set_gamma(0.1)
    rbf_classifier = rbf_trainer.train(train_samples, train_labels)

    # Evaluate both classifiers on the held-out split
    linear_correct = 0
    rbf_correct = 0
    for sample, true_label in zip(test_samples, test_labels):
        linear_pred = 1 if linear_classifier(sample) > 0 else -1
        rbf_pred = 1 if rbf_classifier(sample) > 0 else -1
        if linear_pred == true_label:
            linear_correct += 1
        if rbf_pred == true_label:
            rbf_correct += 1
    print(f"Linear SVM accuracy: {linear_correct/len(test_samples):.3f}")
    print(f"RBF SVM accuracy: {rbf_correct/len(test_samples):.3f}")

    # Hyperparameter search: validation error rate as a function of C
    def optimize_hyperparams(c_value):
        trainer = dlib.svm_c_trainer_linear()
        trainer.set_c(c_value[0])
        classifier = trainer.train(train_samples[:200], train_labels[:200])
        errors = 0
        for sample, label in zip(train_samples[200:400], train_labels[200:400]):
            pred = 1 if classifier(sample) > 0 else -1
            if pred != label:
                errors += 1
        return errors / 200.0  # error rate to minimize

    optimal_c, min_error = dlib.find_min_global(
        optimize_hyperparams,
        [(0.1, 100.0)],
        num_function_calls=50
    )
    print(f"Optimal C parameter: {optimal_c[0]:.3f}")
    print(f"Minimum validation error: {min_error:.3f}")


if __name__ == "__main__":
    complete_ml_pipeline()
# This machine learning capability provides a comprehensive suite of algorithms for classification, optimization, tracking, and data analysis tasks.
Install with Tessl CLI
npx tessl i tessl/pypi-dlib