0
# Machine Learning
1
2
A comprehensive set of machine learning tools, including Support Vector Machines, clustering, optimization, object tracking, and statistical analysis, for data science and pattern recognition applications.
3
4
## Capabilities
5
6
### Support Vector Machine Classifiers
7
8
Multiple SVM implementations with different kernel types for classification and regression tasks.
9
10
```python { .api }
11
class svm_c_trainer_linear:
    """Linear C-SVM trainer for binary classification of dense vectors."""

    def __init__(self):
        """Create a trainer with dlib's default settings."""

    def set_c(self, c: float):
        """
        Set the C regularization parameter for both classes.

        Args:
            c: Penalty applied to training errors; must be > 0. Larger
                values fit the training data more closely (less
                regularization).
        """

    @property
    def epsilon(self) -> float:
        """
        Stopping tolerance of the solver (read/write attribute).

        The Python bindings expose this as an attribute rather than a
        setter method, e.g. ``trainer.epsilon = 0.01``. Smaller values
        give a more accurate solution at the cost of longer training.
        """

    def train(self, samples, labels):
        """
        Train the SVM on labeled samples.

        Args:
            samples: Training samples as a ``dlib.vectors`` container
            labels: ``dlib.array`` with one label per sample, each +1 or -1

        Returns:
            Trained linear decision function; calling it on a vector
            returns a signed score (> 0 predicts class +1)
        """
44
45
class svm_c_trainer_radial_basis:
    """RBF-kernel C-SVM trainer for non-linear binary classification."""

    def __init__(self):
        """Create a trainer with dlib's default settings."""

    @property
    def gamma(self) -> float:
        """
        Gamma parameter of the RBF kernel (read/write attribute).

        The Python bindings expose this as an attribute rather than a
        setter method, e.g. ``trainer.gamma = 0.1``. The value must be
        > 0; larger values make the kernel narrower.
        """

    def set_c(self, c: float):
        """
        Set the C regularization parameter for both classes.

        Args:
            c: Penalty applied to training errors; must be > 0
        """

    def train(self, samples, labels):
        """
        Train the RBF SVM on labeled samples.

        Args:
            samples: Training samples as a ``dlib.vectors`` container
            labels: ``dlib.array`` with one label per sample, each +1 or -1

        Returns:
            Trained RBF decision function; calling it on a vector
            returns a signed score (> 0 predicts class +1)
        """
78
79
class svm_c_trainer_histogram_intersection:
    """C-SVM trainer that uses the histogram intersection kernel."""

    def __init__(self):
        """Create a histogram intersection SVM trainer."""

    def set_c(self, c: float):
        """Assign the C regularization parameter."""

    def train(self, samples, labels):
        """
        Fit a histogram intersection SVM to the given data.

        Args:
            samples: Histograms to train on
            labels: Label for each training histogram

        Returns:
            The trained decision function
        """
99
100
class svm_c_trainer_sparse_linear:
    """Linear SVM trainer operating on sparse, high-dimensional samples."""

    def __init__(self):
        """Create a sparse linear SVM trainer."""

    def set_c(self, c: float):
        """Assign the C regularization parameter."""

    def train(self, samples: sparse_vectors, labels):
        """
        Fit a sparse linear SVM to the given data.

        Args:
            samples: Training vectors in sparse form
            labels: Label for each training vector

        Returns:
            The trained decision function
        """
120
```
121
122
**Usage Example:**
123
```python
124
import dlib
125
import numpy as np
126
127
# Generate sample data. The trainers take dlib containers rather than
# plain Python lists: dlib.vectors for samples and dlib.array for labels.
np.random.seed(42)
samples = dlib.vectors()
labels = dlib.array()

# Class +1: points around (2, 2); class -1: points around (-2, -2)
for center, label in ((2, +1), (-2, -1)):
    for _ in range(100):
        x = np.random.normal(center, 0.5)
        y = np.random.normal(center, 0.5)
        samples.append(dlib.vector([x, y]))
        labels.append(label)

# Train linear SVM
linear_trainer = dlib.svm_c_trainer_linear()
linear_trainer.set_c(10.0)
linear_classifier = linear_trainer.train(samples, labels)

# Train RBF SVM (gamma is set as an attribute, not through a setter method)
rbf_trainer = dlib.svm_c_trainer_radial_basis()
rbf_trainer.set_c(10.0)
rbf_trainer.gamma = 0.1
rbf_classifier = rbf_trainer.train(samples, labels)

# Test classifiers: each decision function returns a signed score,
# positive for class +1 and negative for class -1
test_point = dlib.vector([1.5, 1.5])
linear_prediction = linear_classifier(test_point)
rbf_prediction = rbf_classifier(test_point)

print(f"Linear SVM prediction: {linear_prediction}")
print(f"RBF SVM prediction: {rbf_prediction}")
164
```
165
166
### Support Vector Machine Ranking
167
168
Ranking SVMs for learning-to-rank and preference-learning problems.
169
170
```python { .api }
171
class svm_rank_trainer:
    """Linear ranking-SVM trainer for learning-to-rank problems."""

    def __init__(self):
        """Create a trainer with dlib's default settings."""

    @property
    def c(self) -> float:
        """
        C regularization parameter (read/write attribute).

        The Python bindings expose this as an attribute rather than a
        setter method, e.g. ``trainer.c = 10``.
        """

    def train(self, samples) -> object:
        """
        Train a ranking function.

        Args:
            samples: A single ``dlib.ranking_pair`` (one query) or a
                ``dlib.ranking_pairs`` container (several queries)

        Returns:
            Trained linear decision function; calling it on a vector
            returns a ranking score (higher = more relevant)
        """
190
191
class ranking_pair:
    """
    Training data for one ranking query.

    Every vector in ``relevant`` should receive a higher score than
    every vector in ``nonrelevant``.
    """

    def __init__(self):
        """
        Create an empty ranking pair.

        The constructor takes no arguments; append vectors to
        ``relevant`` and ``nonrelevant`` after construction.
        """

    @property
    def relevant(self):
        """``dlib.vectors`` container of vectors that should rank higher."""

    @property
    def nonrelevant(self):
        """``dlib.vectors`` container of vectors that should rank lower."""
210
```
211
212
**Usage Example:**
213
```python
214
import dlib
215
216
# Create ranking training data. Several queries are collected in a
# dlib.ranking_pairs container rather than a plain Python list.
ranking_samples = dlib.ranking_pairs()

# Each ranking_pair is built empty and then filled: vectors in `relevant`
# should rank higher than vectors in `nonrelevant`
for _ in range(100):
    pair = dlib.ranking_pair()
    # Higher quality item (more features)
    pair.relevant.append(dlib.vector([5.0, 4.0, 3.0]))
    # Lower quality item
    pair.nonrelevant.append(dlib.vector([2.0, 1.0, 1.5]))
    ranking_samples.append(pair)

# Train ranking SVM (C is set as an attribute, not through a setter method)
rank_trainer = dlib.svm_rank_trainer()
rank_trainer.c = 1.0
ranking_function = rank_trainer.train(ranking_samples)

# Use for ranking new items
item1 = dlib.vector([4.5, 3.5, 2.5])
item2 = dlib.vector([1.5, 2.0, 1.0])

score1 = ranking_function(item1)
score2 = ranking_function(item2)

print(f"Item 1 score: {score1}")
print(f"Item 2 score: {score2}")
print(f"Item 1 ranks higher: {score1 > score2}")
244
```
245
246
### Object Tracking
247
248
Correlation-based object tracking for video sequences and real-time applications.
249
250
```python { .api }
251
class correlation_tracker:
    """Single-object tracker based on correlation filters."""

    def __init__(self):
        """Create a tracker; call ``start_track`` before ``update``."""

    def start_track(self, img, bounding_box: rectangle):
        """
        Start tracking the object inside the bounding box.

        Args:
            img: Initial image (NumPy array)
            bounding_box: Initial object location; a ``dlib.drectangle``
                is accepted as well as a ``dlib.rectangle``
        """

    def update(self, img) -> float:
        """
        Update the tracker with the next frame.

        Args:
            img: New image frame

        Returns:
            Peak-to-side-lobe ratio of the correlation response; larger
            values mean the tracker is more confident
        """

    def get_position(self) -> drectangle:
        """
        Get the current position of the tracked object.

        Returns:
            Current bounding box as a ``dlib.drectangle``. Its
            coordinates are floating-point, so convert them with
            ``int()`` before using them as pixel positions.
        """
284
```
285
286
**Usage Example:**
287
```python
288
import dlib
289
import cv2
290
291
# Initialize tracker
tracker = dlib.correlation_tracker()

# Open video
cap = cv2.VideoCapture("video.mp4")

try:
    ret, frame = cap.read()

    if ret:
        # Select initial bounding box (example)
        bbox = dlib.rectangle(100, 100, 200, 200)

        # Start tracking
        tracker.start_track(frame, bbox)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Update tracker
            confidence = tracker.update(frame)

            # get_position() returns a dlib.drectangle with floating-point
            # coordinates; OpenCV drawing calls need integer pixel positions
            current_pos = tracker.get_position()
            top_left = (int(current_pos.left()), int(current_pos.top()))
            bottom_right = (int(current_pos.right()), int(current_pos.bottom()))

            # Draw tracking box
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)

            # Show confidence
            cv2.putText(frame, f"Conf: {confidence:.3f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0), 2)

            cv2.imshow("Tracking", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even if tracking fails
    cap.release()
    cv2.destroyAllWindows()
334
```
335
336
### Canonical Correlation Analysis
337
338
Dimensionality reduction and feature analysis using CCA.
339
340
```python { .api }
341
class cca:
    """
    Canonical Correlation Analysis.

    NOTE(review): dlib's Python bindings appear to expose CCA as
    module-level functions (``dlib.cca(L, R, num_correlations, ...)``
    returning a ``cca_outputs`` object, plus
    ``dlib.apply_cca_transform(m, v)``) rather than as a class with a
    ``train`` method -- confirm against the dlib Python API reference
    before relying on the interface described here.
    """

    def __init__(self):
        """Initialize CCA."""

    def train(self, x_samples: list, y_samples: list):
        """
        Train CCA on paired samples.

        Args:
            x_samples: First set of vectors
            y_samples: Second set of vectors; ``y_samples[i]`` is paired
                with ``x_samples[i]``
        """

    def apply_cca_transform(self, x_sample: vector) -> vector:
        """
        Apply the CCA transform to a vector from the x space.

        Args:
            x_sample: Input vector from x space

        Returns:
            Transformed vector in canonical space
        """

    def apply_cca_transform_y(self, y_sample: vector) -> vector:
        """
        Apply the CCA transform to a vector from the y space.

        Args:
            y_sample: Input vector from y space

        Returns:
            Transformed vector in canonical space
        """
377
```
378
379
### Global Optimization
380
381
Functions for global optimization and parameter search.
382
383
```python { .api }
384
def find_min_global(
    f,
    bound1: list,
    bound2: list,
    num_function_calls: int,
    solver_epsilon: float = 0
):
    """
    Find the global minimum of a function using derivative-free optimization.

    Args:
        f: Function to minimize. It is called with one positional float
            argument per optimized variable (``f(x0, x1, ...)``), not
            with a single vector, and must return a float.
        bound1: Lower bound of each variable, in argument order
        bound2: Upper bound of each variable; same length as ``bound1``
        num_function_calls: Number of times ``f`` is evaluated
        solver_epsilon: Accuracy to which a local optimum is refined
            before the solver explores elsewhere; the default 0 keeps
            refining to full precision

    Returns:
        Tuple ``(x, y)`` where ``x`` is the list of arguments that
        minimizes ``f`` and ``y`` is the value ``f(*x)``
    """
400
401
def find_max_global(
    f,
    bound1: list,
    bound2: list,
    num_function_calls: int,
    solver_epsilon: float = 0
):
    """
    Find the global maximum of a function using derivative-free optimization.

    Args:
        f: Function to maximize. It is called with one positional float
            argument per optimized variable (``f(x0, x1, ...)``), not
            with a single vector, and must return a float.
        bound1: Lower bound of each variable, in argument order
        bound2: Upper bound of each variable; same length as ``bound1``
        num_function_calls: Number of times ``f`` is evaluated
        solver_epsilon: Accuracy to which a local optimum is refined
            before the solver explores elsewhere; the default 0 keeps
            refining to full precision

    Returns:
        Tuple ``(x, y)`` where ``x`` is the list of arguments that
        maximizes ``f`` and ``y`` is the value ``f(*x)``
    """
417
```
418
419
**Usage Example:**
420
```python
421
import dlib
422
import math
423
424
# Define function to minimize (Rosenbrock function). find_min_global
# calls the objective with one positional argument per variable.
def rosenbrock(x, y):
    return (1 - x)**2 + 100 * (y - x**2)**2

# Set parameter bounds: lower and upper bounds are separate lists,
# each ordered like the objective's arguments (x, y)
lower_bounds = [-5, -5]
upper_bounds = [5, 5]

# Find global minimum
optimal_params, min_value = dlib.find_min_global(
    rosenbrock,
    lower_bounds,
    upper_bounds,
    1000  # number of times rosenbrock() is evaluated
)

print(f"Optimal parameters: x={optimal_params[0]:.4f}, y={optimal_params[1]:.4f}")
print(f"Minimum value: {min_value:.6f}")
print("Expected minimum at (1, 1) with value 0")
442
```
443
444
### Assignment and Optimization Problems
445
446
Algorithms for solving assignment problems and optimization tasks.
447
448
```python { .api }
449
def max_cost_assignment(cost_matrix) -> list:
    """
    Compute the assignment that maximizes total cost.

    Args:
        cost_matrix: Two-dimensional matrix holding the cost of each
            possible assignment

    Returns:
        The optimal assignments as a list mapping row indices to
        column indices
    """
459
460
def assignment_cost(cost_matrix, assignment: list) -> float:
    """
    Compute the total cost of a given assignment.

    Args:
        cost_matrix: Two-dimensional cost matrix
        assignment: The assignments to evaluate, as a list

    Returns:
        Sum of the costs selected by the assignment
    """
471
```
472
473
**Usage Example:**
474
```python
475
import dlib
476
import numpy as np
477
478
# Create cost matrix (workers x tasks). max_cost_assignment takes a
# square dlib.matrix, so build one directly from the nested lists.
cost_matrix = dlib.matrix([
    [4, 2, 8],
    [4, 3, 7],
    [1, 5, 9]
])

# Find optimal assignment
assignment = dlib.max_cost_assignment(cost_matrix)
total_cost = dlib.assignment_cost(cost_matrix, assignment)

print(f"Optimal assignment: {assignment}")
print(f"Total cost: {total_cost}")

# assignment[i] gives the task assigned to worker i
for worker, task in enumerate(assignment):
    print(f"Worker {worker} -> Task {task} (cost: {cost_matrix[worker][task]})")
495
```
496
497
### Sequence Segmentation
498
499
Machine learning-based sequence segmentation for temporal data analysis.
500
501
```python { .api }
502
class sequence_segmenter:
    """
    Machine learning-based sequence segmentation.

    NOTE(review): dlib's Python bindings appear to provide this
    functionality through ``dlib.train_sequence_segmenter(samples,
    segments, params)``, which returns a callable segmenter object,
    rather than through a ``sequence_segmenter`` class -- confirm
    against the dlib Python API reference before relying on the
    interface described here.
    """

    def __init__(self):
        """Initialize sequence segmenter."""

    def train(self, sequences: list, labels: list):
        """
        Train segmenter on labeled sequences.

        Args:
            sequences: List of sequence data
            labels: List of segmentation labels, one entry per sequence
        """

    def segment_sequence(self, sequence) -> list:
        """
        Segment a new sequence.

        Args:
            sequence: Input sequence to segment

        Returns:
            List of segment boundaries
        """
527
```
528
529
### Decision Functions
530
531
Various decision function classes for different SVM kernels and problem types.
532
533
```python { .api }
534
class decision_function_linear:
    """Decision function of a linear classifier."""

    def __call__(self, sample: vector) -> float:
        """
        Compute the decision value for one sample.

        Args:
            sample: Vector to evaluate

        Returns:
            The decision value; a positive value means class +1 and a
            negative value means class -1
        """
547
548
class decision_function_radial_basis:
    """Decision function that uses the RBF kernel."""

    def __call__(self, sample: vector) -> float:
        """Compute the RBF decision value for one sample."""
553
554
class decision_function_histogram_intersection:
    """Decision function that uses the histogram intersection kernel."""

    def __call__(self, sample: vector) -> float:
        """Compute the histogram intersection decision value for one sample."""
559
```
560
561
### Clustering Algorithms
562
563
Additional clustering methods beyond face clustering.
564
565
```python { .api }
566
def chinese_whispers(edges: list) -> list:
    """
    Cluster the nodes of a graph with the Chinese Whispers algorithm.

    Args:
        edges: The graph's edges, given as a list of (node1, node2) tuples

    Returns:
        A list holding the cluster label of each node
    """
576
577
def spectral_clustering(similarity_matrix, num_clusters: int) -> list:
    """
    Spectral clustering algorithm.

    NOTE(review): this function could not be matched to an entry in
    dlib's Python API reference -- confirm that it is available in the
    installed dlib version before relying on it.

    Args:
        similarity_matrix: Pairwise similarity matrix
        num_clusters: Number of clusters to find

    Returns:
        List of cluster assignments
    """
588
```
589
590
**Complete Machine Learning Pipeline Example:**
591
```python
592
import dlib
593
import numpy as np
594
from sklearn.datasets import make_classification
595
596
def complete_ml_pipeline():
    """
    Demonstrate a complete ML workflow with dlib.

    Generates a synthetic binary dataset, trains a linear and an RBF
    SVM, reports their test accuracy, and then tunes the linear SVM's
    C parameter with ``dlib.find_min_global``.
    """

    def to_dlib(features, targets):
        """Convert NumPy rows and 0/1 targets to dlib.vectors and a +1/-1 dlib.array."""
        samples = dlib.vectors()
        labels = dlib.array()
        for row, target in zip(features, targets):
            samples.append(dlib.vector(row.tolist()))
            labels.append(1 if target == 1 else -1)
        return samples, labels

    def count_correct(classifier, samples, labels):
        """Count the samples whose predicted sign matches the +1/-1 label."""
        return sum(
            (1 if classifier(sample) > 0 else -1) == label
            for sample, label in zip(samples, labels)
        )

    # Generate synthetic dataset
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

    # Split data and convert each part to dlib format (the trainers take
    # dlib.vectors / dlib.array containers rather than Python lists)
    train_samples, train_labels = to_dlib(X[:800], y[:800])
    test_samples, test_labels = to_dlib(X[800:], y[800:])

    # Train multiple classifiers
    print("Training classifiers...")

    # Linear SVM
    linear_trainer = dlib.svm_c_trainer_linear()
    linear_trainer.set_c(1.0)
    linear_classifier = linear_trainer.train(train_samples, train_labels)

    # RBF SVM (gamma is set as an attribute, not through a setter method)
    rbf_trainer = dlib.svm_c_trainer_radial_basis()
    rbf_trainer.set_c(1.0)
    rbf_trainer.gamma = 0.1
    rbf_classifier = rbf_trainer.train(train_samples, train_labels)

    # Evaluate classifiers
    linear_correct = count_correct(linear_classifier, test_samples, test_labels)
    rbf_correct = count_correct(rbf_classifier, test_samples, test_labels)

    print(f"Linear SVM accuracy: {linear_correct/len(test_samples):.3f}")
    print(f"RBF SVM accuracy: {rbf_correct/len(test_samples):.3f}")

    # Optimization example: fit on the first 200 training rows and
    # validate on the next 200
    fit_samples, fit_labels = to_dlib(X[:200], y[:200])
    val_samples, val_labels = to_dlib(X[200:400], y[200:400])

    def optimize_hyperparams(c_value):
        """Return the validation error rate of a linear SVM trained with C=c_value."""
        trainer = dlib.svm_c_trainer_linear()
        trainer.set_c(c_value)
        classifier = trainer.train(fit_samples, fit_labels)

        # Simple validation error
        correct = count_correct(classifier, val_samples, val_labels)
        return 1.0 - correct / len(val_labels)  # Return error rate to minimize

    # Find optimal C parameter. find_min_global calls the objective with
    # one positional argument per variable and takes the lower and upper
    # bounds as separate lists.
    optimal_c, min_error = dlib.find_min_global(
        optimize_hyperparams,
        [0.1],    # lower bound on C
        [100.0],  # upper bound on C
        50        # number of objective evaluations
    )

    print(f"Optimal C parameter: {optimal_c[0]:.3f}")
    print(f"Minimum validation error: {min_error:.3f}")
670
671
if __name__ == "__main__":
672
complete_ml_pipeline()
673
```
674
675
This machine learning capability provides a comprehensive suite of algorithms for classification, optimization, tracking, and data analysis tasks.