# Utilities

Data I/O, statistical analysis, filtering, and other utility operations that support machine learning workflows and data processing tasks.

## Capabilities

### Data Input/Output

Functions for loading and saving data in standard machine learning formats.

```python { .api }
def load_libsvm_formatted_data(filename: str) -> tuple:
    """
    Load data in libsvm format.

    Args:
        filename: Path to libsvm format file

    Returns:
        Tuple of (samples, labels) where samples is a list of sparse_vector
        objects and labels is a list of numeric labels
    """

def save_libsvm_formatted_data(filename: str, samples, labels):
    """
    Save data in libsvm format.

    Args:
        filename: Output filename
        samples: List of sample vectors (sparse or dense)
        labels: List of corresponding labels
    """
```
**Usage Example:**
```python
import dlib

# Create sample data
samples = []
labels = []

# Dense vectors
for i in range(100):
    sample = dlib.vector([i * 0.1, i * 0.2, i * 0.3])
    samples.append(sample)
    labels.append(1 if i % 2 == 0 else -1)

# Save in libsvm format
dlib.save_libsvm_formatted_data("dataset.libsvm", samples, labels)

# Load back
loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("dataset.libsvm")

print(f"Loaded {len(loaded_samples)} samples")
print(f"First sample: {loaded_samples[0]}")
print(f"First label: {loaded_labels[0]}")

# Works with sparse vectors too
sparse_samples = []
for i in range(50):
    sparse_vec = dlib.sparse_vector()
    sparse_vec.extend([
        dlib.pair(0, i * 0.5),
        dlib.pair(5, i * 0.3),
        dlib.pair(10, i * 0.1)
    ])
    sparse_samples.append(sparse_vec)

sparse_labels = [1] * 25 + [-1] * 25
dlib.save_libsvm_formatted_data("sparse_dataset.libsvm", sparse_samples, sparse_labels)
```
### Statistical Analysis

Functions for time series analysis and statistical testing.

```python { .api }
def count_steps_without_decrease(time_series, probability_of_decrease: float = 0.51) -> int:
    """
    Count how many steps the time series has gone without noticeably
    decreasing in value.

    Args:
        time_series: List or array of numeric values
        probability_of_decrease: Probability threshold for the statistical test

    Returns:
        Number of steps without a statistically significant decrease
    """

def count_steps_without_decrease_robust(
    time_series,
    probability_of_decrease: float = 0.51,
    quantile_discard: float = 0.1
) -> int:
    """
    Robust version of count_steps_without_decrease that discards outliers
    before testing.

    Args:
        time_series: List or array of numeric values
        probability_of_decrease: Probability threshold for the statistical test
        quantile_discard: Fraction of extreme values to discard

    Returns:
        Number of steps without a statistically significant decrease (robust estimate)
    """

def probability_that_sequence_is_increasing(time_series) -> float:
    """
    Statistical test for an increasing sequence.

    Args:
        time_series: List or array of numeric values

    Returns:
        Probability that the sequence is increasing, in [0, 1]
    """
```
**Usage Example:**
```python
import dlib
import numpy as np

# Generate time series data
np.random.seed(42)

# Increasing trend with noise
trend_data = []
for i in range(100):
    trend_value = i * 0.1 + np.random.normal(0, 0.5)
    trend_data.append(trend_value)

# Analyze time series
steps_no_decrease = dlib.count_steps_without_decrease(trend_data)
steps_robust = dlib.count_steps_without_decrease_robust(trend_data, quantile_discard=0.2)
increasing_prob = dlib.probability_that_sequence_is_increasing(trend_data)

print(f"Steps without decrease: {steps_no_decrease}")
print(f"Steps without decrease (robust): {steps_robust}")
print(f"Probability of increasing: {increasing_prob:.3f}")

# Test with different data patterns
flat_data = [1.0] * 50 + [np.random.normal(1.0, 0.1) for _ in range(50)]
decreasing_data = [10.0 - i * 0.1 + np.random.normal(0, 0.2) for i in range(100)]

print(f"Flat data increasing probability: {dlib.probability_that_sequence_is_increasing(flat_data):.3f}")
print(f"Decreasing data increasing probability: {dlib.probability_that_sequence_is_increasing(decreasing_data):.3f}")
```
### Filtering and Signal Processing

Kalman filtering and signal processing utilities for tracking and noise reduction.

```python { .api }
class momentum_filter:
    """Kalman-filter-based tool for filtering a single scalar value that
    tracks the position of a moving object with momentum. For 2D tracks,
    use one filter per coordinate."""

    def __init__(
        self,
        measurement_noise: float,
        typical_acceleration: float,
        max_measurement_deviation: float
    ):
        """
        Initialize momentum filter.

        Args:
            measurement_noise: Expected measurement noise level
            typical_acceleration: Expected acceleration magnitude
            max_measurement_deviation: Maximum allowed measurement deviation
        """

    def measurement_noise(self) -> float:
        """Get measurement noise parameter."""

    def typical_acceleration(self) -> float:
        """Get typical acceleration parameter."""

    def max_measurement_deviation(self) -> float:
        """Get max measurement deviation parameter."""

    def __call__(self, measurement: float) -> float:
        """
        Filter the next measurement through the Kalman filter.

        Args:
            measurement: New scalar measurement

        Returns:
            Filtered estimate
        """

def find_optimal_momentum_filter(
    sequence: list,
    smoothness: float = 1.0
) -> momentum_filter:
    """
    Find optimal momentum filter parameters for a recorded sequence of
    scalar measurements.

    Args:
        sequence: Sequence of measurements to analyze
        smoothness: Smoothness parameter (higher = smoother filtering)

    Returns:
        Optimally configured momentum filter
    """
```
**Usage Example:**
```python
import dlib
import numpy as np

# Generate noisy scalar position measurements. momentum_filter operates on
# a single scalar stream; track x and y with separate filters if needed.
np.random.seed(42)
true_positions = []
noisy_measurements = []

for t in range(100):
    # True position with some acceleration
    true_pos = t + 0.01 * t**2
    true_positions.append(true_pos)

    # Add measurement noise
    noisy_measurements.append(true_pos + np.random.normal(0, 3.0))

# Create momentum filter
position_filter = dlib.momentum_filter(
    measurement_noise=3.0,
    typical_acceleration=0.1,
    max_measurement_deviation=2.0
)

# Filter measurements
filtered_positions = []
for measurement in noisy_measurements:
    filtered = position_filter(measurement)
    filtered_positions.append(filtered)

# Or find optimal parameters automatically
optimal_filter = dlib.find_optimal_momentum_filter(noisy_measurements, smoothness=2.0)

optimal_filtered = []
for measurement in noisy_measurements:
    filtered = optimal_filter(measurement)
    optimal_filtered.append(filtered)

print(f"Original filter noise param: {position_filter.measurement_noise()}")
print(f"Optimal filter noise param: {optimal_filter.measurement_noise()}")
```
### Assignment and Optimization Utilities

Utility functions for assignment problems and optimization tasks.

```python { .api }
def assignment_cost(cost_matrix, assignment: list) -> float:
    """
    Calculate the total cost of an assignment.

    Args:
        cost_matrix: 2D matrix of assignment costs (a dlib.matrix)
        assignment: List of assignments (row to column mapping)

    Returns:
        Total assignment cost
    """

def max_cost_assignment(cost_matrix) -> list:
    """
    Solve the maximum cost assignment problem using the Hungarian algorithm.

    Args:
        cost_matrix: Square dlib.matrix where cost_matrix[i][j] is the cost
            of assigning row i to column j; the costs must be integers

    Returns:
        List where result[i] is the column assigned to row i
    """
```
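**Usage Example** (a minimal sketch mirroring dlib's own `max_cost_assignment` example; the solver expects a square `dlib.matrix` of integer costs):
```python
import dlib

# cost[i][j] is the value of assigning worker i to job j
cost = dlib.matrix([[1, 2, 6],
                    [5, 3, 6],
                    [4, 5, 0]])

# assignment[i] is the column (job) chosen for row (worker) i
assignment = dlib.max_cost_assignment(cost)

print(f"Optimal assignment: {assignment}")
print(f"Optimal cost: {dlib.assignment_cost(cost, assignment)}")
```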
### Sparse Vector Utilities

Helper functions for working with sparse vectors.

```python { .api }
def make_sparse_vector(sparse_vec: sparse_vector) -> None:
    """
    Sort and deduplicate a sparse vector in place.

    Args:
        sparse_vec: Input sparse vector (may have unsorted or duplicate
            indices); modified so its indices are sorted and unique
    """
```
**Usage Example:**
```python
import dlib

# Create sparse vector with potential issues
sparse_vec = dlib.sparse_vector()
sparse_vec.extend([
    dlib.pair(5, 2.5),
    dlib.pair(1, 1.0),
    dlib.pair(5, 3.0),  # Duplicate index
    dlib.pair(3, 1.5),
    dlib.pair(1, 0.5)   # Another duplicate
])

print("Original sparse vector:")
for pair in sparse_vec:
    print(f"  Index {pair.first}: {pair.second}")

# Clean up the sparse vector (sorted and deduplicated in place)
dlib.make_sparse_vector(sparse_vec)

print("Cleaned sparse vector:")
for pair in sparse_vec:
    print(f"  Index {pair.first}: {pair.second}")
```
### Interactive Utilities

Simple utilities for interactive use and debugging.

```python { .api }
def hit_enter_to_continue():
    """
    Interactive pause utility that waits for the user to press Enter.
    Useful for debugging and interactive scripts.
    """
```

**Usage Example:**
```python
import dlib

print("Starting data processing...")

# Process some data
data = list(range(1000))
processed = [x * 2 for x in data]

print("Processing complete. Press Enter to continue...")
dlib.hit_enter_to_continue()

print("Continuing with analysis...")
```
### Image Dataset Metadata

Functions for working with image dataset XML metadata files (as used by dlib's object detection training tools). These live in the `dlib.image_dataset_metadata` submodule.

```python { .api }
def load_image_dataset_metadata(filename: str):
    """
    Load image dataset metadata from an XML file.

    Args:
        filename: Path to XML metadata file

    Returns:
        Dataset metadata structure containing image paths and annotations
    """

def save_image_dataset_metadata(metadata, filename: str):
    """
    Save image dataset metadata to an XML file.

    Args:
        metadata: Dataset metadata structure
        filename: Output XML filename
    """
```
**Usage Example:**
```python
import dlib.image_dataset_metadata as metadata

# Load existing dataset metadata
try:
    dataset = metadata.load_image_dataset_metadata("training_dataset.xml")
    print("Loaded dataset metadata successfully")

    # Process or modify dataset
    # ... modify dataset structure ...

    # Save modified dataset
    metadata.save_image_dataset_metadata(dataset, "modified_dataset.xml")

except Exception as e:
    print(f"Error loading dataset: {e}")
```
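To inspect what was loaded, a short sketch (this assumes the `dataset.images` / `image.boxes` attribute layout of `dlib.image_dataset_metadata`; verify the names against your dlib version):
```python
import dlib.image_dataset_metadata as metadata

dataset = metadata.load_image_dataset_metadata("training_dataset.xml")

# Walk the annotation tree: one entry per image, each with its boxes
for image in dataset.images:
    print(f"{image.filename}: {len(image.boxes)} annotated boxes")
```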
### Advanced Filtering Options

Additional filtering utilities for specific use cases.

```python { .api }
def create_kalman_filter(
    initial_state,
    measurement_noise: float,
    process_noise: float
):
    """
    Create a generic Kalman filter.

    Args:
        initial_state: Initial state estimate
        measurement_noise: Measurement noise variance
        process_noise: Process noise variance

    Returns:
        Configured Kalman filter
    """

def apply_temporal_smoothing(
    measurements: list,
    window_size: int = 5,
    method: str = "gaussian"
):
    """
    Apply temporal smoothing to a measurement sequence.

    Args:
        measurements: List of measurements over time
        window_size: Size of smoothing window
        method: Smoothing method ("gaussian", "uniform", "exponential")

    Returns:
        Smoothed measurement sequence
    """
```
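If these helpers are not present in your dlib build, the uniform-window case is easy to sketch with plain NumPy (a hedged equivalent, not dlib's implementation; `window_size` mirrors the parameter documented above):
```python
import numpy as np

def uniform_smooth(measurements, window_size=5):
    """Moving-average smoothing: each output value is the mean of a
    window_size-wide neighborhood of the input sequence."""
    kernel = np.ones(window_size) / window_size
    # mode="same" keeps the output the same length as the input;
    # values near the edges are effectively zero-padded
    return np.convolve(measurements, kernel, mode="same")

noisy = [np.sin(t * 0.1) + np.random.normal(0, 0.2) for t in range(100)]
smoothed = uniform_smooth(noisy, window_size=7)
print(f"raw[50]={noisy[50]:.3f}  smoothed[50]={smoothed[50]:.3f}")
```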
### Performance and Debugging Utilities

Helper functions for performance monitoring and debugging.

```python { .api }
def benchmark_function(func, args: tuple, num_iterations: int = 100) -> float:
    """
    Benchmark function execution time.

    Args:
        func: Function to benchmark
        args: Arguments to pass to function
        num_iterations: Number of iterations to run

    Returns:
        Average execution time in seconds
    """

def memory_usage_estimate(data_structure) -> int:
    """
    Estimate memory usage of a dlib data structure.

    Args:
        data_structure: Dlib object (matrix, vector, etc.)

    Returns:
        Estimated memory usage in bytes
    """
```
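A minimal stopwatch equivalent using only the standard library, for builds where `benchmark_function` is unavailable (`time.perf_counter` is Python's high-resolution timer):
```python
import time
import dlib

def simple_benchmark(func, args=(), num_iterations=100):
    """Average wall-clock time of func(*args) over num_iterations runs."""
    start = time.perf_counter()
    for _ in range(num_iterations):
        func(*args)
    return (time.perf_counter() - start) / num_iterations

# Example: time dlib's increasing-sequence test on a 1000-point series
series = [float(i) for i in range(1000)]
avg = simple_benchmark(dlib.probability_that_sequence_is_increasing, (series,))
print(f"Average time: {avg * 1e6:.1f} microseconds per call")
```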
**Complete Utilities Usage Example:**
```python
import dlib
import numpy as np

def comprehensive_utilities_demo():
    """Demonstrate various utility functions."""

    print("=== Data I/O Demo ===")

    # Create and save dataset
    samples = [dlib.vector([i, i * 2, i * 3]) for i in range(100)]
    labels = [1 if i % 2 == 0 else -1 for i in range(100)]

    dlib.save_libsvm_formatted_data("demo_dataset.libsvm", samples, labels)
    loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("demo_dataset.libsvm")
    print(f"Saved and loaded {len(loaded_samples)} samples")

    print("\n=== Statistical Analysis Demo ===")

    # Generate time series with trend
    time_series = [i + np.random.normal(0, 0.5) for i in range(50)]

    steps = dlib.count_steps_without_decrease(time_series)
    increasing_prob = dlib.probability_that_sequence_is_increasing(time_series)

    print(f"Steps without decrease: {steps}")
    print(f"Increasing probability: {increasing_prob:.3f}")

    print("\n=== Filtering Demo ===")

    # Create noisy scalar position data (momentum_filter tracks one
    # scalar stream; use one filter per coordinate for 2D tracks)
    true_trajectory = [50 + 20 * np.sin(t * 0.1) for t in range(100)]
    noisy_trajectory = [p + np.random.normal(0, 3) for p in true_trajectory]

    # Apply filtering
    position_filter = dlib.momentum_filter(3.0, 0.1, 2.0)
    filtered_trajectory = [position_filter(p) for p in noisy_trajectory]

    print(f"Filtered {len(filtered_trajectory)} position measurements")

    print("\n=== Assignment Problem Demo ===")

    # Solve assignment problem (integer costs in a dlib.matrix)
    cost_matrix = dlib.matrix([[9, 2, 7, 8],
                               [6, 4, 3, 7],
                               [5, 8, 1, 8],
                               [7, 6, 9, 4]])

    assignment = dlib.max_cost_assignment(cost_matrix)
    total_cost = dlib.assignment_cost(cost_matrix, assignment)

    print(f"Optimal assignment: {assignment}")
    print(f"Total cost: {total_cost}")

    print("\n=== Sparse Vector Demo ===")

    # Create a sparse vector, then sort and deduplicate it in place
    sparse_vec = dlib.sparse_vector()
    sparse_vec.extend([
        dlib.pair(10, 1.0),
        dlib.pair(2, 2.0),
        dlib.pair(10, 3.0),  # Duplicate
        dlib.pair(5, 1.5)
    ])

    dlib.make_sparse_vector(sparse_vec)
    print(f"Cleaned sparse vector with {len(sparse_vec)} unique elements")

    print("\n=== Interactive Demo ===")
    print("Demonstration complete. Press Enter to finish...")
    dlib.hit_enter_to_continue()
    print("Demo finished!")

if __name__ == "__main__":
    comprehensive_utilities_demo()
```

These utility functions provide essential support for machine learning workflows, data processing, and interactive development with dlib.