
# Utilities

Data I/O, statistical analysis, filtering, and related utility operations that support machine learning workflows and data processing tasks.

## Capabilities

### Data Input/Output

Functions for loading and saving data in standard machine learning formats.

```python { .api }
def load_libsvm_formatted_data(filename: str) -> tuple:
    """
    Load data in libsvm format.

    Args:
        filename: Path to libsvm format file

    Returns:
        Tuple of (samples, labels) where samples is a list of sparse_vector
        and labels is a list of numeric labels
    """

def save_libsvm_formatted_data(filename: str, samples, labels):
    """
    Save data in libsvm format.

    Args:
        filename: Output filename
        samples: List of sample vectors (sparse or dense)
        labels: List of corresponding labels
    """
```

**Usage Example:**

```python
import dlib

# Create sample data
samples = []
labels = []

# Dense vectors
for i in range(100):
    sample = dlib.vector([i * 0.1, i * 0.2, i * 0.3])
    samples.append(sample)
    labels.append(1 if i % 2 == 0 else -1)

# Save in libsvm format
dlib.save_libsvm_formatted_data("dataset.libsvm", samples, labels)

# Load back
loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("dataset.libsvm")

print(f"Loaded {len(loaded_samples)} samples")
print(f"First sample: {loaded_samples[0]}")
print(f"First label: {loaded_labels[0]}")

# Works with sparse vectors too
sparse_samples = []
for i in range(50):
    sparse_vec = dlib.sparse_vector()
    sparse_vec.extend([
        dlib.pair(0, i * 0.5),
        dlib.pair(5, i * 0.3),
        dlib.pair(10, i * 0.1)
    ])
    sparse_samples.append(sparse_vec)

sparse_labels = [1] * 25 + [-1] * 25
dlib.save_libsvm_formatted_data("sparse_dataset.libsvm", sparse_samples, sparse_labels)
```

### Statistical Analysis

Functions for time series analysis and statistical testing.

```python { .api }
def count_steps_without_decrease(time_series, probability_of_decrease: float = 0.51) -> int:
    """
    Count steps without decrease in time series.

    Args:
        time_series: List or array of numeric values
        probability_of_decrease: Probability threshold for the statistical test

    Returns:
        Number of steps without significant decrease
    """

def count_steps_without_decrease_robust(
    time_series,
    probability_of_decrease: float = 0.51,
    quantile_discard: float = 0.1
) -> int:
    """
    Robust version that discards outliers.

    Args:
        time_series: List or array of numeric values
        probability_of_decrease: Probability threshold for the statistical test
        quantile_discard: Fraction of extreme values to discard

    Returns:
        Number of steps without significant decrease (robust estimate)
    """

def probability_that_sequence_is_increasing(time_series) -> float:
    """
    Statistical test for an increasing sequence.

    Args:
        time_series: List or array of numeric values

    Returns:
        Probability that the sequence is increasing (0-1)
    """
```

**Usage Example:**

```python
import dlib
import numpy as np

# Generate time series data
np.random.seed(42)

# Increasing trend with noise
trend_data = []
for i in range(100):
    trend_value = i * 0.1 + np.random.normal(0, 0.5)
    trend_data.append(trend_value)

# Analyze time series
steps_no_decrease = dlib.count_steps_without_decrease(trend_data)
steps_robust = dlib.count_steps_without_decrease_robust(trend_data, quantile_discard=0.2)
increasing_prob = dlib.probability_that_sequence_is_increasing(trend_data)

print(f"Steps without decrease: {steps_no_decrease}")
print(f"Steps without decrease (robust): {steps_robust}")
print(f"Probability of increasing: {increasing_prob:.3f}")

# Test with different data patterns
flat_data = [1.0] * 50 + [np.random.normal(1.0, 0.1) for _ in range(50)]
decreasing_data = [10.0 - i * 0.1 + np.random.normal(0, 0.2) for i in range(100)]

print(f"Flat data increasing probability: {dlib.probability_that_sequence_is_increasing(flat_data):.3f}")
print(f"Decreasing data increasing probability: {dlib.probability_that_sequence_is_increasing(decreasing_data):.3f}")
```

### Filtering and Signal Processing

Kalman filtering and signal processing utilities for tracking and noise reduction.

```python { .api }
class momentum_filter:
    """Kalman filter for tracking a scalar quantity, such as one
    coordinate of a moving object."""

    def __init__(
        self,
        measurement_noise: float,
        typical_acceleration: float,
        max_measurement_deviation: float
    ):
        """
        Initialize momentum filter.

        Args:
            measurement_noise: Expected measurement noise level
            typical_acceleration: Expected acceleration magnitude
            max_measurement_deviation: Maximum allowed measurement deviation
        """

    def measurement_noise(self) -> float:
        """Get measurement noise parameter."""

    def typical_acceleration(self) -> float:
        """Get typical acceleration parameter."""

    def max_measurement_deviation(self) -> float:
        """Get max measurement deviation parameter."""

    def __call__(self, measurement: float) -> float:
        """
        Filter a measurement through the Kalman filter.

        Args:
            measurement: New scalar measurement (use one filter per
                coordinate for multi-dimensional tracking)

        Returns:
            Filtered estimate
        """

def find_optimal_momentum_filter(
    sequence: list,
    smoothness: float = 1.0
) -> momentum_filter:
    """
    Find optimal momentum filter parameters.

    Args:
        sequence: Sequence of measurements to analyze
        smoothness: Smoothness parameter (higher = smoother filtering)

    Returns:
        Optimally configured momentum filter
    """
```

**Usage Example:**

```python
import dlib
import numpy as np

# Generate noisy 2D position measurements
np.random.seed(42)
true_positions = []
noisy_measurements = []

for t in range(100):
    # True position with some acceleration
    true_pos = (t + 0.01 * t**2, 50 + 5 * np.sin(t * 0.1))
    true_positions.append(true_pos)

    # Add measurement noise
    noisy_x = true_pos[0] + np.random.normal(0, 3.0)
    noisy_y = true_pos[1] + np.random.normal(0, 3.0)
    noisy_measurements.append((noisy_x, noisy_y))

# momentum_filter smooths one scalar at a time, so use one filter
# per coordinate
filter_x = dlib.momentum_filter(
    measurement_noise=3.0,
    typical_acceleration=0.1,
    max_measurement_deviation=2.0
)
filter_y = dlib.momentum_filter(3.0, 0.1, 2.0)

# Filter measurements
filtered_positions = []
for x, y in noisy_measurements:
    filtered_positions.append((filter_x(x), filter_y(y)))

# Or find optimal parameters automatically from a measurement sequence
xs = [m[0] for m in noisy_measurements]
optimal_filter = dlib.find_optimal_momentum_filter(xs, smoothness=2.0)
optimal_filtered = [optimal_filter(x) for x in xs]

print(f"Original filter noise param: {filter_x.measurement_noise()}")
print(f"Optimal filter noise param: {optimal_filter.measurement_noise()}")
```

### Assignment and Optimization Utilities

Utility functions for assignment problems and optimization tasks.

```python { .api }
def assignment_cost(cost_matrix, assignment: list) -> float:
    """
    Calculate the total cost of an assignment.

    Args:
        cost_matrix: 2D matrix of assignment costs
        assignment: List of assignments (row to column mapping)

    Returns:
        Total assignment cost
    """

def max_cost_assignment(cost_matrix) -> list:
    """
    Solve the maximum cost assignment problem using the Hungarian algorithm.

    Args:
        cost_matrix: 2D matrix (a square dlib.matrix with integer values)
            where cost_matrix[i][j] is the cost of assigning row i to column j

    Returns:
        List where result[i] is the column assigned to row i
    """
```
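
**Usage Example:**

A minimal sketch along the lines of dlib's own `max_cost_assignment` example; note that the function expects a square `dlib.matrix` of integer costs rather than a plain Python list of lists.

```python
import dlib

# Cost of assigning each row (e.g., worker) to each column (e.g., task).
# max_cost_assignment requires an integer-valued dlib.matrix.
cost = dlib.matrix([[1, 2, 6],
                    [5, 3, 6],
                    [4, 5, 0]])

# Find the assignment that maximizes the total cost
assignment = dlib.max_cost_assignment(cost)

# For this matrix the optimal assignment is [2, 0, 1] with total cost 16
print(f"Optimal assignment: {assignment}")
print(f"Total cost: {dlib.assignment_cost(cost, assignment)}")
```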

### Sparse Vector Utilities

Helper functions for working with sparse vectors.

```python { .api }
def make_sparse_vector(sparse_vec: sparse_vector) -> None:
    """
    Sort and deduplicate a sparse vector in place.

    Args:
        sparse_vec: Input sparse vector (may have unsorted or duplicate
            indices); modified so that indices are sorted and any pairs
            with duplicate indices are merged by summing their values
    """
```

**Usage Example:**

```python
import dlib

# Create sparse vector with potential issues
sparse_vec = dlib.sparse_vector()
sparse_vec.extend([
    dlib.pair(5, 2.5),
    dlib.pair(1, 1.0),
    dlib.pair(5, 3.0),  # Duplicate index
    dlib.pair(3, 1.5),
    dlib.pair(1, 0.5)   # Another duplicate
])

print("Original sparse vector:")
for i in range(len(sparse_vec)):
    pair = sparse_vec[i]
    print(f"  Index {pair.first}: {pair.second}")

# Clean up the sparse vector (sorts and merges duplicates in place)
dlib.make_sparse_vector(sparse_vec)

print("Cleaned sparse vector:")
for i in range(len(sparse_vec)):
    pair = sparse_vec[i]
    print(f"  Index {pair.first}: {pair.second}")
```

### Interactive Utilities

Simple utilities for interactive use and debugging.

```python { .api }
def hit_enter_to_continue():
    """
    Interactive pause utility - waits for the user to press Enter.

    Useful for debugging and interactive scripts.
    """
```

**Usage Example:**

```python
import dlib

print("Starting data processing...")

# Process some data
data = list(range(1000))
processed = [x * 2 for x in data]

print("Processing complete. Press Enter to continue...")
dlib.hit_enter_to_continue()

print("Continuing with analysis...")
```

### Image Dataset Metadata

Functions for working with image dataset XML metadata files (used by object detection training).

```python { .api }
def load_image_dataset_metadata(filename: str):
    """
    Load image dataset metadata from an XML file.

    Args:
        filename: Path to XML metadata file

    Returns:
        Dataset metadata structure containing image paths and annotations
    """

def save_image_dataset_metadata(metadata, filename: str):
    """
    Save image dataset metadata to an XML file.

    Args:
        metadata: Dataset metadata structure
        filename: Output XML filename
    """
```

**Usage Example:**

```python
import dlib

# Load existing dataset metadata
try:
    dataset = dlib.load_image_dataset_metadata("training_dataset.xml")
    print("Loaded dataset metadata successfully")

    # Process or modify dataset
    # ... modify dataset structure ...

    # Save modified dataset
    dlib.save_image_dataset_metadata(dataset, "modified_dataset.xml")

except Exception as e:
    print(f"Error loading dataset: {e}")
```

### Advanced Filtering Options

Additional filtering utilities for specific use cases.

```python { .api }
def create_kalman_filter(
    initial_state,
    measurement_noise: float,
    process_noise: float
):
    """
    Create a generic Kalman filter.

    Args:
        initial_state: Initial state estimate
        measurement_noise: Measurement noise variance
        process_noise: Process noise variance

    Returns:
        Configured Kalman filter
    """

def apply_temporal_smoothing(
    measurements: list,
    window_size: int = 5,
    method: str = "gaussian"
):
    """
    Apply temporal smoothing to a measurement sequence.

    Args:
        measurements: List of measurements over time
        window_size: Size of smoothing window
        method: Smoothing method ("gaussian", "uniform", "exponential")

    Returns:
        Smoothed measurement sequence
    """
```
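
**Usage Example:**

A minimal sketch assuming the helpers behave as documented above; the noise levels, window size, and method are illustrative values, not tuned recommendations.

```python
import dlib
import numpy as np

# Noisy 1-D measurements to smooth (synthetic sine wave plus noise)
np.random.seed(0)
measurements = [np.sin(t * 0.1) + np.random.normal(0, 0.2) for t in range(100)]

# Generic Kalman filter seeded with the first measurement
kf = dlib.create_kalman_filter(
    initial_state=measurements[0],
    measurement_noise=0.2,
    process_noise=0.01
)

# Windowed smoothing over the whole sequence
smoothed = dlib.apply_temporal_smoothing(measurements, window_size=7, method="gaussian")
print(f"Smoothed {len(smoothed)} measurements")
```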

### Performance and Debugging Utilities

Helper functions for performance monitoring and debugging.

```python { .api }
def benchmark_function(func, args: tuple, num_iterations: int = 100) -> float:
    """
    Benchmark function execution time.

    Args:
        func: Function to benchmark
        args: Arguments to pass to the function
        num_iterations: Number of iterations to run

    Returns:
        Average execution time in seconds
    """

def memory_usage_estimate(data_structure) -> int:
    """
    Estimate memory usage of a dlib data structure.

    Args:
        data_structure: Dlib object (matrix, vector, etc.)

    Returns:
        Estimated memory usage in bytes
    """
```
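
**Usage Example:**

A brief sketch assuming the helpers behave as documented above; the benchmarked function and iteration count are arbitrary choices.

```python
import dlib

def make_vector():
    # Cheap operation to time: constructing a small dense vector
    return dlib.vector([1.0, 2.0, 3.0])

# Average wall-clock time over 1000 calls
avg_seconds = dlib.benchmark_function(make_vector, args=(), num_iterations=1000)
print(f"Average construction time: {avg_seconds * 1e6:.2f} microseconds")

# Rough memory footprint of a dlib object
v = make_vector()
print(f"Estimated memory usage: {dlib.memory_usage_estimate(v)} bytes")
```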

470

471

**Complete Utilities Usage Example:**

472

```python
import dlib
import numpy as np

def comprehensive_utilities_demo():
    """Demonstrate various utility functions."""

    print("=== Data I/O Demo ===")

    # Create and save dataset
    samples = [dlib.vector([i, i*2, i*3]) for i in range(100)]
    labels = [1 if i % 2 == 0 else -1 for i in range(100)]

    dlib.save_libsvm_formatted_data("demo_dataset.libsvm", samples, labels)
    loaded_samples, loaded_labels = dlib.load_libsvm_formatted_data("demo_dataset.libsvm")
    print(f"Saved and loaded {len(loaded_samples)} samples")

    print("\n=== Statistical Analysis Demo ===")

    # Generate time series with trend
    time_series = [i + np.random.normal(0, 0.5) for i in range(50)]

    steps = dlib.count_steps_without_decrease(time_series)
    increasing_prob = dlib.probability_that_sequence_is_increasing(time_series)

    print(f"Steps without decrease: {steps}")
    print(f"Increasing probability: {increasing_prob:.3f}")

    print("\n=== Filtering Demo ===")

    # Create noisy scalar position data (momentum_filter smooths
    # one scalar value at a time)
    true_trajectory = [50 + 20 * np.sin(t * 0.1) for t in range(100)]
    noisy_trajectory = [p + np.random.normal(0, 3) for p in true_trajectory]

    # Apply filtering
    position_filter = dlib.momentum_filter(3.0, 0.1, 2.0)
    filtered_trajectory = [position_filter(p) for p in noisy_trajectory]

    print(f"Filtered {len(filtered_trajectory)} position measurements")

    print("\n=== Assignment Problem Demo ===")

    # Solve assignment problem (max_cost_assignment expects an
    # integer-valued dlib.matrix)
    cost_matrix = dlib.matrix([
        [9, 2, 7, 8],
        [6, 4, 3, 7],
        [5, 8, 1, 8],
        [7, 6, 9, 4]
    ])

    assignment = dlib.max_cost_assignment(cost_matrix)
    total_cost = dlib.assignment_cost(cost_matrix, assignment)

    print(f"Optimal assignment: {assignment}")
    print(f"Total cost: {total_cost}")

    print("\n=== Sparse Vector Demo ===")

    # Create and clean sparse vector
    sparse_vec = dlib.sparse_vector()
    sparse_vec.extend([
        dlib.pair(10, 1.0),
        dlib.pair(2, 2.0),
        dlib.pair(10, 3.0),  # Duplicate
        dlib.pair(5, 1.5)
    ])

    dlib.make_sparse_vector(sparse_vec)  # Sorts and merges duplicates in place
    print(f"Cleaned sparse vector with {len(sparse_vec)} unique elements")

    print("\n=== Interactive Demo ===")
    print("Demonstration complete. Press Enter to finish...")
    dlib.hit_enter_to_continue()
    print("Demo finished!")

if __name__ == "__main__":
    comprehensive_utilities_demo()
```

These utility functions provide essential support for machine learning workflows, data processing, and interactive development with dlib.