# Topological Representations

Machine learning interfaces for converting persistence diagrams into vector representations suitable for statistical analysis, classification, clustering, and neural network applications. These representations bridge topological data analysis with standard machine learning workflows.

## Capabilities

### Vector Methods

Classes that transform persistence diagrams into fixed-dimensional vector representations.

```python { .api }
class PersistenceImage:
    def __init__(self, bandwidth: float = 1.0, weight=None, im_range=None, resolution: tuple = (20, 20)):
        """
        Convert persistence diagrams to persistence images.

        Parameters:
        - bandwidth: Gaussian kernel bandwidth
        - weight: Weight function for points (default: lambda x: 1)
        - im_range: Image coordinate range
        - resolution: Image resolution (height, width)
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to images.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Persistence images as flattened vectors
        """

class Landscape:
    def __init__(self, num_landscapes: int = 5, resolution: int = 100, sample_range=[np.nan, np.nan], keep_endpoints=False):
        """
        Convert persistence diagrams to persistence landscapes.

        Parameters:
        - num_landscapes: Number of landscape functions
        - resolution: Number of sample points
        - sample_range: Range for sampling (default auto-computed)
        - keep_endpoints: Whether to keep exact endpoints
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to landscapes.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Persistence landscapes
        """

class Silhouette:
    def __init__(self, resolution: int = 100, weight=None):
        """
        Convert persistence diagrams to silhouettes.

        Parameters:
        - resolution: Number of sample points
        - weight: Weight function for persistence points
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to silhouettes.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Silhouette vectors
        """

class BettiCurve:
    def __init__(self, resolution: int = 100):
        """
        Convert persistence diagrams to Betti curves.

        Parameters:
        - resolution: Number of sample points along filtration
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to Betti curves.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Betti curve vectors
        """

class ComplexPolynomial:
    def __init__(self, polynomial_type: str = "T", threshold: float = -1):
        """
        Convert persistence diagrams using complex polynomials.

        Parameters:
        - polynomial_type: Type of polynomial ("T", "U", "V")
        - threshold: Threshold for stable computation
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams using polynomials.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Polynomial feature vectors
        """

class Entropy:
    def __init__(self, mode: str = "scalar", resolution: int = 100, sample_range=[np.nan, np.nan]):
        """
        Compute persistence entropy or entropy summary function.

        Parameters:
        - mode: "scalar" for entropy value, "vector" for summary function
        - resolution: Number of sample points for vector mode
        - sample_range: Range for sampling in vector mode
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to entropy features.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Entropy scalars or entropy summary vectors
        """

class TopologicalVector:
    def __init__(self, threshold: int = 10):
        """
        Convert persistence diagrams to topological vectors.

        Parameters:
        - threshold: Threshold for number of points considered
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to topological vectors.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Topological vector representations
        """

class Atol:
    def __init__(self, quantiser):
        """
        Vectorize measures using ATOL (Automatic Topologically-Oriented Learning).

        Parameters:
        - quantiser: Quantization function for the measure space
        """

    def fit_transform(self, X):
        """
        Transform measures to ATOL vectors.

        Parameters:
        - X: List of measures (e.g., persistence diagrams)

        Returns:
        array: ATOL feature vectors
        """

class PersistenceLengths:
    def __init__(self, num_lengths: int = 10):
        """
        Extract the N longest persistence lengths.

        Parameters:
        - num_lengths: Number of persistence lengths to extract
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to length vectors.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        array: Vectors of sorted persistence lengths
        """
```

### Kernel Methods

Kernel functions for persistence diagrams that can be used with kernel-based machine learning algorithms.

```python { .api }
class PersistenceWeightedGaussianKernel:
    def __init__(self, bandwidth: float = 1.0, weight=None):
        """
        Weighted Gaussian kernel for persistence diagrams.

        Parameters:
        - bandwidth: Gaussian kernel bandwidth
        - weight: Weight function for persistence points
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.

        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram

        Returns:
        float: Kernel value
        """

class PersistenceScaleSpaceKernel:
    def __init__(self, bandwidth: float = 1.0):
        """
        Scale space kernel for persistence diagrams.

        Parameters:
        - bandwidth: Kernel bandwidth parameter
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.

        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram

        Returns:
        float: Kernel value
        """

class SlicedWassersteinKernel:
    def __init__(self, num_directions: int = 10, bandwidth: float = 1.0):
        """
        Sliced Wasserstein kernel for persistence diagrams.

        Parameters:
        - num_directions: Number of projection directions
        - bandwidth: Kernel bandwidth
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.

        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram

        Returns:
        float: Kernel value
        """
```

### Distance Metrics

Specialized distance functions for persistence diagrams beyond standard Wasserstein and bottleneck distances.

```python { .api }
class SlicedWassersteinDistance:
    def __init__(self, num_directions: int = 10):
        """
        Sliced Wasserstein distance for persistence diagrams.

        Parameters:
        - num_directions: Number of random projection directions
        """

    def __call__(self, diag1, diag2):
        """
        Compute distance between two diagrams.

        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram

        Returns:
        float: Sliced Wasserstein distance
        """

class PersistenceFisherDistance:
    def __init__(self, bandwidth: float = 1.0, kernel: str = "gaussian"):
        """
        Fisher information distance for persistence diagrams.

        Parameters:
        - bandwidth: Kernel bandwidth
        - kernel: Kernel type
        """

    def __call__(self, diag1, diag2):
        """
        Compute Fisher distance between two diagrams.

        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram

        Returns:
        float: Fisher information distance
        """
```

### Preprocessing

Utilities for preprocessing persistence diagrams before applying machine learning methods.

```python { .api }
class BirthPersistenceTransform:
    def __init__(self):
        """Transform (birth, death) to (birth, persistence) coordinates."""

    def fit_transform(self, X):
        """
        Transform persistence diagrams to birth-persistence coordinates.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        list: Transformed diagrams
        """

class Scaler:
    def __init__(self, metric: str = "bottleneck"):
        """
        Scale persistence diagrams for normalization.

        Parameters:
        - metric: Distance metric for scaling
        """

    def fit_transform(self, X):
        """
        Scale persistence diagrams.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        list: Scaled diagrams
        """

class ProminentPoints:
    def __init__(self, use: str = "coordinates", num_pts: int = 10):
        """
        Select most prominent points from persistence diagrams.

        Parameters:
        - use: Selection criterion ("coordinates", "persistence", etc.)
        - num_pts: Number of points to select
        """

    def fit_transform(self, X):
        """
        Select prominent points from diagrams.

        Parameters:
        - X: List of persistence diagrams

        Returns:
        list: Diagrams with selected points
        """
```

## Scikit-learn Integration

Scikit-learn compatible transformers for topological data analysis workflows.

```python { .api }
# From gudhi.sklearn module
class CubicalPersistence:
    def __init__(self, dimensions=None, persistence_dim_max=True):
        """
        Sklearn transformer for cubical persistence.

        Parameters:
        - dimensions: Cubical complex dimensions
        - persistence_dim_max: Compute persistence in all dimensions
        """

    def fit_transform(self, X):
        """
        Compute persistence for cubical complexes.

        Parameters:
        - X: Input data (images, grids)

        Returns:
        list: Persistence diagrams
        """

class RipsPersistence:
    def __init__(self, max_edge_length=float('inf'), max_dimension=1):
        """
        Sklearn transformer for Rips persistence.

        Parameters:
        - max_edge_length: Maximum edge length for Rips complex
        - max_dimension: Maximum dimension for complex
        """

    def fit_transform(self, X):
        """
        Compute persistence for point clouds.

        Parameters:
        - X: Point cloud data

        Returns:
        list: Persistence diagrams
        """
```

## Usage Examples

### Persistence Images for Classification

```python
import gudhi
# gudhi.representations is an optional submodule: a bare `import gudhi`
# does not load it, so it has to be imported explicitly.
import gudhi.representations
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Generate persistence diagrams from different classes
# (placeholders: fill both lists before running the rest of the example)
diagrams_class1 = []  # ... compute diagrams for class 1
diagrams_class2 = []  # ... compute diagrams for class 2

# Combine data
X_diagrams = diagrams_class1 + diagrams_class2
y = [0] * len(diagrams_class1) + [1] * len(diagrams_class2)

# Convert to persistence images
pi = gudhi.representations.PersistenceImage(resolution=(20, 20))
X_images = pi.fit_transform(X_diagrams)

# Train classifier
X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size=0.3)
clf = SVC(kernel='rbf')
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(f"Classification accuracy: {accuracy:.3f}")
```

### Persistence Landscapes

```python
import gudhi
# gudhi.representations is an optional submodule: a bare `import gudhi`
# does not load it, so it has to be imported explicitly.
import gudhi.representations
import matplotlib.pyplot as plt
import numpy as np

# Compute persistence diagram
# ... (create persistence diagram)
# A small hand-written diagram of (birth, death) pairs stands in here so the
# example runs as-is; replace it with a diagram computed from your data.
persistence = np.array([[0.0, 4.0], [1.0, 2.0], [3.0, 8.0], [6.0, 8.0]])

# Convert to persistence landscape
resolution = 100  # sample points per landscape function
landscape = gudhi.representations.Landscape(num_landscapes=3, resolution=resolution)
landscape_vector = landscape.fit_transform([persistence])

print(f"Landscape vector shape: {landscape_vector.shape}")

# Visualize first landscape function
plt.plot(landscape_vector[0][:resolution])  # First `resolution` points = first landscape
plt.title("First Persistence Landscape Function")
plt.show()
```

### Kernel Methods with Grid Search

```python
import gudhi
# gudhi.representations is an optional submodule: a bare `import gudhi`
# does not load it, so it has to be imported explicitly.
import gudhi.representations
import numpy as np  # needed for the Gram matrix below
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics.pairwise import pairwise_kernels

# Define custom kernel function
def persistence_kernel(X, Y=None):
    """
    Build the Gram matrix of a persistence weighted Gaussian kernel.

    Parameters:
    - X: Sequence of persistence diagrams (rows of the matrix)
    - Y: Sequence of persistence diagrams (columns); defaults to X

    Returns:
    array: Matrix of shape (len(X), len(Y)) of pairwise kernel values
    """
    kernel = gudhi.representations.PersistenceWeightedGaussianKernel(bandwidth=1.0)
    if Y is None:
        Y = X

    gram_matrix = np.zeros((len(X), len(Y)))
    for i, diag1 in enumerate(X):
        for j, diag2 in enumerate(Y):
            gram_matrix[i, j] = kernel(diag1, diag2)
    return gram_matrix

# Use with SVM
svm = SVC(kernel=persistence_kernel)
# ... train and evaluate
```