CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-gudhi

Computational topology and topological data analysis library providing state-of-the-art algorithms for constructing simplicial complexes and computing persistent homology

Pending
Overview
Eval results
Files

docs/representations.md

Topological Representations

Machine learning interfaces for converting persistence diagrams into vector representations suitable for statistical analysis, classification, clustering, and neural network applications. These representations bridge topological data analysis with standard machine learning workflows.

Capabilities

Vector Methods

Classes that transform persistence diagrams into fixed-dimensional vector representations.

class PersistenceImage:
    def __init__(self, bandwidth: float = 1.0, weight=None, im_range=None, resolution: tuple = (20, 20)):
        """
        Convert persistence diagrams to persistence images.
        
        Parameters:
        - bandwidth: Gaussian kernel bandwidth
        - weight: Weight function for points (default: lambda x: 1)
        - im_range: Image coordinate range
        - resolution: Image resolution (height, width)
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to images.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Persistence images as flattened vectors
        """

class Landscape:
    def __init__(self, num_landscapes: int = 5, resolution: int = 100, sample_range=[np.nan, np.nan], keep_endpoints=False):
        """
        Convert persistence diagrams to persistence landscapes.
        
        Parameters:
        - num_landscapes: Number of landscape functions
        - resolution: Number of sample points
        - sample_range: Range for sampling (default auto-computed)
        - keep_endpoints: Whether to keep exact endpoints
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to landscapes.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Persistence landscapes
        """

class Silhouette:
    def __init__(self, resolution: int = 100, weight=None):
        """
        Convert persistence diagrams to silhouettes.
        
        Parameters:
        - resolution: Number of sample points
        - weight: Weight function for persistence points
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to silhouettes.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Silhouette vectors
        """

class BettiCurve:
    def __init__(self, resolution: int = 100):
        """
        Convert persistence diagrams to Betti curves.
        
        Parameters:
        - resolution: Number of sample points along filtration
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to Betti curves.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Betti curve vectors
        """

class ComplexPolynomial:
    def __init__(self, polynomial_type: str = "T", threshold: float = -1):
        """
        Convert persistence diagrams using complex polynomials.
        
        Parameters:
        - polynomial_type: Type of polynomial ("R", "S", "T")
        - threshold: Threshold for stable computation
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams using polynomials.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Polynomial feature vectors
        """

class Entropy:
    def __init__(self, mode: str = "scalar", resolution: int = 100, sample_range=[np.nan, np.nan]):
        """
        Compute persistence entropy or entropy summary function.
        
        Parameters:
        - mode: "scalar" for entropy value, "vector" for summary function
        - resolution: Number of sample points for vector mode
        - sample_range: Range for sampling in vector mode
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to entropy features.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Entropy scalars or entropy summary vectors
        """

class TopologicalVector:
    def __init__(self, threshold: int = 10):
        """
        Convert persistence diagrams to topological vectors.
        
        Parameters:
        - threshold: Threshold for number of points considered
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to topological vectors.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Topological vector representations
        """

class Atol:
    def __init__(self, quantiser):
        """
        Vectorize measures using ATOL (Automatic Topologically-Oriented Learning).
        
        Parameters:
        - quantiser: Quantization function for the measure space
        """

    def fit_transform(self, X):
        """
        Transform measures to ATOL vectors.
        
        Parameters:
        - X: List of measures (e.g., persistence diagrams)
        
        Returns:
        array: ATOL feature vectors
        """

class PersistenceLengths:
    def __init__(self, num_lengths: int = 10):
        """
        Extract the N longest persistence lengths.
        
        Parameters:
        - num_lengths: Number of persistence lengths to extract
        """

    def fit_transform(self, X):
        """
        Transform persistence diagrams to length vectors.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        array: Vectors of sorted persistence lengths
        """

Kernel Methods

Kernel functions for persistence diagrams that can be used with kernel-based machine learning algorithms.

class PersistenceWeightedGaussianKernel:
    def __init__(self, bandwidth: float = 1.0, weight=None):
        """
        Weighted Gaussian kernel for persistence diagrams.
        
        Parameters:
        - bandwidth: Gaussian kernel bandwidth
        - weight: Weight function for persistence points
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.
        
        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram
        
        Returns:
        float: Kernel value
        """

class PersistenceScaleSpaceKernel:
    def __init__(self, bandwidth: float = 1.0):
        """
        Scale space kernel for persistence diagrams.
        
        Parameters:
        - bandwidth: Kernel bandwidth parameter
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.
        
        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram
        
        Returns:
        float: Kernel value
        """

class SlicedWassersteinKernel:
    def __init__(self, num_directions: int = 10, bandwidth: float = 1.0):
        """
        Sliced Wasserstein kernel for persistence diagrams.
        
        Parameters:
        - num_directions: Number of projection directions
        - bandwidth: Kernel bandwidth
        """

    def __call__(self, diag1, diag2):
        """
        Compute kernel value between two diagrams.
        
        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram
        
        Returns:
        float: Kernel value
        """

Distance Metrics

Specialized distance functions for persistence diagrams beyond standard Wasserstein and bottleneck distances.

class SlicedWassersteinDistance:
    def __init__(self, num_directions: int = 10):
        """
        Sliced Wasserstein distance for persistence diagrams.
        
        Parameters:
        - num_directions: Number of random projection directions
        """

    def __call__(self, diag1, diag2):
        """
        Compute distance between two diagrams.
        
        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram
        
        Returns:
        float: Sliced Wasserstein distance
        """

class PersistenceFisherDistance:
    def __init__(self, bandwidth: float = 1.0, kernel: str = "gaussian"):
        """
        Fisher information distance for persistence diagrams.
        
        Parameters:
        - bandwidth: Kernel bandwidth
        - kernel: Kernel type
        """

    def __call__(self, diag1, diag2):
        """
        Compute Fisher distance between two diagrams.
        
        Parameters:
        - diag1: First persistence diagram
        - diag2: Second persistence diagram
        
        Returns:
        float: Fisher information distance
        """

Preprocessing

Utilities for preprocessing persistence diagrams before applying machine learning methods.

class BirthPersistenceTransform:
    def __init__(self):
        """Transform (birth, death) to (birth, persistence) coordinates."""

    def fit_transform(self, X):
        """
        Transform persistence diagrams to birth-persistence coordinates.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        list: Transformed diagrams
        """

class Scaler:
    def __init__(self, metric: str = "bottleneck"):
        """
        Scale persistence diagrams for normalization.
        
        Parameters:
        - metric: Distance metric for scaling
        """

    def fit_transform(self, X):
        """
        Scale persistence diagrams.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        list: Scaled diagrams
        """

class ProminentPoints:
    def __init__(self, use: str = "coordinates", num_pts: int = 10):
        """
        Select most prominent points from persistence diagrams.
        
        Parameters:
        - use: Selection criterion ("coordinates", "persistence", etc.)
        - num_pts: Number of points to select
        """

    def fit_transform(self, X):
        """
        Select prominent points from diagrams.
        
        Parameters:
        - X: List of persistence diagrams
        
        Returns:
        list: Diagrams with selected points
        """

Scikit-learn Integration

Scikit-learn compatible transformers for topological data analysis workflows.

# From gudhi.sklearn module
class CubicalPersistence:
    def __init__(self, dimensions=None, persistence_dim_max=True):
        """
        Sklearn transformer for cubical persistence.
        
        Parameters:
        - dimensions: Cubical complex dimensions
        - persistence_dim_max: Compute persistence in all dimensions
        """

    def fit_transform(self, X):
        """
        Compute persistence for cubical complexes.
        
        Parameters:
        - X: Input data (images, grids)
        
        Returns:
        list: Persistence diagrams
        """

class RipsPersistence:
    def __init__(self, max_edge_length=float('inf'), max_dimension=1):
        """
        Sklearn transformer for Rips persistence.
        
        Parameters:
        - max_edge_length: Maximum edge length for Rips complex
        - max_dimension: Maximum dimension for complex
        """

    def fit_transform(self, X):
        """
        Compute persistence for point clouds.
        
        Parameters:
        - X: Point cloud data
        
        Returns:
        list: Persistence diagrams
        """

Usage Examples

Persistence Images for Classification

import gudhi
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Generate persistence diagrams from different classes
diagrams_class1 = []  # ... compute diagrams for class 1
diagrams_class2 = []  # ... compute diagrams for class 2

# Combine data
X_diagrams = diagrams_class1 + diagrams_class2
y = [0] * len(diagrams_class1) + [1] * len(diagrams_class2)

# Convert to persistence images
pi = gudhi.representations.PersistenceImage(resolution=(20, 20))
X_images = pi.fit_transform(X_diagrams)

# Train classifier
X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size=0.3)
clf = SVC(kernel='rbf')
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(f"Classification accuracy: {accuracy:.3f}")

Persistence Landscapes

import gudhi
import matplotlib.pyplot as plt

# Compute a persistence diagram and store it in `persistence`
# ... (e.g. from a Rips or cubical complex; used below)

# Convert to persistence landscape
landscape = gudhi.representations.Landscape(num_landscapes=3)
landscape_vector = landscape.fit_transform([persistence])

print(f"Landscape vector shape: {landscape_vector.shape}")

# Visualize first landscape function
plt.plot(landscape_vector[0][:100])  # First 100 points of first landscape
plt.title("First Persistence Landscape Function")
plt.show()

Kernel Methods with Grid Search

import gudhi
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics.pairwise import pairwise_kernels

# Define custom kernel function
def persistence_kernel(X, Y=None):
    kernel = gudhi.representations.PersistenceWeightedGaussianKernel(bandwidth=1.0)
    if Y is None:
        Y = X
    
    gram_matrix = np.zeros((len(X), len(Y)))
    for i, diag1 in enumerate(X):
        for j, diag2 in enumerate(Y):
            gram_matrix[i, j] = kernel(diag1, diag2)
    return gram_matrix

# Use with SVM
svm = SVC(kernel=persistence_kernel)
# ... train and evaluate

Install with Tessl CLI

npx tessl i tessl/pypi-gudhi

docs

complex-construction.md

cubical-complexes.md

index.md

persistent-homology.md

point-cloud.md

representations.md

witness-complexes.md

tile.json