Python implementations of metric learning algorithms
—
Core abstract classes and mixins that define the metric learning API. Understanding these classes is essential for using metric-learn algorithms effectively and for implementing custom metric learning algorithms.
Abstract base class that defines the core interface for all metric learning algorithms. All metric learning algorithms in the package inherit from this class.
class BaseMetricLearner(BaseEstimator):
    """Abstract base class defining the core interface for all metric
    learning algorithms.

    All metric learning algorithms in the package inherit from this class.
    """

    def __init__(self, preprocessor=None):
        """Base constructor for metric learners.

        Parameters
        ----------
        preprocessor : array-like or callable, optional
            Preprocessor used to get data from indices.
        """

    def pair_score(self, pairs):
        """Compute similarity scores between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs of points, or 2D array of indices (when a
            preprocessor is used).

        Returns
        -------
        scores : ndarray, shape=(n_pairs,)
            Similarity scores; a higher score means a more similar pair.
        """

    def pair_distance(self, pairs):
        """Compute distances between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs of points, or 2D array of indices (when a
            preprocessor is used).

        Returns
        -------
        distances : ndarray, shape=(n_pairs,)
            Distances between the two members of each pair.
        """

    def get_metric(self):
        """Return a metric function for use with scikit-learn algorithms.

        Returns
        -------
        metric : callable
            Function that computes the distance between two 1D arrays.
        """

    def score_pairs(self, pairs):
        """Legacy method for computing scores between pairs.

        .. deprecated:: 0.7.0
            Use :meth:`pair_distance` or :meth:`pair_score` instead.
        """


# Mixin class for algorithms that learn Mahalanobis distance metrics.
# Inherits from BaseMetricLearner and adds functionality specific to
# Mahalanobis metrics.
class MahalanobisMixin(BaseMetricLearner):
    """Mixin class for algorithms that learn Mahalanobis distance metrics.

    Inherits from BaseMetricLearner and adds functionality specific to
    Mahalanobis metrics.

    Attributes
    ----------
    components_ : ndarray, shape=(n_components, n_features)
        The learned linear transformation matrix ``L`` such that
        ``M = L.T @ L`` is the Mahalanobis matrix.
    """

    def transform(self, X):
        """Apply the learned linear transformation to data.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Data to transform.

        Returns
        -------
        X_transformed : ndarray, shape=(n_samples, n_components)
            Transformed data.
        """

    def get_mahalanobis_matrix(self):
        """Return the learned Mahalanobis matrix.

        Returns
        -------
        M : ndarray, shape=(n_features, n_features)
            The Mahalanobis matrix.
        """

    def pair_distance(self, pairs):
        """Compute the Mahalanobis distance between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        distances : ndarray, shape=(n_pairs,)
            Mahalanobis distances between pairs.
        """

    def pair_score(self, pairs):
        """Compute similarity scores (negative distances) between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        scores : ndarray, shape=(n_pairs,)
            Similarity scores.
        """


# Mixins that add classification capabilities for constraint-based
# learning scenarios.
Adds binary classification capabilities for pair constraints.
class _PairsClassifierMixin:
    """Adds binary classification capabilities for pair constraints."""

    def predict(self, pairs):
        """Predict similarity/dissimilarity for pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_pairs,)
            Predicted labels (+1 or -1).
        """

    def decision_function(self, pairs):
        """Compute decision function values for pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_pairs,)
            Decision function values.
        """

    def score(self, pairs, y):
        """Compute accuracy score for pair predictions.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.
        y : array-like, shape=(n_pairs,)
            True labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """

    def set_threshold(self, threshold):
        """Set the classification threshold.

        Parameters
        ----------
        threshold : float
            Decision threshold for classification.
        """

    def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy'):
        """Calibrate the classification threshold using validation data.

        Parameters
        ----------
        pairs_valid : array-like
            Validation pairs.
        y_valid : array-like
            Validation labels.
        strategy : str, default='accuracy'
            Calibration strategy ('accuracy', 'f1', etc.).
        """


# Adds classification capabilities for triplet constraints.
class _TripletsClassifierMixin:
    """Adds classification capabilities for triplet constraints."""

    def predict(self, triplets):
        """Predict triplet constraint satisfaction.

        Parameters
        ----------
        triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
            3D array of triplets, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_triplets,)
            Predicted constraint satisfaction.
        """

    def decision_function(self, triplets):
        """Compute the decision function for triplets.

        Parameters
        ----------
        triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
            3D array of triplets, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_triplets,)
            Decision scores.
        """

    def score(self, triplets, y):
        """Compute accuracy for triplet predictions.

        Parameters
        ----------
        triplets : array-like
            Triplet constraints.
        y : array-like
            True constraint labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """


# Adds classification capabilities for quadruplet constraints.
class _QuadrupletsClassifierMixin:
    """Adds classification capabilities for quadruplet constraints."""

    def predict(self, quadruplets):
        """Predict quadruplet constraint satisfaction.

        Parameters
        ----------
        quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or (n_quadruplets, 4)
            3D array of quadruplets, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_quadruplets,)
            Predicted constraint satisfaction.
        """

    def decision_function(self, quadruplets):
        """Compute the decision function for quadruplets.

        Parameters
        ----------
        quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or (n_quadruplets, 4)
            3D array of quadruplets, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_quadruplets,)
            Decision scores.
        """

    def score(self, quadruplets, y):
        """Compute accuracy for quadruplet predictions.

        Parameters
        ----------
        quadruplets : array-like
            Quadruplet constraints.
        y : array-like
            True constraint labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """


# The metric-learn package uses a clean inheritance hierarchy:
# Base class for all metric learners
BaseMetricLearner (abstract)
│
├── MahalanobisMixin (concrete mixin)
│ │
│ ├── LMNN, NCA, LFDA (supervised algorithms)
│ ├── ITML, LSML, SDML, RCA, SCML (weakly-supervised algorithms)
│ ├── MMC (clustering algorithm)
│ └── Covariance (baseline algorithm)
│
└── MLKR (regression algorithm; in metric-learn it is also Mahalanobis-based)
# Classification mixins can be combined with base classes
_PairsClassifierMixin
_TripletsClassifierMixin
_QuadrupletsClassifierMixin

All algorithms provide a consistent interface for computing distances and similarities:
from metric_learn import LMNN, ITML
from sklearn.datasets import make_classification
import numpy as np

# Generate sample data.  n_informative must satisfy
# n_classes * n_clusters_per_class <= 2 ** n_informative, so the default
# (n_informative=2) would raise a ValueError with n_classes=3.
X, y = make_classification(n_samples=100, n_features=5, n_classes=3,
                           n_informative=3, random_state=42)

# Train a supervised algorithm directly on labelled points
lmnn = LMNN(n_neighbors=3)
lmnn.fit(X, y)

# Generate pairs and constraints for ITML.
# positive_negative_pairs returns four index arrays: (a[i], b[i]) are
# positive pairs and (c[i], d[i]) are negative pairs.
from metric_learn import Constraints
constraints = Constraints(y)
a, b, c, d = constraints.positive_negative_pairs(n_constraints=100)
pairs = np.vstack([np.column_stack([a, b]), np.column_stack([c, d])])
pair_labels = np.hstack([np.ones(len(a)), -np.ones(len(c))])
itml = ITML(preprocessor=X)
itml.fit(pairs, pair_labels)

# Both algorithms provide the same interface.  Use explicit 3D point
# pairs so the call also works for learners fit without a preprocessor
# (lmnn above was fit on raw points, so index pairs would not resolve).
test_idx = np.array([(0, 1), (2, 10), (5, 20)])
test_pairs = X[test_idx]  # shape=(3, 2, n_features)
for name, algo in [('LMNN', lmnn), ('ITML', itml)]:
    # Compute distances
    distances = algo.pair_distance(test_pairs)
    # Compute similarity scores
    scores = algo.pair_score(test_pairs)
    # Get metric function for scikit-learn
    metric_func = algo.get_metric()
    print(f"{name}: distances={distances[:2]}, scores={scores[:2]}")

# Algorithms that inherit from MahalanobisMixin provide data transformation:
# numpy is required below for the M = L.T @ L verification (the original
# snippet used np without importing it).
import numpy as np
from metric_learn import LMNN, NCA
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

# Train algorithms
lmnn = LMNN(n_neighbors=3)
lmnn.fit(X, y)
nca = NCA(max_iter=100)
nca.fit(X, y)

# All Mahalanobis-based algorithms support transform
for name, algo in [('LMNN', lmnn), ('NCA', nca)]:
    # Transform data to learned metric space
    X_transformed = algo.transform(X)
    # Get the learned Mahalanobis matrix
    M = algo.get_mahalanobis_matrix()
    # Get linear transformation components
    L = algo.components_
    print(f"{name}: transformed shape={X_transformed.shape}, M shape={M.shape}")
    print(f"  Verification: M = L.T @ L = {np.allclose(M, L.T @ L)}")

# Understanding the base classes enables implementing custom algorithms:
from metric_learn.base_metric import MahalanobisMixin
from sklearn.base import TransformerMixin
import numpy as np


class CustomMetricLearner(MahalanobisMixin, TransformerMixin):
    """Example custom metric learning algorithm.

    Illustrates the minimal contract for a Mahalanobis learner: set
    ``components_`` in ``fit`` and the mixin supplies ``transform``,
    ``pair_distance``, ``pair_score`` and ``get_mahalanobis_matrix``.
    """

    def __init__(self, alpha=1.0, preprocessor=None):
        super().__init__(preprocessor=preprocessor)
        # Regularization strength added to the covariance diagonal.
        self.alpha = alpha

    def fit(self, X, y):
        """Implement your metric learning algorithm here.

        This example derives a linear transform from a regularized
        covariance matrix.
        """
        # Validate inputs via the base-class helper; [0] keeps the
        # checked X (presumably (X, y) is returned — verify against
        # metric_learn.base_metric._prepare_inputs).
        X = self._prepare_inputs(X, y, type_of_inputs='classic')[0]
        # Regularized covariance keeps the matrix well-conditioned.
        cov = np.cov(X.T) + self.alpha * np.eye(X.shape[1])
        # Eigendecomposition yields components_ L; the learned metric is
        # M = L.T @ L (eigenvalues clipped to stay positive).
        eigenvals, eigenvecs = np.linalg.eigh(cov)
        self.components_ = eigenvecs @ np.diag(
            np.sqrt(np.maximum(eigenvals, 1e-8)))
        return self


# Usage: generate a small synthetic dataset so the snippet is runnable
# on its own (the original referenced undefined X and y).
rng = np.random.RandomState(42)
X = rng.randn(100, 5)
y = rng.randint(0, 3, size=100)
custom_learner = CustomMetricLearner(alpha=0.1)
custom_learner.fit(X, y)
X_transformed = custom_learner.transform(X)
print("Custom algorithm trained successfully!")
# Install with the Tessl CLI:
npx tessl i tessl/pypi-metric-learn