Python implementations of metric learning algorithms
—
Core abstract classes and mixins that define the metric learning API. Understanding these classes is essential for using metric-learn algorithms effectively and for implementing custom metric learning algorithms.
Abstract base class that defines the core interface for all metric learning algorithms. All metric learning algorithms in the package inherit from this class.
class BaseMetricLearner(BaseEstimator):
    """Abstract base class defining the core interface for all metric
    learning algorithms.

    All metric learning algorithms in the package inherit from this class.
    """

    def __init__(self, preprocessor=None):
        """Base constructor for metric learners.

        Parameters
        ----------
        preprocessor : array-like or callable, optional
            Preprocessor used to get data from indices.
        """

    def pair_score(self, pairs):
        """Compute similarity scores between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs of points, or 2D array of indices (when a
            preprocessor is used).

        Returns
        -------
        scores : ndarray, shape=(n_pairs,)
            Similarity scores; a higher score means a more similar pair.
        """

    def pair_distance(self, pairs):
        """Compute distances between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs of points, or 2D array of indices (when a
            preprocessor is used).

        Returns
        -------
        distances : ndarray, shape=(n_pairs,)
            Distances between the two members of each pair.
        """

    def get_metric(self):
        """Return a metric function for use with scikit-learn algorithms.

        Returns
        -------
        metric : callable
            Function that computes the distance between two 1D arrays.
        """

    def score_pairs(self, pairs):
        """Legacy method for computing scores between pairs.

        .. deprecated:: 0.7.0
            Use :meth:`pair_distance` or :meth:`pair_score` instead.
        """


# Mixin class for algorithms that learn Mahalanobis distance metrics.
# Inherits from BaseMetricLearner and adds functionality specific to
# Mahalanobis metrics.
class MahalanobisMixin(BaseMetricLearner):
    """Mixin class for algorithms that learn Mahalanobis distance metrics.

    Inherits from BaseMetricLearner and adds functionality specific to
    Mahalanobis metrics.

    Attributes
    ----------
    components_ : ndarray, shape=(n_components, n_features)
        The learned linear transformation matrix ``L`` such that
        ``M = L.T @ L`` is the Mahalanobis matrix.
    """

    def transform(self, X):
        """Apply the learned linear transformation to data.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Data to transform.

        Returns
        -------
        X_transformed : ndarray, shape=(n_samples, n_components)
            Transformed data.
        """

    def get_mahalanobis_matrix(self):
        """Return the learned Mahalanobis matrix.

        Returns
        -------
        M : ndarray, shape=(n_features, n_features)
            The Mahalanobis matrix.
        """

    def pair_distance(self, pairs):
        """Compute the Mahalanobis distance between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        distances : ndarray, shape=(n_pairs,)
            Mahalanobis distances between pairs.
        """

    def pair_score(self, pairs):
        """Compute similarity scores (negative distances) between pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        scores : ndarray, shape=(n_pairs,)
            Similarity scores.
        """


# Mixins that add classification capabilities for constraint-based
# learning scenarios.
Adds binary classification capabilities for pair constraints.
class _PairsClassifierMixin:
    """Adds binary classification capabilities for pair constraints."""

    def predict(self, pairs):
        """Predict similarity/dissimilarity for pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_pairs,)
            Predicted labels (+1 or -1).
        """

    def decision_function(self, pairs):
        """Compute decision function values for pairs.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_pairs,)
            Decision function values.
        """

    def score(self, pairs, y):
        """Compute accuracy score for pair predictions.

        Parameters
        ----------
        pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
            3D array of pairs, or 2D array of indices.
        y : array-like, shape=(n_pairs,)
            True labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """

    def set_threshold(self, threshold):
        """Set the classification threshold.

        Parameters
        ----------
        threshold : float
            Decision threshold for classification.
        """

    def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy'):
        """Calibrate the classification threshold using validation data.

        Parameters
        ----------
        pairs_valid : array-like
            Validation pairs.
        y_valid : array-like
            Validation labels.
        strategy : str, default='accuracy'
            Calibration strategy ('accuracy', 'f1', etc.).
        """


# Adds classification capabilities for triplet constraints.
class _TripletsClassifierMixin:
    """Adds classification capabilities for triplet constraints."""

    def predict(self, triplets):
        """Predict triplet constraint satisfaction.

        Parameters
        ----------
        triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
            3D array of triplets, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_triplets,)
            Predicted constraint satisfaction.
        """

    def decision_function(self, triplets):
        """Compute the decision function for triplets.

        Parameters
        ----------
        triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
            3D array of triplets, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_triplets,)
            Decision scores.
        """

    def score(self, triplets, y):
        """Compute accuracy for triplet predictions.

        Parameters
        ----------
        triplets : array-like
            Triplet constraints.
        y : array-like
            True constraint labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """


# Adds classification capabilities for quadruplet constraints.
class _QuadrupletsClassifierMixin:
    """Adds classification capabilities for quadruplet constraints."""

    def predict(self, quadruplets):
        """Predict quadruplet constraint satisfaction.

        Parameters
        ----------
        quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or (n_quadruplets, 4)
            3D array of quadruplets, or 2D array of indices.

        Returns
        -------
        predictions : ndarray, shape=(n_quadruplets,)
            Predicted constraint satisfaction.
        """

    def decision_function(self, quadruplets):
        """Compute the decision function for quadruplets.

        Parameters
        ----------
        quadruplets : array-like, shape=(n_quadruplets, 4, n_features) or (n_quadruplets, 4)
            3D array of quadruplets, or 2D array of indices.

        Returns
        -------
        decision_scores : ndarray, shape=(n_quadruplets,)
            Decision scores.
        """

    def score(self, quadruplets, y):
        """Compute accuracy for quadruplet predictions.

        Parameters
        ----------
        quadruplets : array-like
            Quadruplet constraints.
        y : array-like
            True constraint labels.

        Returns
        -------
        accuracy : float
            Classification accuracy.
        """


# The metric-learn package uses a clean inheritance hierarchy:
# Base class for all metric learners
BaseMetricLearner (abstract)
│
├── MahalanobisMixin (concrete mixin)
│ │
│ ├── LMNN, NCA, LFDA (supervised algorithms)
│ ├── ITML, LSML, SDML, RCA, SCML (weakly-supervised algorithms)
│ ├── MMC (clustering algorithm)
│ └── Covariance (baseline algorithm)
│
└── MLKR (regression algorithm; in metric-learn it is also Mahalanobis-based)
# Classification mixins can be combined with base classes
_PairsClassifierMixin
_TripletsClassifierMixin
_QuadrupletsClassifierMixin

All algorithms provide a consistent interface for computing distances and similarities:
from metric_learn import LMNN, ITML
from sklearn.datasets import make_classification
import numpy as np

# Generate sample data.  n_informative must satisfy
# n_classes * n_clusters_per_class <= 2 ** n_informative, so the default
# (n_informative=2) would raise a ValueError with n_classes=3.
X, y = make_classification(n_samples=100, n_features=5, n_classes=3,
                           n_informative=3, random_state=42)

# Train a supervised algorithm directly on labelled points
lmnn = LMNN(n_neighbors=3)
lmnn.fit(X, y)

# Generate pairs and constraints for ITML.
# positive_negative_pairs returns four index arrays: (a[i], b[i]) are
# positive pairs and (c[i], d[i]) are negative pairs.
from metric_learn import Constraints
constraints = Constraints(y)
a, b, c, d = constraints.positive_negative_pairs(n_constraints=100)
pairs = np.vstack([np.column_stack([a, b]), np.column_stack([c, d])])
pair_labels = np.hstack([np.ones(len(a)), -np.ones(len(c))])
itml = ITML(preprocessor=X)
itml.fit(pairs, pair_labels)

# Both algorithms provide the same interface.  Use explicit 3D point
# pairs so the call also works for learners fit without a preprocessor
# (lmnn above was fit on raw points, so index pairs would not resolve).
test_idx = np.array([(0, 1), (2, 10), (5, 20)])
test_pairs = X[test_idx]  # shape=(3, 2, n_features)
for name, algo in [('LMNN', lmnn), ('ITML', itml)]:
    # Compute distances
    distances = algo.pair_distance(test_pairs)
    # Compute similarity scores
    scores = algo.pair_score(test_pairs)
    # Get metric function for scikit-learn
    metric_func = algo.get_metric()
    print(f"{name}: distances={distances[:2]}, scores={scores[:2]}")

# Algorithms that inherit from MahalanobisMixin provide data transformation:
# numpy is required below for the M = L.T @ L verification (the original
# snippet used np without importing it).
import numpy as np
from metric_learn import LMNN, NCA
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

# Train algorithms
lmnn = LMNN(n_neighbors=3)
lmnn.fit(X, y)
nca = NCA(max_iter=100)
nca.fit(X, y)

# All Mahalanobis-based algorithms support transform
for name, algo in [('LMNN', lmnn), ('NCA', nca)]:
    # Transform data to learned metric space
    X_transformed = algo.transform(X)
    # Get the learned Mahalanobis matrix
    M = algo.get_mahalanobis_matrix()
    # Get linear transformation components
    L = algo.components_
    print(f"{name}: transformed shape={X_transformed.shape}, M shape={M.shape}")
    print(f"  Verification: M = L.T @ L = {np.allclose(M, L.T @ L)}")

# Understanding the base classes enables implementing custom algorithms:
from metric_learn.base_metric import MahalanobisMixin
from sklearn.base import TransformerMixin
import numpy as np


class CustomMetricLearner(MahalanobisMixin, TransformerMixin):
    """Example custom metric learning algorithm.

    Illustrates the minimal contract for a Mahalanobis learner: set
    ``components_`` in ``fit`` and the mixin supplies ``transform``,
    ``pair_distance``, ``pair_score`` and ``get_mahalanobis_matrix``.
    """

    def __init__(self, alpha=1.0, preprocessor=None):
        super().__init__(preprocessor=preprocessor)
        # Regularization strength added to the covariance diagonal.
        self.alpha = alpha

    def fit(self, X, y):
        """Implement your metric learning algorithm here.

        This example derives a linear transform from a regularized
        covariance matrix.
        """
        # Validate inputs via the base-class helper; [0] keeps the
        # checked X (presumably (X, y) is returned — verify against
        # metric_learn.base_metric._prepare_inputs).
        X = self._prepare_inputs(X, y, type_of_inputs='classic')[0]
        # Regularized covariance keeps the matrix well-conditioned.
        cov = np.cov(X.T) + self.alpha * np.eye(X.shape[1])
        # Eigendecomposition yields components_ L; the learned metric is
        # M = L.T @ L (eigenvalues clipped to stay positive).
        eigenvals, eigenvecs = np.linalg.eigh(cov)
        self.components_ = eigenvecs @ np.diag(
            np.sqrt(np.maximum(eigenvals, 1e-8)))
        return self


# Usage: generate a small synthetic dataset so the snippet is runnable
# on its own (the original referenced undefined X and y).
rng = np.random.RandomState(42)
X = rng.randn(100, 5)
y = rng.randint(0, 3, size=100)
custom_learner = CustomMetricLearner(alpha=0.1)
custom_learner.fit(X, y)
X_transformed = custom_learner.transform(X)
print("Custom algorithm trained successfully!")
# Install with the Tessl CLI:
npx tessl i tessl/pypi-metric-learn