Python implementations of metric learning algorithms
====================================================

Supervised metric learning algorithms learn from labeled training data to optimize a distance metric for classification and related tasks. All algorithms inherit from MahalanobisMixin and follow the scikit-learn API.
LMNN learns a Mahalanobis distance metric for the k-NN classification setting: it tries to keep each example's k nearest same-class neighbors close, while separating examples from different classes by a large margin.
class LMNN(MahalanobisMixin, TransformerMixin):
    """Large Margin Nearest Neighbor (LMNN) metric learner.

    Learns a Mahalanobis distance metric in the k-NN classification
    setting, attempting to keep each example's k nearest same-class
    neighbors close while separating examples from different classes
    by a large margin.
    """

    def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
                 learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
                 verbose=False, preprocessor=None, n_components=None,
                 random_state=None):
        """
        Parameters:
        - init: str or array-like, initialization method
          ('auto', 'pca', 'lda', 'identity', 'random')
        - n_neighbors: int, number of target neighbors per example
        - min_iter: int, minimum number of iterations
        - max_iter: int, maximum number of iterations
        - learn_rate: float, learning rate for the optimization
        - regularization: float, regularization parameter between 0 and 1
        - convergence_tol: float, convergence tolerance
        - verbose: bool, whether to print progress messages
        - preprocessor: array-like or callable, preprocessor for input data
        - n_components: int or None, dimensionality of transformed space
        - random_state: int, random state for reproducibility
        """

    def fit(self, X, y):
        """
        Fit the LMNN metric learner.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels

        Returns:
        - self: returns the instance itself
        """

# Usage example:
from metric_learn import LMNN
from sklearn.datasets import load_iris

# Fit LMNN on the iris dataset, then map the data into the learned space.
X, y = load_iris(return_X_y=True)
lmnn = LMNN(n_neighbors=3, learn_rate=1e-6)
lmnn.fit(X, y)
X_transformed = lmnn.transform(X)

# NCA learns a linear transformation to maximize the expected leave-one-out
# classification accuracy of the stochastic nearest neighbors rule in the
# transformed space.
class NCA(MahalanobisMixin, TransformerMixin):
    """Neighborhood Components Analysis (NCA) metric learner."""

    def __init__(self, init='auto', n_components=None, max_iter=100, tol=None,
                 verbose=False, preprocessor=None, random_state=None):
        """
        Parameters:
        - init: str or array-like, initialization method
          ('auto', 'pca', 'lda', 'identity', 'random')
        - n_components: int or None, dimensionality of transformed space
        - max_iter: int, maximum number of iterations
        - tol: float or None, convergence tolerance
        - verbose: bool, whether to print progress messages
        - preprocessor: array-like or callable, preprocessor for input data
        - random_state: int, random state for reproducibility
        """

    def fit(self, X, y):
        """
        Fit the NCA metric learner.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels

        Returns:
        - self: returns the instance itself
        """

# LFDA combines the ideas of Fisher Discriminant Analysis and
# locality-preserving projection for dimensionality reduction and metric
# learning, particularly effective when classes have multimodal
# distributions.
class LFDA(MahalanobisMixin, TransformerMixin):
    """Local Fisher Discriminant Analysis (LFDA) metric learner."""

    def __init__(self, n_components=None, k=None, embedding_type='weighted',
                 preprocessor=None):
        """
        Parameters:
        - n_components: int or None, dimensionality of transformed space
        - k: int or None, number of nearest neighbors for local scaling
        - embedding_type: str, type of embedding
          ('weighted', 'orthonormalized', 'plain')
        - preprocessor: array-like or callable, preprocessor for input data
        """

    def fit(self, X, y):
        """
        Fit the LFDA metric learner.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels

        Returns:
        - self: returns the instance itself
        """

# Usage example:
from metric_learn import LFDA
from sklearn.datasets import make_classification

# Generate a 3-class synthetic dataset. n_informative=4 is required here:
# make_classification enforces n_classes * n_clusters_per_class <= 2**n_informative,
# and with the defaults (n_informative=2, n_clusters_per_class=2) a 3-class
# request (3 * 2 = 6 > 4) raises ValueError.
X, y = make_classification(n_samples=200, n_features=10, n_informative=4,
                           n_classes=3, random_state=42)

# Fit LFDA, then project the data into the learned 5-dimensional space.
lfda = LFDA(n_components=5, k=7)
lfda.fit(X, y)
X_transformed = lfda.transform(X)

# Several algorithms have supervised variants that automatically generate
# constraints from class labels.
class ITML_Supervised(ITML):
    """Supervised variant of ITML: constraints are derived from class labels."""

    def fit(self, X, y, num_constraints=None):
        """
        Fit ITML using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_constraints: int or None, number of constraints to generate

        Returns:
        - self: returns the instance itself
        """


class LSML_Supervised(LSML):
    """Supervised variant of LSML: constraints are derived from class labels."""

    def fit(self, X, y, num_constraints=None):
        """
        Fit LSML using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_constraints: int or None, number of constraints to generate

        Returns:
        - self: returns the instance itself
        """


class SDML_Supervised(SDML):
    """Supervised variant of SDML: constraints are derived from class labels."""

    def fit(self, X, y, num_constraints=None):
        """
        Fit SDML using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_constraints: int or None, number of constraints to generate

        Returns:
        - self: returns the instance itself
        """


class RCA_Supervised(RCA):
    """Supervised variant of RCA: chunklets are derived from class labels."""

    def fit(self, X, y, num_chunks=100):
        """
        Fit RCA using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_chunks: int, number of chunks to generate

        Returns:
        - self: returns the instance itself
        """


class MMC_Supervised(MMC):
    """Supervised variant of MMC: constraints are derived from class labels."""

    def fit(self, X, y, num_constraints=None):
        """
        Fit MMC using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_constraints: int or None, number of constraints to generate

        Returns:
        - self: returns the instance itself
        """


class SCML_Supervised(SCML):
    """Supervised variant of SCML: constraints are derived from class labels."""

    def fit(self, X, y, num_constraints=None):
        """
        Fit SCML using automatically generated constraints from labels.

        Parameters:
        - X: array-like, shape=(n_samples, n_features), training data
        - y: array-like, shape=(n_samples,), training labels
        - num_constraints: int or None, number of constraints to generate

        Returns:
        - self: returns the instance itself
        """

# All supervised algorithms follow similar usage patterns:
from metric_learn import LMNN, NCA, LFDA
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load data
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

# Train metric learner
metric_learner = LMNN(n_neighbors=3)
metric_learner.fit(X_train, y_train)

# Transform data
X_train_transformed = metric_learner.transform(X_train)
X_test_transformed = metric_learner.transform(X_test)

# Use with scikit-learn classifier: get_metric() yields a callable metric,
# so k-NN is run on the raw data with the learned distance (note the
# transformed arrays above are an alternative route and unused here).
knn = KNeighborsClassifier(n_neighbors=3, metric=metric_learner.get_metric())
knn.fit(X_train, y_train)
accuracy = knn.score(X_test, y_test)

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-metric-learn