0
# Specialized Algorithms
1
2
Algorithms designed for specific use cases beyond standard classification metric learning, including clustering, kernel regression, and baseline methods.
3
4
## Capabilities
5
6
### Metric Learning for Kernel Regression (MLKR)
7
8
Learns a metric specifically optimized for kernel regression tasks. Unlike other algorithms focused on classification, MLKR optimizes the metric to improve regression performance.
9
10
```python { .api }
11
class MLKR(BaseMetricLearner, TransformerMixin):
12
def __init__(self, n_components=None, init='auto', tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None):
13
"""
14
Parameters:
15
- n_components: int or None, dimensionality of transformed space
16
- init: str or array-like, initialization method ('auto', 'pca', 'lda', 'identity', 'random')
17
- tol: float or None, convergence tolerance
18
- max_iter: int, maximum number of iterations
19
- verbose: bool, whether to print progress messages
20
- preprocessor: array-like or callable, preprocessor for input data
21
- random_state: int, random state for reproducibility
22
"""
23
24
def fit(self, X, y):
25
"""
26
Fit the MLKR metric learner.
27
28
Parameters:
29
- X: array-like, shape=(n_samples, n_features), training data
30
- y: array-like, shape=(n_samples,), continuous target values
31
32
Returns:
33
- self: returns the instance itself
34
"""
35
36
def transform(self, X):
37
"""
38
Transform data using the learned metric.
39
40
Parameters:
41
- X: array-like, shape=(n_samples, n_features), data to transform
42
43
Returns:
44
- X_transformed: array-like, shape=(n_samples, n_components), transformed data
45
"""
46
```
47
48
Usage example:
49
50
```python
51
from metric_learn import MLKR
52
from sklearn.datasets import make_regression
53
from sklearn.model_selection import train_test_split
54
55
# Generate regression data
56
X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=42)
57
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
58
59
# Learn metric for regression
60
mlkr = MLKR(n_components=8, max_iter=100)
61
mlkr.fit(X_train, y_train)
62
63
# Transform data for use with kernel regression
64
X_train_transformed = mlkr.transform(X_train)
65
X_test_transformed = mlkr.transform(X_test)
66
```
67
68
### Mahalanobis Metric for Clustering (MMC)
69
70
Learns a metric optimized for clustering tasks by maximizing the separation between clusters while minimizing intra-cluster distances. Particularly effective for improving k-means clustering.
71
72
```python { .api }
73
class MMC(MahalanobisMixin, TransformerMixin):
74
def __init__(self, init='identity', max_iter=100, max_proj=10000, convergence_threshold=1e-3,
75
num_constraints=None, diagonal=False, diagonal_c=1.0, verbose=False,
76
preprocessor=None, random_state=None):
77
"""
78
Parameters:
79
- init: str or array-like, initialization method ('identity', 'random')
80
- max_iter: int, maximum number of iterations
81
- max_proj: int, maximum number of gradient projection steps
82
- convergence_threshold: float, convergence threshold
83
- num_constraints: int or None, number of constraints to satisfy
84
- diagonal: bool, whether to learn a diagonal metric
85
- diagonal_c: float, regularization for diagonal entries
86
- verbose: bool, whether to print progress messages
87
- preprocessor: array-like or callable, preprocessor for input data
88
- random_state: int, random state for reproducibility
89
"""
90
91
def fit(self, pairs, y):
92
"""
93
Fit the MMC metric learner.
94
95
Parameters:
96
- pairs: array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2),
97
3D array of pairs or 2D array of indices
98
- y: array-like, shape=(n_constraints,), constraint labels (+1 for similar, -1 for dissimilar)
99
100
Returns:
101
- self: returns the instance itself
102
"""
103
```
104
105
Usage example:
106
107
```python
108
import numpy as np
from metric_learn import MMC
109
from sklearn.datasets import make_blobs
110
from sklearn.cluster import KMeans
111
112
# Generate clusterable data
113
X, y_true = make_blobs(n_samples=300, centers=4, n_features=5, random_state=42)
114
115
# Create constraints based on true clusters (for demonstration)
116
from metric_learn import Constraints
117
constraints = Constraints(y_true)
118
a, b, c, d = constraints.positive_negative_pairs(n_constraints=250)
pos_pairs, neg_pairs = np.column_stack([a, b]), np.column_stack([c, d])  # (n, 2) index pairs into X
119
pairs = np.vstack([pos_pairs, neg_pairs])
120
y_constraints = np.hstack([np.ones(len(pos_pairs)), -np.ones(len(neg_pairs))])
121
122
# Learn metric for clustering
123
mmc = MMC(preprocessor=X, max_iter=50)
124
mmc.fit(pairs, y_constraints)
125
126
# Use with k-means clustering
127
X_transformed = mmc.transform(X)
128
kmeans = KMeans(n_clusters=4, random_state=42)
129
cluster_labels = kmeans.fit_predict(X_transformed)
130
```
131
132
### Covariance Baseline
133
134
A simple baseline method that uses the inverse covariance matrix as the metric. This provides a reasonable starting point and comparison baseline for other metric learning algorithms.
135
136
```python { .api }
137
class Covariance(MahalanobisMixin, TransformerMixin):
138
def __init__(self, preprocessor=None):
139
"""
140
Parameters:
141
- preprocessor: array-like or callable, preprocessor for input data
142
"""
143
144
def fit(self, X, y=None):
145
"""
146
Fit the Covariance metric learner.
147
148
Parameters:
149
- X: array-like, shape=(n_samples, n_features), training data
150
- y: array-like, optional, not used but kept for API consistency
151
152
Returns:
153
- self: returns the instance itself
154
"""
155
```
156
157
Usage example:
158
159
```python
160
from metric_learn import Covariance
161
from sklearn.datasets import load_iris
162
163
X, y = load_iris(return_X_y=True)
164
165
# Fit simple covariance baseline
166
cov = Covariance()
167
cov.fit(X)
168
169
# Get the learned metric (inverse covariance)
170
metric_matrix = cov.get_mahalanobis_matrix()
171
print("Metric matrix shape:", metric_matrix.shape)
172
173
# Transform data
174
X_transformed = cov.transform(X)
175
```
176
177
## Integration with Clustering
178
179
Specialized algorithms like MMC are designed to work seamlessly with clustering algorithms:
180
181
```python
182
import numpy as np
from metric_learn import MMC, Constraints
183
from sklearn.cluster import KMeans, AgglomerativeClustering
184
from sklearn.datasets import make_blobs
185
from sklearn.metrics import adjusted_rand_score
186
187
# Generate data with natural clusters
188
X, y_true = make_blobs(n_samples=200, centers=3, n_features=4,
189
cluster_std=1.5, random_state=42)
190
191
# Generate pairwise constraints from the ground-truth labels (for demonstration)
192
constraints = Constraints(y_true)
193
a, b, c, d = constraints.positive_negative_pairs(n_constraints=150)
pos_pairs, neg_pairs = np.column_stack([a, b]), np.column_stack([c, d])  # (n, 2) index pairs into X
194
pairs = np.vstack([pos_pairs, neg_pairs])
195
y_constraints = np.hstack([np.ones(len(pos_pairs)), -np.ones(len(neg_pairs))])
196
197
# Learn clustering-optimized metric
198
mmc = MMC(preprocessor=X, max_iter=100, verbose=True)
199
mmc.fit(pairs, y_constraints)
200
201
# Apply to different clustering algorithms
202
X_transformed = mmc.transform(X)
203
204
# K-means clustering
205
kmeans = KMeans(n_clusters=3, random_state=42)
206
kmeans_labels = kmeans.fit_predict(X_transformed)
207
print("K-means ARI:", adjusted_rand_score(y_true, kmeans_labels))
208
209
# Hierarchical clustering
210
agg_clustering = AgglomerativeClustering(n_clusters=3)
211
agg_labels = agg_clustering.fit_predict(X_transformed)
212
print("Hierarchical ARI:", adjusted_rand_score(y_true, agg_labels))
213
```
214
215
## Regression Integration
216
217
MLKR is specifically designed for regression tasks:
218
219
```python
220
from metric_learn import MLKR
221
from sklearn.kernel_ridge import KernelRidge
222
from sklearn.datasets import make_regression
223
from sklearn.model_selection import train_test_split
224
from sklearn.metrics import mean_squared_error
225
226
# Generate regression data
227
X, y = make_regression(n_samples=300, n_features=8, noise=0.1, random_state=42)
228
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
229
230
# Learn metric for regression
231
mlkr = MLKR(n_components=6, max_iter=200)
232
mlkr.fit(X_train, y_train)
233
234
# Transform data
235
X_train_transformed = mlkr.transform(X_train)
236
X_test_transformed = mlkr.transform(X_test)
237
238
# Use with kernel regression
239
kernel_ridge = KernelRidge(alpha=0.1, kernel='rbf')
240
kernel_ridge.fit(X_train_transformed, y_train)
241
242
# Evaluate
243
y_pred = kernel_ridge.predict(X_test_transformed)
244
mse = mean_squared_error(y_test, y_pred)
245
print(f"MSE with MLKR transformation: {mse:.4f}")
246
247
# Compare with original features
248
kernel_ridge_baseline = KernelRidge(alpha=0.1, kernel='rbf')
249
kernel_ridge_baseline.fit(X_train, y_train)
250
y_pred_baseline = kernel_ridge_baseline.predict(X_test)
251
mse_baseline = mean_squared_error(y_test, y_pred_baseline)
252
print(f"MSE without transformation: {mse_baseline:.4f}")
253
```
254
255
## Common Patterns
256
257
All specialized algorithms follow similar patterns but are optimized for their specific use cases:
258
259
```python
260
from metric_learn import MLKR, MMC, Covariance
261
262
# For regression tasks
263
mlkr = MLKR(n_components=5)
264
mlkr.fit(X_regression, y_continuous)
265
266
# For clustering tasks (requires constraints)
267
mmc = MMC(preprocessor=X_clustering)
268
mmc.fit(pairs, y_constraints)
269
270
# For baseline comparison
271
cov = Covariance()
272
cov.fit(X_baseline)
273
274
# All provide standard metric learning interface
275
for algo in [mlkr, mmc, cov]:
276
X_transformed = algo.transform(X)
277
metric_func = algo.get_metric()
278
if hasattr(algo, 'get_mahalanobis_matrix'):
279
M = algo.get_mahalanobis_matrix()
280
```