Tessl Tile for pypi/metric-learn@0.7.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

base-classes.md index.md specialized-algorithms.md supervised-algorithms.md utilities.md weakly-supervised-algorithms.md

specialized-algorithms.mddocs/

0
# Specialized Algorithms
1

2
Algorithms designed for specific use cases beyond standard classification metric learning, including clustering, kernel regression, and baseline methods.
3

4
## Capabilities
5

6
### Metric Learning for Kernel Regression (MLKR)
7

8
Learns a metric specifically optimized for kernel regression tasks. Unlike other algorithms focused on classification, MLKR optimizes the metric to improve regression performance.
9

10
```python { .api }
11
class MLKR(MahalanobisMixin, TransformerMixin):
12
    def __init__(self, n_components=None, init='auto', tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None):
13
        """
14
        Parameters:
15
        - n_components: int or None, dimensionality of transformed space
16
        - init: str or array-like, initialization method ('auto', 'pca', 'identity', 'random', or a numpy array of shape (n_components, n_features))
17
        - tol: float or None, convergence tolerance
18
        - max_iter: int, maximum number of iterations
19
        - verbose: bool, whether to print progress messages
20
        - preprocessor: array-like or callable, preprocessor for input data
21
        - random_state: int, random state for reproducibility
22
        """
23
    
24
    def fit(self, X, y):
25
        """
26
        Fit the MLKR metric learner.
27
        
28
        Parameters:
29
        - X: array-like, shape=(n_samples, n_features), training data
30
        - y: array-like, shape=(n_samples,), continuous target values
31
        
32
        Returns:
33
        - self: returns the instance itself
34
        """
35
    
36
    def transform(self, X):
37
        """
38
        Transform data using the learned metric.
39
        
40
        Parameters:
41
        - X: array-like, shape=(n_samples, n_features), data to transform
42
        
43
        Returns:
44
        - X_transformed: array-like, shape=(n_samples, n_components), transformed data
45
        """
46
```
47

48
Usage example:
49

50
```python
51
from metric_learn import MLKR
52
from sklearn.datasets import make_regression
53
from sklearn.model_selection import train_test_split
54

55
# Generate regression data
56
X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=42)
57
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
58

59
# Learn metric for regression
60
mlkr = MLKR(n_components=8, max_iter=100)
61
mlkr.fit(X_train, y_train)
62

63
# Transform data for use with kernel regression
64
X_train_transformed = mlkr.transform(X_train)
65
X_test_transformed = mlkr.transform(X_test)
66
```
67

68
### Mahalanobis Metric for Clustering (MMC)
69

70
Learns a metric optimized for clustering tasks by maximizing the separation between clusters while minimizing intra-cluster distances. Particularly effective for improving k-means clustering.
71

72
```python { .api }
73
class MMC(MahalanobisMixin, TransformerMixin):
74
    def __init__(self, init='identity', max_iter=100, max_proj=10000, convergence_threshold=1e-3,
75
                 num_constraints=None, diagonal=False, diagonal_c=1.0, verbose=False,
76
                 preprocessor=None, random_state=None):
77
        """
78
        Parameters:
79
        - init: str or array-like, initialization of the Mahalanobis matrix ('identity', 'covariance', 'random', or a numpy array of shape (n_features, n_features))
80
        - max_iter: int, maximum number of iterations
81
        - max_proj: int, maximum number of gradient projection steps  
82
        - convergence_threshold: float, convergence threshold
83
        - num_constraints: int or None, number of constraints to satisfy
84
        - diagonal: bool, whether to learn a diagonal metric
85
        - diagonal_c: float, regularization for diagonal entries
86
        - verbose: bool, whether to print progress messages
87
        - preprocessor: array-like or callable, preprocessor for input data
88
        - random_state: int, random state for reproducibility
89
        """
90
    
91
    def fit(self, pairs, y):
92
        """
93
        Fit the MMC metric learner.
94
        
95
        Parameters:
96
        - pairs: array-like, shape=(n_constraints, 2, n_features) or (n_constraints, 2),
97
                3D array of pairs or 2D array of indices
98
        - y: array-like, shape=(n_constraints,), constraint labels (+1 for similar, -1 for dissimilar)
99
        
100
        Returns:
101
        - self: returns the instance itself
102
        """
103
```
104

105
Usage example:
106

107
```python
108
import numpy as np
from metric_learn import MMC
109
from sklearn.datasets import make_blobs
110
from sklearn.cluster import KMeans
111

112
# Generate clusterable data
113
X, y_true = make_blobs(n_samples=300, centers=4, n_features=5, random_state=42)
114

115
# Create constraints based on true clusters (for demonstration)
116
from metric_learn import Constraints
117
constraints = Constraints(y_true)
118
a, b, c, d = constraints.positive_negative_pairs(n_constraints=250)
119
pairs = np.vstack([np.column_stack([a, b]), np.column_stack([c, d])])
120
y_constraints = np.hstack([np.ones(len(a)), -np.ones(len(c))])
121

122
# Learn metric for clustering
123
mmc = MMC(preprocessor=X, max_iter=50)
124
mmc.fit(pairs, y_constraints)
125

126
# Use with k-means clustering
127
X_transformed = mmc.transform(X)
128
kmeans = KMeans(n_clusters=4, random_state=42)
129
cluster_labels = kmeans.fit_predict(X_transformed)
130
```
131

132
### Covariance Baseline
133

134
A simple baseline method that uses the inverse covariance matrix as the metric. This provides a reasonable starting point and comparison baseline for other metric learning algorithms.
135

136
```python { .api }
137
class Covariance(MahalanobisMixin, TransformerMixin):
138
    def __init__(self, preprocessor=None):
139
        """
140
        Parameters:
141
        - preprocessor: array-like or callable, preprocessor for input data
142
        """
143
    
144
    def fit(self, X, y=None):
145
        """
146
        Fit the Covariance metric learner.
147
        
148
        Parameters:
149
        - X: array-like, shape=(n_samples, n_features), training data
150
        - y: array-like, optional, not used but kept for API consistency
151
        
152
        Returns:
153
        - self: returns the instance itself
154
        """
155
```
156

157
Usage example:
158

159
```python
160
from metric_learn import Covariance
161
from sklearn.datasets import load_iris
162

163
X, y = load_iris(return_X_y=True)
164

165
# Fit simple covariance baseline
166
cov = Covariance()
167
cov.fit(X)
168

169
# Get the learned metric (inverse covariance)
170
metric_matrix = cov.get_mahalanobis_matrix()
171
print("Metric matrix shape:", metric_matrix.shape)
172

173
# Transform data
174
X_transformed = cov.transform(X)
175
```
176

177
## Integration with Clustering
178

179
Specialized algorithms like MMC are designed to work seamlessly with clustering algorithms:
180

181
```python
182
import numpy as np
from metric_learn import MMC, Constraints
183
from sklearn.cluster import KMeans, AgglomerativeClustering
184
from sklearn.datasets import make_blobs
185
from sklearn.metrics import adjusted_rand_score
186

187
# Generate data with natural clusters
188
X, y_true = make_blobs(n_samples=200, centers=3, n_features=4, 
189
                       cluster_std=1.5, random_state=42)
190

191
# Generate constraints from partial labeling
192
constraints = Constraints(y_true)
193
a, b, c, d = constraints.positive_negative_pairs(n_constraints=150)
194
pairs = np.vstack([np.column_stack([a, b]), np.column_stack([c, d])])
195
y_constraints = np.hstack([np.ones(len(a)), -np.ones(len(c))])
196

197
# Learn clustering-optimized metric
198
mmc = MMC(preprocessor=X, max_iter=100, verbose=True)
199
mmc.fit(pairs, y_constraints)
200

201
# Apply to different clustering algorithms
202
X_transformed = mmc.transform(X)
203

204
# K-means clustering
205
kmeans = KMeans(n_clusters=3, random_state=42)
206
kmeans_labels = kmeans.fit_predict(X_transformed)
207
print("K-means ARI:", adjusted_rand_score(y_true, kmeans_labels))
208

209
# Hierarchical clustering  
210
agg_clustering = AgglomerativeClustering(n_clusters=3)
211
agg_labels = agg_clustering.fit_predict(X_transformed)
212
print("Hierarchical ARI:", adjusted_rand_score(y_true, agg_labels))
213
```
214

215
## Regression Integration
216

217
MLKR is specifically designed for regression tasks:
218

219
```python
220
from metric_learn import MLKR
221
from sklearn.kernel_ridge import KernelRidge
222
from sklearn.datasets import make_regression
223
from sklearn.model_selection import train_test_split
224
from sklearn.metrics import mean_squared_error
225

226
# Generate regression data
227
X, y = make_regression(n_samples=300, n_features=8, noise=0.1, random_state=42)
228
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
229

230
# Learn metric for regression
231
mlkr = MLKR(n_components=6, max_iter=200)
232
mlkr.fit(X_train, y_train)
233

234
# Transform data
235
X_train_transformed = mlkr.transform(X_train)
236
X_test_transformed = mlkr.transform(X_test)
237

238
# Use with kernel regression
239
kernel_ridge = KernelRidge(alpha=0.1, kernel='rbf')
240
kernel_ridge.fit(X_train_transformed, y_train)
241

242
# Evaluate
243
y_pred = kernel_ridge.predict(X_test_transformed)
244
mse = mean_squared_error(y_test, y_pred)
245
print(f"MSE with MLKR transformation: {mse:.4f}")
246

247
# Compare with original features
248
kernel_ridge_baseline = KernelRidge(alpha=0.1, kernel='rbf')
249
kernel_ridge_baseline.fit(X_train, y_train)
250
y_pred_baseline = kernel_ridge_baseline.predict(X_test)
251
mse_baseline = mean_squared_error(y_test, y_pred_baseline)
252
print(f"MSE without transformation: {mse_baseline:.4f}")
253
```
254

255
## Common Patterns
256

257
All specialized algorithms follow similar patterns but are optimized for their specific use cases:
258

259
```python
260
from metric_learn import MLKR, MMC, Covariance
261

262
# For regression tasks
263
mlkr = MLKR(n_components=5)
264
mlkr.fit(X_regression, y_continuous)
265

266
# For clustering tasks (requires constraints)
267
mmc = MMC(preprocessor=X_clustering)
268
mmc.fit(pairs, y_constraints)
269

270
# For baseline comparison
271
cov = Covariance()
272
cov.fit(X_baseline)
273

274
# All provide standard metric learning interface
275
for algo in [mlkr, mmc, cov]:
276
    X_transformed = algo.transform(X)
277
    metric_func = algo.get_metric()
278
    if hasattr(algo, 'get_mahalanobis_matrix'):
279
        M = algo.get_mahalanobis_matrix()
280
```

Version

Tile

Files

specialized-algorithms.md docs/

specialized-algorithms.mddocs/