Tessl Tile for pypi/metric-learn@0.7.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

base-classes.md index.md specialized-algorithms.md supervised-algorithms.md utilities.md weakly-supervised-algorithms.md

docs/utilities.md

0
# Utilities
1

2
Helper classes and functions for working with metric learning algorithms, including constraint generation and data preprocessing utilities.
3

4
## Capabilities
5

6
### Constraints Class
7

8
Helper class for generating constraints (pairs, chunks, and triplets) from labeled data, enabling easy conversion from supervised learning problems to weakly-supervised metric learning.
9

10
```python { .api }
11
class Constraints:
12
    def __init__(self, partial_labels):
13
        """
14
        Build constraints from labeled data.
15
        
16
        Parameters:
17
        - partial_labels: array-like, shape=(n_samples,), labels with -1 for unknown
18
        """
19
    
20
    def positive_negative_pairs(self, n_constraints, same_length=False, random_state=None):
21
        """
22
        Generate positive and negative pairs from labeled data.
23
        
24
        Parameters:
25
        - n_constraints: int, number of positive and negative constraints to generate
26
        - same_length: bool, whether to ensure same number of positive and negative pairs
27
        - random_state: int or None, random state for reproducibility
28
        
29
        Returns:
30
        - positive_pairs: array-like, shape=(n_pos, 2), pairs with same label
31
        - negative_pairs: array-like, shape=(n_neg, 2), pairs with different labels
32
        """
33
    
34
    def chunks(self, n_chunks=100, chunk_size=2, random_state=None):
35
        """
36
        Generate chunks of similar items for RCA algorithm.
37
        
38
        Parameters:
39
        - n_chunks: int, number of chunks to generate
40
        - chunk_size: int, number of items per chunk
41
        - random_state: int or None, random state for reproducibility
42
        
43
        Returns:
44
        - chunks: array-like, shape=(n_samples,), 1D array of chunk indicators 
45
                 where -1 indicates that the point does not belong to any chunk
46
        """
47
    
48
    def generate_knntriplets(self, X, k_genuine, k_impostor):
49
        """
50
        Generate triplets from labeled data using k-nearest neighbors.
51
        
52
        Parameters:
53
        - X: array-like, shape=(n_samples, n_features), input data
54
        - k_genuine: int, number of neighbors of the same class to consider
55
        - k_impostor: int, number of neighbors of different classes to consider
56
        
57
        Returns:
58
        - triplets: array-like, shape=(n_constraints, 3), 2D array of triplet indicators
59
        """
60
```
61

62
Usage examples:
63

64
```python
65
from metric_learn import Constraints
66
from sklearn.datasets import load_iris
67
import numpy as np
68

69
# Load sample data
70
X, y = load_iris(return_X_y=True)
71

72
# Create constraints generator from labels
73
constraints = Constraints(y)
74

75
# Generate positive and negative pairs
76
pos_pairs, neg_pairs = constraints.positive_negative_pairs(n_constraints=200)
77

78
# Combine into format expected by weakly-supervised algorithms
79
pairs = np.vstack([pos_pairs, neg_pairs])
80
pair_labels = np.hstack([np.ones(len(pos_pairs)), -np.ones(len(neg_pairs))])
81

82
print("Generated pairs shape:", pairs.shape)
83
print("Pair labels shape:", pair_labels.shape)
84
print("Unique pair labels:", np.unique(pair_labels))  # [-1, 1]
85

86
# Use with weakly-supervised algorithms
87
from metric_learn import ITML
88
itml = ITML(preprocessor=X)
89
itml.fit(pairs, pair_labels)
90
```
91

92
### Working with Different Constraint Types
93

94
The metric-learn package supports various constraint formats for different algorithms:
95

96
#### Pair Constraints
97

98
Pairs are the most common constraint format for weakly-supervised learning:
99

100
```python
101
from metric_learn import Constraints, ITML, LSML
102
from sklearn.datasets import make_classification
103

104
# Generate sample data
105
X, y = make_classification(n_samples=200, n_features=5, n_classes=3, random_state=42)
106

107
# Generate pair constraints
108
constraints = Constraints(y)
109
pos_pairs, neg_pairs = constraints.positive_negative_pairs(n_constraints=250)
110
pairs = np.vstack([pos_pairs, neg_pairs])
111
pair_labels = np.hstack([np.ones(len(pos_pairs)), -np.ones(len(neg_pairs))])
112

113
# Use with different algorithms
114
algorithms = [
115
    ITML(preprocessor=X),
116
    LSML(preprocessor=X)  
117
]
118

119
for algo in algorithms:
120
    algo.fit(pairs, pair_labels)
121
    print(f"{algo.__class__.__name__} fitted with {len(pairs)} constraints")
122
```
123

124
#### Chunk Constraints for RCA
125

126
RCA uses a different constraint format based on chunks of similar items:
127

128
```python
129
from metric_learn import RCA
130
import numpy as np
131

132
# Create chunks manually
133
chunks = [
134
    [0, 1, 2],      # Chunk 1: indices of similar items
135
    [3, 4, 5],      # Chunk 2: indices of similar items  
136
    [6, 7, 8, 9],   # Chunk 3: indices of similar items
137
    [10, 11]        # Chunk 4: indices of similar items
138
]
139

140
rca = RCA(dim=3)
141
rca.fit(chunks)
142

143
# Generate chunks from class labels
144
def labels_to_chunks(y):
145
    """Convert class labels to RCA chunk format."""
146
    chunks = []
147
    unique_labels = np.unique(y)
148
    for label in unique_labels:
149
        chunk_indices = np.where(y == label)[0].tolist()
150
        if len(chunk_indices) > 1:  # Need at least 2 items per chunk
151
            chunks.append(chunk_indices)
152
    return chunks
153

154
# Example usage
155
from sklearn.datasets import load_digits
156
X, y = load_digits(return_X_y=True)
157

158
# Convert labels to chunks using Constraints class
159
y_subset = y[:100] 
160
constraints = Constraints(y_subset)
161
chunks = constraints.chunks(n_chunks=20, chunk_size=3)
162

163
rca = RCA(dim=10)
164
rca.fit(chunks)
165
X_transformed = rca.transform(X[:100])
166
```
167

168
### Data Preprocessing Utilities
169

170
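Although metric-learn does not export standalone preprocessing utilities, its algorithms accept a `preprocessor` argument that lets constraints be given as indices into a dataset rather than as the data points themselves: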
171

172
#### Using Preprocessors
173

174
```python
175
from metric_learn import ITML
176
import numpy as np
177

178
# Your dataset
179
X = np.random.randn(100, 8)
180

181
# Index-based constraints (more memory efficient)
182
pairs_idx = [(0, 1), (2, 5), (10, 20), (15, 25)]
183
y = [1, -1, 1, -1]  # 1 for similar, -1 for dissimilar
184

185
# Method 1: Use preprocessor parameter
186
itml_with_preprocessor = ITML(preprocessor=X)
187
itml_with_preprocessor.fit(pairs_idx, y)
188

189
# Method 2: Convert indices to actual data pairs  
190
pairs_data = np.array([[X[i], X[j]] for i, j in pairs_idx])
191
itml_direct = ITML()
192
itml_direct.fit(pairs_data, y)
193

194
# Both methods are equivalent
195
```
196

197
#### Custom Preprocessor Functions
198

199
```python
200
from metric_learn import ITML
201
import numpy as np
202

203
def custom_preprocessor(indices):
204
    """Custom preprocessor that applies transformations before metric learning."""
205
    # indices is a 2D array of shape (n_pairs, 2)
206
    # Return 3D array of shape (n_pairs, 2, n_features)
207
    pairs = []
208
    for i, j in indices:
209
        # Apply custom transformations
210
        x_i = your_transform_function(your_data[i])
211
        x_j = your_transform_function(your_data[j])
212
        pairs.append([x_i, x_j])
213
    return np.array(pairs)
214

215
# Use custom preprocessor
216
itml = ITML(preprocessor=custom_preprocessor)
217
itml.fit(pairs_idx, y)
218
```
219

220
### Package Version Information
221

222
```python { .api }
223
__version__: str
224
    """Package version string"""
225
```
226

227
Usage:
228

229
```python
230
import metric_learn
231
print("Metric-learn version:", metric_learn.__version__)
232
```
233

234
### Integration Utilities
235

236
Common patterns for integrating metric-learn with scikit-learn workflows:
237

238
#### Pipeline Integration
239

240
```python
241
from sklearn.pipeline import Pipeline
242
from sklearn.preprocessing import StandardScaler
243
from sklearn.neighbors import KNeighborsClassifier
244
from metric_learn import LMNN
245

246
# Create pipeline with metric learning
247
pipeline = Pipeline([
248
    ('scaler', StandardScaler()),
249
    ('metric_learner', LMNN(k=3)),
250
    ('classifier', KNeighborsClassifier(n_neighbors=3))
251
])
252

253
# Note: This requires custom handling since LMNN needs labels in fit()
254
# Better approach:
255
from sklearn.model_selection import train_test_split
256

257
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
258

259
# Scale data
260
scaler = StandardScaler()
261
X_train_scaled = scaler.fit_transform(X_train)
262
X_test_scaled = scaler.transform(X_test)
263

264
# Learn metric
265
lmnn = LMNN(k=3)
266
lmnn.fit(X_train_scaled, y_train)
267

268
# Transform data
269
X_train_transformed = lmnn.transform(X_train_scaled)
270
X_test_transformed = lmnn.transform(X_test_scaled)
271

272
# Classify
273
knn = KNeighborsClassifier(n_neighbors=3, metric=lmnn.get_metric())
274
knn.fit(X_train_scaled, y_train)  # Use original scaled data for metric computation
275
accuracy = knn.score(X_test_scaled, y_test)
276
```
277

278
#### Cross-Validation with Metric Learning
279

280
```python
281
from sklearn.model_selection import cross_val_score
282
from sklearn.neighbors import KNeighborsClassifier
283
from metric_learn import NCA
284
import numpy as np
285

286
def metric_learning_cv_score(X, y, metric_learner, classifier, cv=5):
287
    """Custom cross-validation for metric learning algorithms."""
288
    from sklearn.model_selection import KFold
289
    
290
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
291
    scores = []
292
    
293
    for train_idx, test_idx in kf.split(X):
294
        X_train, X_test = X[train_idx], X[test_idx]  
295
        y_train, y_test = y[train_idx], y[test_idx]
296
        
297
        # Fit metric learner
298
        metric_learner_copy = type(metric_learner)(**metric_learner.get_params())
299
        metric_learner_copy.fit(X_train, y_train)
300
        
301
        # Transform data
302
        X_train_transformed = metric_learner_copy.transform(X_train)
303
        X_test_transformed = metric_learner_copy.transform(X_test)
304
        
305
        # Fit and score classifier
306
        classifier_copy = type(classifier)(**classifier.get_params())
307
        classifier_copy.fit(X_train_transformed, y_train)
308
        score = classifier_copy.score(X_test_transformed, y_test)
309
        scores.append(score)
310
    
311
    return np.array(scores)
312

313
# Usage example
314
from sklearn.datasets import load_wine
315
X, y = load_wine(return_X_y=True)
316

317
nca = NCA(max_iter=100)
318
knn = KNeighborsClassifier(n_neighbors=3)
319

320
scores = metric_learning_cv_score(X, y, nca, knn, cv=5)
321
print(f"CV scores: {scores}")
322
print(f"Mean CV score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")
323
```

Version

Tile

Files

docs/utilities.md

docs/utilities.md