0
# Data Transformers
1
2
Transformer classes for preprocessing targets and features to ensure compatibility between scikit-learn and Keras data formats. These transformers handle automatic data preprocessing based on target types and provide seamless integration with scikit-learn pipelines.
3
4
## Capabilities
5
6
### TargetReshaper
7
8
Converts 1D targets to 2D format and back for compatibility with transformers that require 2D inputs, such as OneHotEncoder and OrdinalEncoder.
9
10
```python { .api }
11
class TargetReshaper(BaseEstimator, TransformerMixin):
12
def __init__(self):
13
"""Initialize TargetReshaper."""
14
15
def fit(self, y):
16
"""
17
Fit the transformer to target array.
18
19
Args:
20
y: array-like - Target array to learn shape from
21
22
Returns:
23
self: Fitted transformer
24
"""
25
26
def transform(self, y):
27
"""
28
Transform 1D targets to 2D format.
29
30
Args:
31
y: array-like - Target array to transform
32
33
Returns:
34
array-like: Reshaped target array
35
"""
36
37
def inverse_transform(self, y):
38
"""
39
Transform 2D targets back to original dimensionality.
40
41
Args:
42
y: array-like - 2D target array to reshape back
43
44
Returns:
45
array-like: Target array in original shape
46
"""
47
48
@property
49
def ndim_(self):
50
"""int: Original dimensions of fitted target array."""
51
```
52
53
### ClassifierLabelEncoder
54
55
Default target transformer for KerasClassifier that handles label encoding and one-hot encoding for classification targets with support for different target types.
56
57
```python { .api }
58
class ClassifierLabelEncoder(BaseEstimator, TransformerMixin):
59
def __init__(self, loss=None):
60
"""
61
Initialize ClassifierLabelEncoder.
62
63
Args:
64
loss: Loss function to determine encoding strategy
65
"""
66
67
def fit(self, y):
68
"""
69
Fit encoder to label array.
70
71
Args:
72
y: array-like of shape (n_samples,) - Target class labels
73
74
Returns:
75
self: Fitted encoder
76
"""
77
78
def transform(self, y):
79
"""
80
Transform labels to encoded format.
81
82
Args:
83
y: array-like of shape (n_samples,) - Target labels to encode
84
85
Returns:
86
array-like: Encoded target labels suitable for Keras training
87
"""
88
89
def inverse_transform(self, y_transformed, return_proba=False):
90
"""
91
Transform encoded labels back to original format.
92
93
Args:
94
y_transformed: array-like - Encoded labels or probabilities
95
return_proba: bool - Whether to return probabilities or class predictions
96
97
Returns:
98
array-like: Original label format or probabilities
99
"""
100
101
def get_metadata(self):
102
"""
103
Get metadata about label encoding.
104
105
Returns:
106
dict: Metadata generated during fit, with keys `classes_`, `n_classes_`, `n_outputs_`, and `n_outputs_expected_`
107
"""
108
```
109
110
### RegressorTargetEncoder
111
112
Default target transformer for KerasRegressor that handles target preprocessing for regression tasks including reshaping and validation.
113
114
```python { .api }
115
class RegressorTargetEncoder(BaseEstimator, TransformerMixin):
116
def __init__(self):
117
"""Initialize RegressorTargetEncoder."""
118
119
def fit(self, y):
120
"""
121
Fit encoder to target array.
122
123
Args:
124
y: array-like - Regression target values
125
126
Returns:
127
self: Fitted encoder
128
"""
129
130
def transform(self, y):
131
"""
132
Transform regression targets for Keras compatibility.
133
134
Args:
135
y: array-like - Target values to transform
136
137
Returns:
138
array-like: Transformed targets suitable for Keras
139
"""
140
141
def inverse_transform(self, y):
142
"""
143
Transform targets back to original format.
144
145
Args:
146
y: array-like - Transformed target values
147
148
Returns:
149
array-like: Original target format
150
"""
151
152
def get_metadata(self):
153
"""
154
Get metadata about target encoding.
155
156
Returns:
157
dict: Metadata generated during fit, with keys `y_dtype_`, `y_ndim_`, `n_outputs_`, and `n_outputs_expected_`
158
"""
159
```
160
161
## Usage Examples
162
163
### Manual Target Reshaping
164
165
```python
166
from scikeras.utils.transformers import TargetReshaper
167
import numpy as np
168
169
# Create 1D target array
170
y_1d = np.array([0, 1, 0, 1, 1])
171
172
# Initialize and fit reshaper
173
reshaper = TargetReshaper()
174
reshaper.fit(y_1d)
175
176
# Transform to 2D for compatibility with sklearn transformers
177
y_2d = reshaper.transform(y_1d)
178
print(f"Original shape: {y_1d.shape}") # (5,)
179
print(f"Reshaped: {y_2d.shape}") # (5, 1)
180
181
# Transform back to original shape
182
y_back = reshaper.inverse_transform(y_2d)
183
print(f"Back to original: {y_back.shape}") # (5,)
184
```
185
186
### Classification Label Encoding
187
188
```python
189
from scikeras.utils.transformers import ClassifierLabelEncoder
190
from sklearn.datasets import make_classification
191
import numpy as np
192
193
# Create multiclass classification data
194
X, y = make_classification(n_samples=100, n_classes=3, n_features=10,
195
n_informative=5, random_state=42)
196
197
# Use string labels
198
y_str = np.array(['class_a', 'class_b', 'class_c'])[y]
199
200
# Initialize and fit encoder
201
encoder = ClassifierLabelEncoder()
202
encoder.fit(y_str)
203
204
# Transform for Keras training
205
y_encoded = encoder.transform(y_str)
206
print(f"Original labels: {y_str[:5]}")
207
print(f"Encoded shape: {y_encoded.shape}")
208
209
# Get encoding metadata
210
metadata = encoder.get_metadata()
211
print(f"Classes: {metadata.get('classes_', 'Not available')}")
212
```
213
214
### Pipeline Integration
215
216
```python
217
from scikeras.utils.transformers import TargetReshaper
218
from sklearn.pipeline import Pipeline
219
from sklearn.preprocessing import OneHotEncoder
220
import numpy as np
221
222
# Create categorical target data
223
y_categorical = np.array(['A', 'B', 'A', 'C', 'B'])
224
225
# Create pipeline with TargetReshaper for OneHotEncoder compatibility
226
target_pipeline = Pipeline([
227
('reshape', TargetReshaper()),
228
('onehot', OneHotEncoder(sparse_output=False))
229
])
230
231
# Fit and transform
232
y_processed = target_pipeline.fit_transform(y_categorical)
233
print(f"Original: {y_categorical}")
234
print(f"One-hot encoded shape: {y_processed.shape}")
235
print(f"One-hot encoded:\n{y_processed}")
236
```
237
238
### Custom Classification Target Processing
239
240
```python
241
from scikeras.utils.transformers import ClassifierLabelEncoder
242
from scikeras.wrappers import KerasClassifier
243
import keras
244
import numpy as np
245
246
# Create imbalanced multiclass data
247
y_imbalanced = np.random.choice(['rare', 'common', 'medium'],
248
size=1000, p=[0.1, 0.7, 0.2])
249
250
def create_classifier():
251
model = keras.Sequential([
252
keras.Input(shape=(10,)),
keras.layers.Dense(50, activation='relu'),
253
keras.layers.Dense(3, activation='softmax')
254
])
255
model.compile(optimizer='adam', loss='categorical_crossentropy',
256
metrics=['accuracy'])
257
return model
258
259
# The classifier automatically uses ClassifierLabelEncoder
260
clf = KerasClassifier(model=create_classifier, loss='categorical_crossentropy', epochs=10)
261
262
# Generate dummy features
263
X = np.random.random((1000, 10))
264
265
# Fit - encoder handles label preprocessing automatically
266
clf.fit(X, y_imbalanced)
267
268
# Predictions return original label format
269
predictions = clf.predict(X[:5])
270
probabilities = clf.predict_proba(X[:5])
271
272
print(f"Original labels: {y_imbalanced[:5]}")
273
print(f"Predictions: {predictions}")
274
print(f"Probability shape: {probabilities.shape}")
275
```
276
277
### Regression Target Processing
278
279
```python
280
from scikeras.utils.transformers import RegressorTargetEncoder
281
from scikeras.wrappers import KerasRegressor
282
import numpy as np
283
import keras
284
285
# Create multi-output regression data
286
n_samples, n_outputs = 100, 3
287
y_multi = np.random.random((n_samples, n_outputs))
288
289
def create_regressor():
290
model = keras.Sequential([
291
keras.Input(shape=(5,)),
keras.layers.Dense(50, activation='relu'),
292
keras.layers.Dense(n_outputs)
293
])
294
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
295
return model
296
297
# The regressor automatically uses RegressorTargetEncoder
298
reg = KerasRegressor(model=create_regressor, epochs=10)
299
300
# Generate dummy features
301
X = np.random.random((n_samples, 5))
302
303
# Fit - encoder handles target preprocessing automatically
304
reg.fit(X, y_multi)
305
306
# Predictions maintain original target format
307
predictions = reg.predict(X[:5])
308
print(f"Target shape: {y_multi.shape}")
309
print(f"Prediction shape: {predictions.shape}")
310
```
311
312
## Advanced Usage
313
314
### Custom Target Transformation Pipeline
315
316
```python
317
from scikeras.utils.transformers import TargetReshaper
318
from sklearn.pipeline import Pipeline
319
from sklearn.preprocessing import StandardScaler
320
import numpy as np
321
322
# Create custom target preprocessing pipeline
323
def create_target_pipeline():
324
return Pipeline([
325
('reshape', TargetReshaper()),
326
('scale', StandardScaler())
327
])
328
329
# Use with regression data
330
y_regression = np.random.randn(100) * 100 + 50 # Mean=50, std=100
331
332
pipeline = create_target_pipeline()
333
y_processed = pipeline.fit_transform(y_regression)
334
335
print(f"Original stats: mean={y_regression.mean():.2f}, std={y_regression.std():.2f}")
336
print(f"Processed stats: mean={y_processed.mean():.2f}, std={y_processed.std():.2f}")
337
338
# Inverse transform back to original scale
339
y_back = pipeline.inverse_transform(y_processed)
340
print(f"Recovered stats: mean={y_back.mean():.2f}, std={y_back.std():.2f}")
341
```
342
343
## Types
344
345
```python { .api }
346
# Target types supported by transformers
347
TargetType = Union[np.ndarray, List, Tuple]
348
349
# Metadata structure returned by get_metadata()
350
TransformerMetadata = Dict[str, Any]
351
352
# Encoding options for ClassifierLabelEncoder
353
EncodingType = Literal['ordinal', 'onehot', 'binary']
354
```