or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

index.md, random-state.md, transformers.md, utils.md, wrappers.md

docs/transformers.md

0

# Data Transformers

1

2

Transformer classes for preprocessing targets and features to ensure compatibility between scikit-learn and Keras data formats. These transformers handle automatic data preprocessing based on target types and provide seamless integration with scikit-learn pipelines.

3

4

## Capabilities

5

6

### TargetReshaper

7

8

Converts 1D targets to 2D format and back for compatibility with transformers that require 2D inputs, such as OneHotEncoder and OrdinalEncoder.

9

10

```python { .api }

11

class TargetReshaper(BaseEstimator, TransformerMixin):

12

def __init__(self):

13

"""Initialize TargetReshaper."""

14

15

def fit(self, y):

16

"""

17

Fit the transformer to target array.

18

19

Args:

20

y: array-like - Target array to learn shape from

21

22

Returns:

23

self: Fitted transformer

24

"""

25

26

def transform(self, y):

27

"""

28

Transform 1D targets to 2D format.

29

30

Args:

31

y: array-like - Target array to transform

32

33

Returns:

34

array-like: Reshaped target array

35

"""

36

37

def inverse_transform(self, y):

38

"""

39

Transform 2D targets back to original dimensionality.

40

41

Args:

42

y: array-like - 2D target array to reshape back

43

44

Returns:

45

array-like: Target array in original shape

46

"""

47

48

@property

49

def ndim_(self):

50

"""int: Original dimensions of fitted target array."""

51

```

52

53

### ClassifierLabelEncoder

54

55

Default target transformer for KerasClassifier that handles label encoding and one-hot encoding for classification targets with support for different target types.

56

57

```python { .api }

58

class ClassifierLabelEncoder(BaseEstimator, TransformerMixin):

59

def __init__(self, loss=None):

60

"""

61

Initialize ClassifierLabelEncoder.

62

63

Args:

64

loss: Loss function to determine encoding strategy

65

"""

66

67

def fit(self, y):

68

"""

69

Fit encoder to label array.

70

71

Args:

72

y: array-like of shape (n_samples,) - Target class labels

73

74

Returns:

75

self: Fitted encoder

76

"""

77

78

def transform(self, y):

79

"""

80

Transform labels to encoded format.

81

82

Args:

83

y: array-like of shape (n_samples,) - Target labels to encode

84

85

Returns:

86

array-like: Encoded target labels suitable for Keras training

87

"""

88

89

def inverse_transform(self, y_transformed, return_proba=False):

90

"""

91

Transform encoded labels back to original format.

92

93

Args:

94

y_transformed: array-like - Encoded labels or probabilities

95

return_proba: bool - Whether to return probabilities or class predictions

96

97

Returns:

98

array-like: Original label format or probabilities

99

"""

100

101

def get_metadata(self):

102

"""

103

Get metadata about label encoding.

104

105

Returns:

106

dict: Metadata including classes, encoding type, etc.

107

"""

108

```

109

110

### RegressorTargetEncoder

111

112

Default target transformer for KerasRegressor that handles target preprocessing for regression tasks including reshaping and validation.

113

114

```python { .api }

115

class RegressorTargetEncoder(BaseEstimator, TransformerMixin):

116

def __init__(self):

117

"""Initialize RegressorTargetEncoder."""

118

119

def fit(self, y):

120

"""

121

Fit encoder to target array.

122

123

Args:

124

y: array-like - Regression target values

125

126

Returns:

127

self: Fitted encoder

128

"""

129

130

def transform(self, y):

131

"""

132

Transform regression targets for Keras compatibility.

133

134

Args:

135

y: array-like - Target values to transform

136

137

Returns:

138

array-like: Transformed targets suitable for Keras

139

"""

140

141

def inverse_transform(self, y):

142

"""

143

Transform targets back to original format.

144

145

Args:

146

y: array-like - Transformed target values

147

148

Returns:

149

array-like: Original target format

150

"""

151

152

def get_metadata(self):

153

"""

154

Get metadata about target encoding.

155

156

Returns:

157

dict: Metadata including target type, shape, etc.

158

"""

159

```

160

161

## Usage Examples

162

163

### Manual Target Reshaping

164

165

```python

166

from scikeras.utils.transformers import TargetReshaper

167

import numpy as np

168

169

# Create 1D target array

170

y_1d = np.array([0, 1, 0, 1, 1])

171

172

# Initialize and fit reshaper

173

reshaper = TargetReshaper()

174

reshaper.fit(y_1d)

175

176

# Transform to 2D for compatibility with sklearn transformers

177

y_2d = reshaper.transform(y_1d)

178

print(f"Original shape: {y_1d.shape}") # (5,)

179

print(f"Reshaped: {y_2d.shape}") # (5, 1)

180

181

# Transform back to original shape

182

y_back = reshaper.inverse_transform(y_2d)

183

print(f"Back to original: {y_back.shape}") # (5,)

184

```

185

186

### Classification Label Encoding

187

188

```python

189

from scikeras.utils.transformers import ClassifierLabelEncoder

190

from sklearn.datasets import make_classification

191

import numpy as np

192

193

# Create multiclass classification data

194

X, y = make_classification(n_samples=100, n_classes=3, n_features=10,

195

n_informative=5, random_state=42)

196

197

# Use string labels

198

y_str = np.array(['class_a', 'class_b', 'class_c'])[y]

199

200

# Initialize and fit encoder

201

encoder = ClassifierLabelEncoder()

202

encoder.fit(y_str)

203

204

# Transform for Keras training

205

y_encoded = encoder.transform(y_str)

206

print(f"Original labels: {y_str[:5]}")

207

print(f"Encoded shape: {y_encoded.shape}")

208

209

# Get encoding metadata

210

metadata = encoder.get_metadata()

211

print(f"Classes: {metadata.get('classes', 'Not available')}")

212

```

213

214

### Pipeline Integration

215

216

```python

217

from scikeras.utils.transformers import TargetReshaper

218

from sklearn.pipeline import Pipeline

219

from sklearn.preprocessing import OneHotEncoder

220

import numpy as np

221

222

# Create categorical target data

223

y_categorical = np.array(['A', 'B', 'A', 'C', 'B'])

224

225

# Create pipeline with TargetReshaper for OneHotEncoder compatibility

226

target_pipeline = Pipeline([

227

('reshape', TargetReshaper()),

228

('onehot', OneHotEncoder(sparse_output=False))

229

])

230

231

# Fit and transform

232

y_processed = target_pipeline.fit_transform(y_categorical)

233

print(f"Original: {y_categorical}")

234

print(f"One-hot encoded shape: {y_processed.shape}")

235

print(f"One-hot encoded:\n{y_processed}")

236

```

237

238

### Custom Classification Target Processing

239

240

```python

241

from scikeras.utils.transformers import ClassifierLabelEncoder

242

from scikeras.wrappers import KerasClassifier

243

import keras

244

import numpy as np

245

246

# Create imbalanced multiclass data

247

y_imbalanced = np.random.choice(['rare', 'common', 'medium'],

248

size=1000, p=[0.1, 0.7, 0.2])

249

250

def create_classifier():

251

model = keras.Sequential([

252

keras.layers.Dense(50, activation='relu', input_dim=10),

253

keras.layers.Dense(3, activation='softmax')

254

])

255

model.compile(optimizer='adam', loss='categorical_crossentropy',

256

metrics=['accuracy'])

257

return model

258

259

# The classifier automatically uses ClassifierLabelEncoder

260

clf = KerasClassifier(model=create_classifier, epochs=10)

261

262

# Generate dummy features

263

X = np.random.random((1000, 10))

264

265

# Fit - encoder handles label preprocessing automatically

266

clf.fit(X, y_imbalanced)

267

268

# Predictions return original label format

269

predictions = clf.predict(X[:5])

270

probabilities = clf.predict_proba(X[:5])

271

272

print(f"Original labels: {y_imbalanced[:5]}")

273

print(f"Predictions: {predictions}")

274

print(f"Probability shape: {probabilities.shape}")

275

```

276

277

### Regression Target Processing

278

279

```python

280

from scikeras.utils.transformers import RegressorTargetEncoder

281

from scikeras.wrappers import KerasRegressor

282

import numpy as np

283

import keras

284

285

# Create multi-output regression data

286

n_samples, n_outputs = 100, 3

287

y_multi = np.random.random((n_samples, n_outputs))

288

289

def create_regressor():

290

model = keras.Sequential([

291

keras.layers.Dense(50, activation='relu', input_dim=5),

292

keras.layers.Dense(n_outputs)

293

])

294

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

295

return model

296

297

# The regressor automatically uses RegressorTargetEncoder

298

reg = KerasRegressor(model=create_regressor, epochs=10)

299

300

# Generate dummy features

301

X = np.random.random((n_samples, 5))

302

303

# Fit - encoder handles target preprocessing automatically

304

reg.fit(X, y_multi)

305

306

# Predictions maintain original target format

307

predictions = reg.predict(X[:5])

308

print(f"Target shape: {y_multi.shape}")

309

print(f"Prediction shape: {predictions.shape}")

310

```

311

312

## Advanced Usage

313

314

### Custom Target Transformation Pipeline

315

316

```python

317

from scikeras.utils.transformers import TargetReshaper

318

from sklearn.pipeline import Pipeline

319

from sklearn.preprocessing import StandardScaler

320

import numpy as np

321

322

# Create custom target preprocessing pipeline

323

def create_target_pipeline():

324

return Pipeline([

325

('reshape', TargetReshaper()),

326

('scale', StandardScaler())

327

])

328

329

# Use with regression data

330

y_regression = np.random.randn(100) * 100 + 50 # Mean=50, std=100

331

332

pipeline = create_target_pipeline()

333

y_processed = pipeline.fit_transform(y_regression)

334

335

print(f"Original stats: mean={y_regression.mean():.2f}, std={y_regression.std():.2f}")

336

print(f"Processed stats: mean={y_processed.mean():.2f}, std={y_processed.std():.2f}")

337

338

# Inverse transform back to original scale

339

y_back = pipeline.inverse_transform(y_processed)

340

print(f"Recovered stats: mean={y_back.mean():.2f}, std={y_back.std():.2f}")

341

```

342

343

## Types

344

345

```python { .api }

346

# Target types supported by transformers

347

TargetType = Union[np.ndarray, List, Tuple]

348

349

# Metadata structure returned by get_metadata()

350

TransformerMetadata = Dict[str, Any]

351

352

# Encoding options for ClassifierLabelEncoder

353

EncodingType = Literal['ordinal', 'onehot', 'binary']

354

```