Tessl Tile for pypi/scikeras@0.13.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

index.md random-state.md transformers.md utils.md wrappers.md

docs/random-state.md

0
# Random State Management
1

2
Context manager for ensuring reproducible results across Python, NumPy, and TensorFlow random number generators. This utility enables deterministic training and evaluation for scientific reproducibility and debugging.
3

4
## Capabilities
5

6
### Reproducible Random State Context
7

8
Context manager that sets and restores random state across all major random number generators used in machine learning workflows.
9

10
```python { .api }
11
@contextmanager
12
def tensorflow_random_state(seed):
13
    """
14
    Context manager for reproducible random state across all generators.
15
    
16
    Args:
17
        seed (int): Random seed value for reproducibility
18
    
19
    Yields:
20
        None: Context for reproducible operations
21
    
22
    Note:
23
        This context manager:
24
        - Sets Python's random module seed
25
        - Sets NumPy's random seed
26
        - Sets TensorFlow's random seed
27
        - Enables TensorFlow's deterministic operations
28
        - Restores all original states when exiting
29
    """
30
```
31

32
## Usage Examples
33

34
### Basic Reproducible Training
35

36
```python
37
from scikeras.utils.random_state import tensorflow_random_state
38
from scikeras.wrappers import KerasClassifier
39
import keras
40
import numpy as np
41

42
def create_model():
43
    model = keras.Sequential([
44
        keras.layers.Dense(50, activation='relu', input_dim=10),
45
        keras.layers.Dense(1, activation='sigmoid')
46
    ])
47
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
48
    return model
49

50
# Generate sample data
51
X = np.random.random((100, 10))
52
y = np.random.randint(0, 2, 100)
53

54
# Train with reproducible results
55
with tensorflow_random_state(42):
56
    clf = KerasClassifier(model=create_model, epochs=10, verbose=0)
57
    clf.fit(X, y)
58
    predictions_1 = clf.predict(X)
59

60
# Train again with same seed - should get identical results
61
with tensorflow_random_state(42):
62
    clf2 = KerasClassifier(model=create_model, epochs=10, verbose=0)
63
    clf2.fit(X, y)
64
    predictions_2 = clf2.predict(X)
65

66
# Verify reproducibility
67
print(f"Predictions match: {np.array_equal(predictions_1, predictions_2)}")
68
```
69

70
### Cross-Validation with Reproducible Results
71

72
```python
73
from scikeras.utils.random_state import tensorflow_random_state
74
from scikeras.wrappers import KerasRegressor
75
from sklearn.model_selection import cross_val_score
76
import keras
77
import numpy as np
78

79
def create_regressor():
80
    model = keras.Sequential([
81
        keras.layers.Dense(64, activation='relu', input_dim=5),
82
        keras.layers.Dense(1)
83
    ])
84
    model.compile(optimizer='adam', loss='mse')
85
    return model
86

87
# Generate sample data
88
X = np.random.random((200, 5))
89
y = np.random.random(200)
90

91
# Reproducible cross-validation
92
with tensorflow_random_state(123):
93
    reg = KerasRegressor(model=create_regressor, epochs=20, verbose=0)
94
    scores_1 = cross_val_score(reg, X, y, cv=5, scoring='neg_mean_squared_error')
95

96
# Repeat with same seed
97
with tensorflow_random_state(123):
98
    reg2 = KerasRegressor(model=create_regressor, epochs=20, verbose=0)
99
    scores_2 = cross_val_score(reg2, X, y, cv=5, scoring='neg_mean_squared_error')
100

101
print(f"CV scores match: {np.allclose(scores_1, scores_2)}")
102
print(f"First run: {scores_1}")
103
print(f"Second run: {scores_2}")
104
```
105

106
### Grid Search with Reproducible Results
107

108
```python
109
from scikeras.utils.random_state import tensorflow_random_state
110
from scikeras.wrappers import KerasClassifier
111
from sklearn.model_selection import GridSearchCV
112
import keras
113
import numpy as np
114

115
def create_model(units=50, dropout_rate=0.2):
116
    model = keras.Sequential([
117
        keras.layers.Dense(units, activation='relu', input_dim=8),
118
        keras.layers.Dropout(dropout_rate),
119
        keras.layers.Dense(1, activation='sigmoid')
120
    ])
121
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
122
    return model
123

124
# Generate sample data
125
X = np.random.random((300, 8))
126
y = np.random.randint(0, 2, 300)
127

128
# Reproducible grid search
129
param_grid = {
130
    'model__units': [25, 50],
131
    'model__dropout_rate': [0.1, 0.3],
132
    'epochs': [5, 10]
133
}
134

135
with tensorflow_random_state(456):
136
    clf = KerasClassifier(model=create_model, verbose=0)
137
    grid_search = GridSearchCV(clf, param_grid, cv=3, scoring='accuracy')
138
    grid_search.fit(X, y)
139
    best_score_1 = grid_search.best_score_
140
    best_params_1 = grid_search.best_params_
141

142
# Repeat search with same seed
143
with tensorflow_random_state(456):
144
    clf2 = KerasClassifier(model=create_model, verbose=0)
145
    grid_search2 = GridSearchCV(clf2, param_grid, cv=3, scoring='accuracy')
146
    grid_search2.fit(X, y)
147
    best_score_2 = grid_search2.best_score_
148
    best_params_2 = grid_search2.best_params_
149

150
print(f"Best scores match: {best_score_1 == best_score_2}")
151
print(f"Best params match: {best_params_1 == best_params_2}")
152
print(f"Best parameters: {best_params_1}")
153
print(f"Best score: {best_score_1:.4f}")
154
```
155

156
### Debugging with Reproducible Failures
157

158
```python
159
from scikeras.utils.random_state import tensorflow_random_state
160
from scikeras.wrappers import KerasClassifier
161
import keras
162
import numpy as np
163

164
def create_unstable_model():
165
    """Model that might have training instability."""
166
    model = keras.Sequential([
167
        keras.layers.Dense(100, activation='relu', input_dim=20),
168
        keras.layers.Dense(100, activation='relu'),
169
        keras.layers.Dense(1, activation='sigmoid')
170
    ])
171
    # High learning rate might cause instability
172
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.1), 
173
                 loss='binary_crossentropy', metrics=['accuracy'])
174
    return model
175

176
# Generate challenging dataset
177
X = np.random.random((500, 20))
178
y = (X.sum(axis=1) > 10).astype(int)
179

180
# Reproduce training behavior for debugging
181
debug_seed = 789
182

183
with tensorflow_random_state(debug_seed):
184
    clf = KerasClassifier(model=create_unstable_model, epochs=50, verbose=1)
185
    try:
186
        clf.fit(X, y)
187
        final_score = clf.score(X, y)
188
        print(f"Training completed. Final score: {final_score:.4f}")
189
    except Exception as e:
190
        print(f"Training failed with error: {e}")
191

192
# Reproduce exact same behavior for investigation
193
print("\nReproducing the same training run...")
194
with tensorflow_random_state(debug_seed):
195
    clf2 = KerasClassifier(model=create_unstable_model, epochs=50, verbose=1)
196
    try:
197
        clf2.fit(X, y)
198
        final_score2 = clf2.score(X, y)
199
        print(f"Training completed. Final score: {final_score2:.4f}")
200
    except Exception as e:
201
        print(f"Training failed with error: {e}")
202
```
203

204
### Ensemble Training with Controlled Randomness
205

206
```python
207
from scikeras.utils.random_state import tensorflow_random_state
208
from scikeras.wrappers import KerasClassifier
209
import keras
210
import numpy as np
211

212
def create_model():
213
    model = keras.Sequential([
214
        keras.layers.Dense(30, activation='relu', input_dim=10),
215
        keras.layers.Dense(1, activation='sigmoid')
216
    ])
217
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
218
    return model
219

220
# Generate sample data
221
X = np.random.random((400, 10))
222
y = np.random.randint(0, 2, 400)
223

224
# Train ensemble with different seeds but reproducible results
225
ensemble_models = []
226
ensemble_seeds = [100, 200, 300, 400, 500]
227

228
for i, seed in enumerate(ensemble_seeds):
229
    print(f"Training ensemble member {i+1} with seed {seed}")
230
    
231
    with tensorflow_random_state(seed):
232
        clf = KerasClassifier(model=create_model, epochs=20, verbose=0)
233
        clf.fit(X, y)
234
        ensemble_models.append(clf)
235

236
# Make ensemble predictions
237
ensemble_predictions = []
238
for model in ensemble_models:
239
    with tensorflow_random_state(42):  # Same seed for prediction consistency
240
        pred = model.predict_proba(X[:10])[:, 1]  # Get positive class probabilities
241
        ensemble_predictions.append(pred)
242

243
# Average predictions
244
ensemble_avg = np.mean(ensemble_predictions, axis=0)
245
print(f"\nEnsemble predictions for first 10 samples:")
246
print(f"Individual model predictions:")
247
for i, pred in enumerate(ensemble_predictions):
248
    print(f"Model {i+1}: {pred}")
249
print(f"Ensemble average: {ensemble_avg}")
250
```
251

252
## Implementation Details
253

254
### State Management
255

256
The context manager preserves and restores:
257

258
- **Python random state**: `random.getstate()` and `random.setstate()`
259
- **NumPy random state**: `np.random.get_state()` and `np.random.set_state()`
260
- **TensorFlow random seed**: `tf.random.set_seed()`
261
- **TensorFlow deterministic operations**: `tf.config.experimental.enable_op_determinism()`
262
- **Environment variables**: `TF_DETERMINISTIC_OPS`
263

264
### TensorFlow Compatibility
265

266
The function handles both TensorFlow 2.x installations and environments where TensorFlow is not available:
267

268
```python
269
# When TensorFlow is available
270
with tensorflow_random_state(42):
271
    # Full deterministic behavior
272
    pass
273

274
# When TensorFlow is not installed
275
with tensorflow_random_state(42):
276
    # Still sets Python and NumPy seeds
277
    # TensorFlow operations are no-ops
278
    pass
279
```
280

281
### Performance Considerations
282

283
Enabling deterministic operations may impact performance:
284

285
- Some GPU operations run slower in deterministic mode
286
- Memory usage may be slightly higher
287
- Consider using only during development/debugging if performance is critical
288

289
### Thread Safety
290

291
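The context manager is not thread-safe: it sets and restores process-wide random state (Python, NumPy, and TensorFlow), so contexts that overlap in different threads can interfere with one another. If you do train concurrently, use a separate seed for each task: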
292

293
```python
294
import threading
295
from concurrent.futures import ThreadPoolExecutor
296

297
def train_with_seed(seed):
298
    with tensorflow_random_state(seed):
299
        # Training code here
300
        pass
301

302
# Use different seeds for parallel training
303
with ThreadPoolExecutor() as executor:
304
    futures = [executor.submit(train_with_seed, seed) 
305
              for seed in [100, 200, 300]]
306
```

Version

Tile

Files

docs/random-state.md

docs/random-state.md