0
# Random State Management
1
2
Context manager for ensuring reproducible results across Python, NumPy, and TensorFlow random number generators. This utility enables deterministic training and evaluation for scientific reproducibility and debugging.
3
4
## Capabilities
5
6
### Reproducible Random State Context
7
8
Context manager that sets and restores random state across all major random number generators used in machine learning workflows.
9
10
```python { .api }
11
@contextmanager
12
def tensorflow_random_state(seed):
13
"""
14
Context manager for reproducible random state across all generators.
15
16
Args:
17
seed (int): Random seed value for reproducibility
18
19
Yields:
20
None: Context for reproducible operations
21
22
Note:
23
This context manager:
24
- Sets Python's random module seed
25
- Sets NumPy's random seed
26
- Sets TensorFlow's random seed
27
- Enables TensorFlow's deterministic operations
28
- Restores all original states when exiting
29
"""
30
```
31
32
## Usage Examples
33
34
### Basic Reproducible Training
35
36
```python
37
from scikeras.utils.random_state import tensorflow_random_state
38
from scikeras.wrappers import KerasClassifier
39
import keras
40
import numpy as np
41
42
def create_model():
43
model = keras.Sequential([
44
keras.layers.Dense(50, activation='relu', input_dim=10),
45
keras.layers.Dense(1, activation='sigmoid')
46
])
47
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
48
return model
49
50
# Generate sample data
51
X = np.random.random((100, 10))
52
y = np.random.randint(0, 2, 100)
53
54
# Train with reproducible results
55
with tensorflow_random_state(42):
56
clf = KerasClassifier(model=create_model, epochs=10, verbose=0)
57
clf.fit(X, y)
58
predictions_1 = clf.predict(X)
59
60
# Train again with same seed - should get identical results
61
with tensorflow_random_state(42):
62
clf2 = KerasClassifier(model=create_model, epochs=10, verbose=0)
63
clf2.fit(X, y)
64
predictions_2 = clf2.predict(X)
65
66
# Verify reproducibility
67
print(f"Predictions match: {np.array_equal(predictions_1, predictions_2)}")
68
```
69
70
### Cross-Validation with Reproducible Results
71
72
```python
73
from scikeras.utils.random_state import tensorflow_random_state
74
from scikeras.wrappers import KerasRegressor
75
from sklearn.model_selection import cross_val_score
76
import keras
77
import numpy as np
78
79
def create_regressor():
80
model = keras.Sequential([
81
keras.layers.Dense(64, activation='relu', input_dim=5),
82
keras.layers.Dense(1)
83
])
84
model.compile(optimizer='adam', loss='mse')
85
return model
86
87
# Generate sample data
88
X = np.random.random((200, 5))
89
y = np.random.random(200)
90
91
# Reproducible cross-validation
92
with tensorflow_random_state(123):
93
reg = KerasRegressor(model=create_regressor, epochs=20, verbose=0)
94
scores_1 = cross_val_score(reg, X, y, cv=5, scoring='neg_mean_squared_error')
95
96
# Repeat with same seed
97
with tensorflow_random_state(123):
98
reg2 = KerasRegressor(model=create_regressor, epochs=20, verbose=0)
99
scores_2 = cross_val_score(reg2, X, y, cv=5, scoring='neg_mean_squared_error')
100
101
print(f"CV scores match: {np.allclose(scores_1, scores_2)}")
102
print(f"First run: {scores_1}")
103
print(f"Second run: {scores_2}")
104
```
105
106
### Grid Search with Reproducible Results
107
108
```python
109
from scikeras.utils.random_state import tensorflow_random_state
110
from scikeras.wrappers import KerasClassifier
111
from sklearn.model_selection import GridSearchCV
112
import keras
113
import numpy as np
114
115
def create_model(units=50, dropout_rate=0.2):
116
model = keras.Sequential([
117
keras.layers.Dense(units, activation='relu', input_dim=8),
118
keras.layers.Dropout(dropout_rate),
119
keras.layers.Dense(1, activation='sigmoid')
120
])
121
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
122
return model
123
124
# Generate sample data
125
X = np.random.random((300, 8))
126
y = np.random.randint(0, 2, 300)
127
128
# Reproducible grid search
129
param_grid = {
130
'model__units': [25, 50],
131
'model__dropout_rate': [0.1, 0.3],
132
'epochs': [5, 10]
133
}
134
135
with tensorflow_random_state(456):
136
clf = KerasClassifier(model=create_model, verbose=0)
137
grid_search = GridSearchCV(clf, param_grid, cv=3, scoring='accuracy')
138
grid_search.fit(X, y)
139
best_score_1 = grid_search.best_score_
140
best_params_1 = grid_search.best_params_
141
142
# Repeat search with same seed
143
with tensorflow_random_state(456):
144
clf2 = KerasClassifier(model=create_model, verbose=0)
145
grid_search2 = GridSearchCV(clf2, param_grid, cv=3, scoring='accuracy')
146
grid_search2.fit(X, y)
147
best_score_2 = grid_search2.best_score_
148
best_params_2 = grid_search2.best_params_
149
150
print(f"Best scores match: {best_score_1 == best_score_2}")
151
print(f"Best params match: {best_params_1 == best_params_2}")
152
print(f"Best parameters: {best_params_1}")
153
print(f"Best score: {best_score_1:.4f}")
154
```
155
156
### Debugging with Reproducible Failures
157
158
```python
159
from scikeras.utils.random_state import tensorflow_random_state
160
from scikeras.wrappers import KerasClassifier
161
import keras
162
import numpy as np
163
164
def create_unstable_model():
165
"""Model that might have training instability."""
166
model = keras.Sequential([
167
keras.layers.Dense(100, activation='relu', input_dim=20),
168
keras.layers.Dense(100, activation='relu'),
169
keras.layers.Dense(1, activation='sigmoid')
170
])
171
# High learning rate might cause instability
172
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.1),
173
loss='binary_crossentropy', metrics=['accuracy'])
174
return model
175
176
# Generate challenging dataset
177
X = np.random.random((500, 20))
178
y = (X.sum(axis=1) > 10).astype(int)
179
180
# Reproduce training behavior for debugging
181
debug_seed = 789
182
183
with tensorflow_random_state(debug_seed):
184
clf = KerasClassifier(model=create_unstable_model, epochs=50, verbose=1)
185
try:
186
clf.fit(X, y)
187
final_score = clf.score(X, y)
188
print(f"Training completed. Final score: {final_score:.4f}")
189
except Exception as e:
190
print(f"Training failed with error: {e}")
191
192
# Reproduce exact same behavior for investigation
193
print("\nReproducing the same training run...")
194
with tensorflow_random_state(debug_seed):
195
clf2 = KerasClassifier(model=create_unstable_model, epochs=50, verbose=1)
196
try:
197
clf2.fit(X, y)
198
final_score2 = clf2.score(X, y)
199
print(f"Training completed. Final score: {final_score2:.4f}")
200
except Exception as e:
201
print(f"Training failed with error: {e}")
202
```
203
204
### Ensemble Training with Controlled Randomness
205
206
```python
207
from scikeras.utils.random_state import tensorflow_random_state
208
from scikeras.wrappers import KerasClassifier
209
import keras
210
import numpy as np
211
212
def create_model():
213
model = keras.Sequential([
214
keras.layers.Dense(30, activation='relu', input_dim=10),
215
keras.layers.Dense(1, activation='sigmoid')
216
])
217
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
218
return model
219
220
# Generate sample data
221
X = np.random.random((400, 10))
222
y = np.random.randint(0, 2, 400)
223
224
# Train ensemble with different seeds but reproducible results
225
ensemble_models = []
226
ensemble_seeds = [100, 200, 300, 400, 500]
227
228
for i, seed in enumerate(ensemble_seeds):
229
print(f"Training ensemble member {i+1} with seed {seed}")
230
231
with tensorflow_random_state(seed):
232
clf = KerasClassifier(model=create_model, epochs=20, verbose=0)
233
clf.fit(X, y)
234
ensemble_models.append(clf)
235
236
# Make ensemble predictions
237
ensemble_predictions = []
238
for model in ensemble_models:
239
with tensorflow_random_state(42): # Same seed for prediction consistency
240
pred = model.predict_proba(X[:10])[:, 1] # Get positive class probabilities
241
ensemble_predictions.append(pred)
242
243
# Average predictions
244
ensemble_avg = np.mean(ensemble_predictions, axis=0)
245
print(f"\nEnsemble predictions for first 10 samples:")
246
print(f"Individual model predictions:")
247
for i, pred in enumerate(ensemble_predictions):
248
print(f"Model {i+1}: {pred}")
249
print(f"Ensemble average: {ensemble_avg}")
250
```
251
252
## Implementation Details
253
254
### State Management
255
256
The context manager manages the following pieces of state, using the APIs listed, and restores the original values on exit:
257
258
- **Python random state**: `random.getstate()` and `random.setstate()`
259
- **NumPy random state**: `np.random.get_state()` and `np.random.set_state()`
260
- **TensorFlow random seed**: `tf.random.set_seed()`
261
- **TensorFlow deterministic operations**: `tf.config.experimental.enable_op_determinism()`
262
- **Environment variables**: `TF_DETERMINISTIC_OPS`
263
264
### TensorFlow Compatibility
265
266
The function handles both TensorFlow 2.x installations and environments where TensorFlow is not available:
267
268
```python
269
# When TensorFlow is available
270
with tensorflow_random_state(42):
271
# Full deterministic behavior
272
pass
273
274
# When TensorFlow is not installed
275
with tensorflow_random_state(42):
276
# Still sets Python and NumPy seeds
277
# TensorFlow-specific seeding steps are skipped (no-ops)
278
pass
279
```
280
281
### Performance Considerations
282
283
Enabling deterministic operations may impact performance:
284
285
- Some GPU operations run slower in deterministic mode
286
- Memory usage may be slightly higher
287
- If performance is critical, consider enabling deterministic mode only during development or debugging
288
289
### Thread Safety
290
291
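The context manager is not thread-safe: the Python and NumPy random states it sets and restores are process-wide, so contexts that overlap in different threads can overwrite each other's state. Use separate seeds for concurrent training: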
292
293
```python
294
import threading
295
from concurrent.futures import ThreadPoolExecutor
296
297
def train_with_seed(seed):
298
with tensorflow_random_state(seed):
299
# Training code here
300
pass
301
302
# Use different seeds for parallel training
303
with ThreadPoolExecutor() as executor:
304
futures = [executor.submit(train_with_seed, seed)
305
for seed in [100, 200, 300]]
306
```